// branch:
// codemode.spec.ts
// 3588 bytesRaw
import { test, expect } from "@playwright/test";

/**
 * E2E tests for @cloudflare/codemode with a real AI binding.
 *
 * These verify the full pipeline:
 *   user prompt → LLM generates code via createCodeTool → DynamicWorkerExecutor
 *   runs the code in an isolated Worker → tool functions called via RPC → result returned.
 *
 * Uses Workers AI (@cf/moonshotai/kimi-k2.5) — no API key needed.
 */

/**
 * Sends a single user message to the `/run` endpoint and returns the raw
 * response body as text.
 *
 * The request payload is a one-element `messages` array whose only entry has a
 * freshly generated `msg-<uuid>` id, role `"user"`, and a single text part
 * carrying `userMessage`.
 *
 * @param request - Playwright API request context used to issue the POST.
 * @param baseURL - Base URL of the server under test; `/run` is appended to it.
 * @param userMessage - Prompt text placed in the message's text part.
 * @returns The complete, unparsed response body.
 * @throws An assertion error (including status and a body excerpt) when the
 *   response is not OK.
 */
async function runChat(
  request: import("@playwright/test").APIRequestContext,
  baseURL: string,
  userMessage: string
): Promise<string> {
  const res = await request.post(`${baseURL}/run`, {
    headers: { "Content-Type": "application/json" },
    data: {
      messages: [
        {
          id: `msg-${crypto.randomUUID()}`,
          role: "user",
          parts: [{ type: "text", text: userMessage }]
        }
      ]
    },
    // NOTE(review): this matches the 45 s per-test timeout set by the suite, so
    // the test-level timeout will usually fire first — confirm whether the
    // suite should get extra headroom.
    timeout: 45_000
  });

  // Read the body before asserting so that a failing status is reported
  // together with whatever error text the server sent back.
  const body = await res.text();
  expect(
    res.ok(),
    `POST /run returned ${res.status()} ${res.statusText()}: ${body.slice(0, 500)}`
  ).toBe(true);
  return body;
}

test.describe("codemode e2e (Workers AI)", () => {
  // Applies a 45 s timeout to every test in this suite.
  test.setTimeout(45_000);

  /** Returns true when `haystack` contains at least one of `needles`. */
  const includesAny = (haystack: string, needles: readonly string[]): boolean =>
    needles.some((needle) => haystack.includes(needle));

  test("LLM generates and executes code that calls addNumbers tool", async ({
    request,
    baseURL
  }) => {
    const response = await runChat(
      request,
      baseURL!,
      "What is 17 + 25? Use the codemode tool with the addNumbers function to calculate this."
    );

    // The response stream should contain the answer 42 somewhere
    // (either in the tool result or the LLM's text response)
    expect(response).toContain("42");
  });

  test("LLM generates and executes code that calls getWeather tool", async ({
    request,
    baseURL
  }) => {
    const response = await runChat(
      request,
      baseURL!,
      "What is the weather in London? Use the codemode tool with the getWeather function."
    );

    // The getWeather tool returns { city: "London", temperature: 22, condition: "Sunny" }
    // The LLM should mention London or the weather data in its response.
    // Matching is done on the lowercased body so casing in the reply is irrelevant.
    const lower = response.toLowerCase();
    expect(includesAny(lower, ["london", "22", "sunny"])).toBe(true);
  });

  test("LLM generates and executes code that calls listProjects tool", async ({
    request,
    baseURL
  }) => {
    const response = await runChat(
      request,
      baseURL!,
      "List all projects using the codemode tool with the listProjects function."
    );

    // listProjects returns Alpha and Beta
    const lower = response.toLowerCase();
    expect(includesAny(lower, ["alpha", "beta"])).toBe(true);
  });

  test("LLM generates code with multiple tool calls", async ({
    request,
    baseURL
  }) => {
    const response = await runChat(
      request,
      baseURL!,
      "Using the codemode tool, first get the weather in Paris, then add the numbers 10 and 5. Return both results."
    );

    // Should contain evidence of BOTH tool calls completing. The two checks are
    // asserted separately: a single OR over all markers would pass even if only
    // one of the tools had actually run.
    const lower = response.toLowerCase();
    expect(
      includesAny(lower, ["paris", "22", "sunny"]),
      "expected evidence of the getWeather call (paris / 22 / sunny)"
    ).toBe(true);
    expect(
      lower.includes("15"),
      "expected evidence of the addNumbers call (10 + 5 = 15)"
    ).toBe(true);
  });

  test("generateTypes returns valid type definitions", async ({
    request,
    baseURL
  }) => {
    const res = await request.get(`${baseURL}/types`);
    expect(res.ok()).toBe(true);

    const data = await res.json();
    const types = data.types as string;

    // The generated declarations must expose the codemode namespace and every
    // tool name checked below.
    const expectedSnippets = [
      "declare const codemode",
      "addNumbers",
      "getWeather",
      "createProject",
      "listProjects"
    ];
    for (const snippet of expectedSnippets) {
      expect(types).toContain(snippet);
    }
  });
});