branch:
sanitize.ts
5984 bytesRaw
/**
 * Message sanitization and row-size enforcement utilities.
 *
 * Shared by Think to ensure persistence
 * hygiene: stripping ephemeral provider metadata and compacting
 * oversized messages before writing to SQLite.
 */

import type { ProviderMetadata, ReasoningUIPart, UIMessage } from "ai";

/** Single TextEncoder instance reused for every UTF-8 size measurement in this module. */
const textEncoder = new TextEncoder();

/**
 * Largest serialized message size (in bytes) accepted before compaction kicks in.
 * 1.8MB, leaving headroom below SQLite's 2MB limit.
 */
const ROW_MAX_BYTES = 1_800_000;

/**
 * Return how many bytes `s` occupies once encoded as UTF-8.
 *
 * @param s - The string to measure.
 * @returns The UTF-8 encoded size in bytes (not the UTF-16 `length`).
 */
function byteLength(s: string): number {
  const { byteLength: encodedSize } = textEncoder.encode(s);
  return encodedSize;
}

/**
 * Produce a copy of `message` that is safe to persist, with ephemeral
 * provider-specific data removed.
 *
 * Two passes over the parts:
 * 1. Any part whose `providerMetadata` or `callProviderMetadata` carries an
 *    `openai` entry is run through `stripOpenAIMetadata` for that key.
 * 2. Reasoning parts whose text is empty or whitespace-only are dropped,
 *    unless they still carry a non-empty `providerMetadata` object.
 *
 * The input message is not mutated; a new message object is returned.
 */
export function sanitizeMessage(message: UIMessage): UIMessage {
  // True when `value` is an object that has an `openai` key.
  const hasOpenAIEntry = (value: unknown): boolean =>
    typeof value === "object" && value !== null && "openai" in value;

  // Pass 1: remove OpenAI ephemeral fields from both metadata slots.
  const withoutEphemeral = message.parts.map((part) => {
    let current = part;

    if (
      "providerMetadata" in current &&
      hasOpenAIEntry(current.providerMetadata)
    ) {
      current = stripOpenAIMetadata(current, "providerMetadata");
    }

    if (
      "callProviderMetadata" in current &&
      hasOpenAIEntry(current.callProviderMetadata)
    ) {
      current = stripOpenAIMetadata(current, "callProviderMetadata");
    }

    return current;
  }) as UIMessage["parts"];

  // Pass 2: discard reasoning parts that have neither text nor metadata left.
  const keptParts = withoutEphemeral.filter((part) => {
    if (part.type !== "reasoning") return true;

    const { text, providerMetadata } = part as ReasoningUIPart;
    if (text && text.trim() !== "") return true;

    // Blank reasoning survives only if some provider metadata remains.
    return (
      typeof providerMetadata === "object" &&
      providerMetadata !== null &&
      Object.keys(providerMetadata).length > 0
    );
  });

  return { ...message, parts: keptParts };
}

/**
 * Remove the ephemeral OpenAI fields (`itemId`, `reasoningEncryptedContent`)
 * from one metadata slot of a message part.
 *
 * - If the slot has no `openai` entry, the part is returned as-is (same reference).
 * - Remaining `openai` fields are kept; an `openai` entry left empty is dropped.
 * - If the whole metadata object ends up empty, the slot itself is removed.
 *
 * The input part is never mutated. In the returned copy the metadata slot
 * (when present) is the last key, and `openai` is the last key inside it.
 *
 * @param part - The message part to clean.
 * @param metadataKey - Which metadata slot on the part to inspect.
 * @returns The original part, or a shallow copy with the cleaned metadata.
 */
function stripOpenAIMetadata<T extends UIMessage["parts"][number]>(
  part: T,
  metadataKey: "providerMetadata" | "callProviderMetadata"
): T {
  const source = part as Record<string, unknown>;
  const metadata = source[metadataKey] as {
    openai?: Record<string, unknown>;
    [key: string]: unknown;
  };

  if (!metadata?.openai) return part;

  // OpenAI entry minus the two ephemeral fields.
  const keptOpenai: Record<string, unknown> = { ...metadata.openai };
  delete keptOpenai.itemId;
  delete keptOpenai.reasoningEncryptedContent;

  // Everything in the metadata object that belongs to other providers.
  const otherProviders: Record<string, unknown> = { ...metadata };
  delete otherProviders.openai;

  let cleaned: ProviderMetadata | undefined;
  if (Object.keys(keptOpenai).length > 0) {
    cleaned = { ...otherProviders, openai: keptOpenai } as ProviderMetadata;
  } else if (Object.keys(otherProviders).length > 0) {
    cleaned = otherProviders as ProviderMetadata;
  }

  // Copy the part without the slot, then re-attach the slot only if needed.
  const result: Record<string, unknown> = { ...source };
  delete result[metadataKey];
  if (cleaned) {
    result[metadataKey] = cleaned;
  }
  return result as T;
}

/**
 * Shrink a message whose serialized form exceeds the SQLite row safety
 * threshold (`ROW_MAX_BYTES`, 1.8MB).
 *
 * Behavior:
 * - A message that already fits is returned unchanged (same reference).
 * - A non-assistant message goes straight to text truncation.
 * - For an assistant message:
 *   1. Every tool part in the `output-available` state whose JSON-serialized
 *      output is longer than 1000 characters has that output replaced by a
 *      short notice containing its size in bytes and a preview.
 *   2. If the message is still too big, text parts are truncated from first
 *      to last via `truncateTextParts`.
 *
 * This is best-effort: if the bulk of the message lives in parts that neither
 * pass touches, the result can still exceed the threshold.
 *
 * The input message is never mutated.
 *
 * @param message - The message about to be persisted.
 * @returns The original message, or a compacted copy.
 */
export function enforceRowSizeLimit(message: UIMessage): UIMessage {
  // Longest preview (in UTF-16 code units) embedded in the replacement notice.
  const PREVIEW_CHARS = 500;

  if (byteLength(JSON.stringify(message)) <= ROW_MAX_BYTES) return message;

  if (message.role !== "assistant") {
    return truncateTextParts(message);
  }

  // Pass 1: compact tool outputs
  const compactedParts = message.parts.map((part) => {
    if (
      "output" in part &&
      "toolCallId" in part &&
      "state" in part &&
      part.state === "output-available"
    ) {
      // JSON.stringify returns undefined (not a string) for undefined,
      // functions and symbols; such outputs have nothing to compact.
      const outputJson: string | undefined = JSON.stringify(
        (part as { output: unknown }).output
      );
      if (outputJson !== undefined && outputJson.length > 1000) {
        // Do not end the preview on the first half of a surrogate pair,
        // which would persist a lone surrogate.
        const lastUnit = outputJson.charCodeAt(PREVIEW_CHARS - 1);
        const previewEnd =
          lastUnit >= 0xd800 && lastUnit <= 0xdbff
            ? PREVIEW_CHARS - 1
            : PREVIEW_CHARS;

        return {
          ...part,
          output:
            "This tool output was too large to persist in storage " +
            // Report real UTF-8 bytes, not UTF-16 code units.
            `(${byteLength(outputJson)} bytes). ` +
            "If the user asks about this data, suggest re-running the tool. " +
            `Preview: ${outputJson.slice(0, previewEnd)}...`
        };
      }
    }
    return part;
  }) as UIMessage["parts"];

  const result: UIMessage = { ...message, parts: compactedParts };

  if (byteLength(JSON.stringify(result)) <= ROW_MAX_BYTES) return result;

  // Pass 2: truncate text parts
  return truncateTextParts(result);
}

/**
 * Truncate long text parts, first to last, until the serialized message fits
 * within `ROW_MAX_BYTES`.
 *
 * Each text part longer than 1000 characters is replaced by a marker that
 * records the original length and keeps a preview of up to 500 characters.
 * After every replacement the message is re-measured and the loop stops as
 * soon as it fits, so later parts stay intact when possible. If no text part
 * qualifies, or truncating all of them is not enough, the result may still
 * exceed the limit.
 *
 * The preview never ends on the first half of a surrogate pair; in that case
 * it is one code unit shorter so no lone surrogate is persisted.
 *
 * The input message and its parts array are not mutated.
 *
 * @param message - The oversized message.
 * @returns A copy of the message with truncated text parts.
 */
function truncateTextParts(message: UIMessage): UIMessage {
  // Longest preview (in UTF-16 code units) kept from a truncated text part.
  const PREVIEW_CHARS = 500;

  const parts = [...message.parts];

  for (let i = 0; i < parts.length; i++) {
    const part = parts[i];
    if (part.type === "text" && "text" in part) {
      const text = (part as { text: string }).text;
      if (text.length > 1000) {
        // Back off by one unit if the cut would split a surrogate pair.
        const lastUnit = text.charCodeAt(PREVIEW_CHARS - 1);
        const previewEnd =
          lastUnit >= 0xd800 && lastUnit <= 0xdbff
            ? PREVIEW_CHARS - 1
            : PREVIEW_CHARS;

        parts[i] = {
          ...part,
          text:
            `[Text truncated for storage (${text.length} chars). ` +
            `First 500 chars: ${text.slice(0, previewEnd)}...]`
        } as UIMessage["parts"][number];

        // Stop as soon as the message fits; leave later parts untouched.
        const candidate = { ...message, parts };
        if (byteLength(JSON.stringify(candidate)) <= ROW_MAX_BYTES) {
          break;
        }
      }
    }
  }

  return { ...message, parts };
}