fix(ai): compress Catty requests only after 413 (#1327)

* fix(ai): compress Catty requests only after 413
* fix(ai): retry 413 after tool progress safely
* fix(ai): mark thrown 413 retries after tool progress
* fix(ai): preserve tool results in 413 retry
2026-06-09 13:11:42 +08:00
parent 3bc373dbec
commit 517cbb6cee
10 changed files with 578 additions and 746 deletions
--- a/components/ai/cattyHistoryReplay.test.ts
+++ b/components/ai/cattyHistoryReplay.test.ts
@@ -91,6 +91,23 @@ test("buildHistoricalToolResultReplayText keeps non-terminal tool results intact
  assert.equal(buildHistoricalToolResultReplayText(result, toolCall), "search result summary");
 });
 // With `preserveTerminalOutput` set, a terminal tool result is replayed verbatim.
 test("buildHistoricalToolResultReplayText can preserve terminal output for 413 retries", () => {
  const terminalCall: ToolCall = { id: "call-1", name: "terminal_execute", arguments: { command: "npm test" } };
  const terminalResult: ToolResult = { toolCallId: "call-1", content: "real terminal output" };
  const replayed = buildHistoricalToolResultReplayText(terminalResult, terminalCall, {
    preserveTerminalOutput: true,
  });
  assert.equal(replayed, "real terminal output");
 });
 test("buildHistoricalToolReplayMaps pairs reused tool ids with the nearest preceding call", () => {
  const messages: ChatMessage[] = [
    {
--- a/components/ai/cattyHistoryReplay.ts
+++ b/components/ai/cattyHistoryReplay.ts
@@ -112,9 +112,14 @@ function findLastIndex<T>(items: T[], predicate: (item: T) => boolean): number {
 export function buildHistoricalToolResultReplayText(
  result: ToolResult,
  toolCall?: ToolCall,
  {
    preserveTerminalOutput = false,
  }: {
    preserveTerminalOutput?: boolean;
  } = {},
 ): string {
  const toolName = toolCall?.name ?? "unknown";
-  if (!isTerminalToolName(toolName)) {
+  if (!isTerminalToolName(toolName) || preserveTerminalOutput) {
    return result.content;
  }
--- a/components/ai/hooks/useAIChatStreaming.ts
+++ b/components/ai/hooks/useAIChatStreaming.ts
@@ -21,6 +21,7 @@ import type {
  ExternalAgentConfig,
  ProviderAdvancedParams,
  ProviderConfig,
  ToolResult,
  WebSearchConfig,
 } from '../../../infrastructure/ai/types';
 import { isWebSearchReady } from '../../../infrastructure/ai/types';
@@ -36,9 +37,12 @@ import {
  resolveContextWindow,
 } from '../../../infrastructure/ai/contextCompaction';
 import {
-  estimateUtf8Bytes,
+  compressMessagesForRequestTooLargeRetry,
-  fitMessagesToRequestPayloadBudget,
+} from '../../../infrastructure/ai/requestPayloadCompression';
-} from '../../../infrastructure/ai/requestPayloadBudget';
+import {
  createCattyRequestTooLargeRetryError,
  hadToolProgressBeforeRequestTooLarge,
 } from '../../../infrastructure/ai/cattyRequestTooLargeRetry';
 import { createModelFromConfig } from '../../../infrastructure/ai/sdk/providers';
 import { createCattyTools } from '../../../infrastructure/ai/sdk/tools';
 import type { ExecutorContext } from '../../../infrastructure/ai/cattyAgent/executor';
@@ -50,6 +54,7 @@ import {
  buildPromptWithTerminalSelectionAttachments,
  isTerminalSelectionAttachment,
 } from '../../../application/state/terminalSelectionAttachment';
 import { latestAISessionsSnapshot } from '../../../application/state/aiStateSnapshots';
 import {
  buildHistoricalToolReplayMaps,
  buildHistoricalToolResultReplayText,
@@ -343,7 +348,7 @@ export function useAIChatStreaming({
    // Track the current assistant message ID so updates target the correct message
    let activeMsgId = currentAssistantMsgId;
    let lastAddedRole: 'assistant' | 'tool' = 'assistant';
-    let hasRetryUnsafeToolProgress = false;
+    let hadToolProgress = false;
    const reader = result.fullStream.getReader();
    // -- Text-delta batching: accumulate deltas and flush periodically --
@@ -419,7 +424,16 @@ export function useAIChatStreaming({
    try {
    while (true) {
-      const { done, value } = await reader.read();
+      let readResult: ReadableStreamReadResult<unknown>;
      try {
        readResult = await reader.read();
      } catch (readErr) {
        if (isRequestTooLargeError(readErr)) {
          throw createCattyRequestTooLargeRetryError(readErr, hadToolProgress);
        }
        throw readErr;
      }
      const { done, value } = readResult;
      if (done) break;
      // Use the StreamChunk union for type narrowing instead of unsafe casts
      const chunk = value as StreamChunk;
@@ -486,7 +500,7 @@ export function useAIChatStreaming({
          cancelPendingFlush();
          flushText();
          const typedChunk = chunk as ToolCallChunk;
-          hasRetryUnsafeToolProgress = true;
+          hadToolProgress = true;
          const messageId = ensureAssistantMessage();
          const providerOptions = normalizeProviderContinuationOptions(typedChunk.providerMetadata);
          updateMessageById(streamSessionId, messageId, msg => ({
@@ -512,7 +526,7 @@ export function useAIChatStreaming({
          cancelPendingFlush();
          flushText();
          const typedChunk = chunk as ToolResultChunk;
-          hasRetryUnsafeToolProgress = true;
+          hadToolProgress = true;
          // Mark the assistant message's tool execution as completed
          updateMessageById(streamSessionId, activeMsgId, msg =>
            msg.role === 'assistant' && msg.executionStatus === 'running'
@@ -559,10 +573,13 @@ export function useAIChatStreaming({
            console.warn('[Catty] suppressed SDK stream state error:', typedChunk.error);
            break;
          }
-          if (isRequestTooLargeError(typedChunk.error) && !hasRetryUnsafeToolProgress) {
+          if (isRequestTooLargeError(typedChunk.error)) {
            cancelPendingFlush();
            flushText();
-            throw typedChunk.error;
+            throw createCattyRequestTooLargeRetryError(
              typedChunk.error,
              hadToolProgress,
            );
          }
          cancelPendingFlush();
          flushText();
@@ -796,44 +813,86 @@ export function useAIChatStreaming({
    };
    try {
-      // Issue #5: Build SDK messages including tool-call and tool-result messages
+      let openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
-      // so the LLM maintains full conversation context
+      const buildSdkMessages = (
-      const allMessages = currentSession?.messages ?? [];
+        allMessages: ChatMessage[],
        includeCurrentUserMessage: boolean,
        {
          preserveTerminalToolResults = new Set<ToolResult>(),
        }: {
          preserveTerminalToolResults?: ReadonlySet<ToolResult>;
        } = {},
      ): Array<ModelMessage> => {
        const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
        const nextFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
        const sdkMessages: Array<ModelMessage> = [];
        let previousHistoryMessageWasToolResult = false;
-      const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
+        for (const m of allMessages) {
-
+          const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
-      const sdkMessages: Array<ModelMessage> = [];
+          if (m.role === 'user') {
-      const openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
+            // Historical attachments are replayed as placeholders so screenshots,
-      let previousHistoryMessageWasToolResult = false;
+            // files, and terminal selections do not balloon every follow-up request.
-      for (const m of allMessages) {
+            const messageAttachments = m.attachments ?? m.images;
-        const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
+            sdkMessages.push({
-        if (m.role === 'user') {
+              role: 'user',
-          // Historical attachments are replayed as placeholders so screenshots,
+              content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
-          // files, and terminal selections do not balloon every follow-up request.
+            });
-          const messageAttachments = m.attachments ?? m.images;
+          } else if (m.role === 'assistant') {
-          sdkMessages.push({
+            const activeContinuation = isProviderContinuationForSource(
-            role: 'user',
+              m.providerContinuation,
-            content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
+              continuationContext.source,
-          });
+            )
-        } else if (m.role === 'assistant') {
+              ? m.providerContinuation
-          const activeContinuation = isProviderContinuationForSource(
+              : undefined;
-            m.providerContinuation,
+            const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
-            continuationContext.source,
+              m,
-          )
+              continuationContext.source,
-            ? m.providerContinuation
+            );
-            : undefined;
+            if (m.toolCalls?.length) {
-          const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
+              // Only include tool calls that have matching results
-            m,
+              const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
-            continuationContext.source,
+              const resolvedCalls = resolvedToolCalls
-          );
+                ? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
-          if (m.toolCalls?.length) {
+                : [];
-            // Only include tool calls that have matching results
+              const contentParts: AssistantContentPart[] = [];
-            const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
+              if (resolvedCalls.length > 0) {
-            const resolvedCalls = resolvedToolCalls
+                for (const part of activeContinuation?.reasoningParts ?? []) {
-              ? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
+                  if (!part.text && !part.providerOptions) continue;
-              : [];
+                  contentParts.push({
-            const contentParts: AssistantContentPart[] = [];
+                    type: 'reasoning' as const,
-            if (resolvedCalls.length > 0) {
+                    text: part.text,
                    ...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
                  });
                }
              }
              if (m.content) {
                contentParts.push({
                  type: 'text' as const,
                  text: m.content,
                  ...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
                });
              }
              for (const tc of resolvedCalls) {
                const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
                contentParts.push({
                  type: 'tool-call' as const,
                  toolCallId: tc.id,
                  toolName: tc.name,
                  input: tc.arguments ?? {},
                  ...(providerOptions ? { providerOptions } : {}),
                });
              }
              // If all tool calls were orphaned, just include the text content
              if (contentParts.length > 0) {
                const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
                sdkMessages.push(message);
                if (resolvedCalls.length > 0) {
                  rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
                }
              }
            } else if (m.content) {
              const contentParts: AssistantContentPart[] = [];
              for (const part of activeContinuation?.reasoningParts ?? []) {
                if (!part.text && !part.providerOptions) continue;
                contentParts.push({
@@ -842,95 +901,91 @@ export function useAIChatStreaming({
                  ...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
                });
              }
            }
            if (m.content) {
              contentParts.push({
                type: 'text' as const,
                text: m.content,
                ...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
              });
-            }
+              const message: ModelMessage = {
-            for (const tc of resolvedCalls) {
+                role: 'assistant',
-              const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
+                content: toAssistantModelContent(contentParts),
-              contentParts.push({
+              };
                type: 'tool-call' as const,
                toolCallId: tc.id,
                toolName: tc.name,
                input: tc.arguments ?? {},
                ...(providerOptions ? { providerOptions } : {}),
              });
            }
            // If all tool calls were orphaned, just include the text content
            if (contentParts.length > 0) {
              const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
              sdkMessages.push(message);
-              if (resolvedCalls.length > 0) {
+              if (currentMessageFollowsToolResult) {
-                rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
+                rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
              }
            }
-          } else if (m.content) {
+          } else if (m.role === 'tool' && m.toolResults?.length) {
-            const contentParts: AssistantContentPart[] = [];
+            sdkMessages.push({
-            for (const part of activeContinuation?.reasoningParts ?? []) {
+              role: 'tool',
-              if (!part.text && !part.providerOptions) continue;
+              content: m.toolResults.map(tr => {
-              contentParts.push({
+                const toolCall = toolCallByToolResult.get(tr);
-                type: 'reasoning' as const,
+                return {
-                text: part.text,
+                  type: 'tool-result' as const,
-                ...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
+                  toolCallId: tr.toolCallId,
-              });
+                  toolName: toolCall?.name ?? 'unknown',
-            }
+                  output: {
-            contentParts.push({
+                    type: 'text' as const,
-              type: 'text' as const,
+                    value: buildHistoricalToolResultReplayText(tr, toolCall, {
-              text: m.content,
+                      preserveTerminalOutput: preserveTerminalToolResults.has(tr),
-              ...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
+                    }),
                  },
                };
              }),
            });
-            const message: ModelMessage = {
+          }
-              role: 'assistant',
+          previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
-              content: toAssistantModelContent(contentParts),
+        }
-            };
+
-            sdkMessages.push(message);
+        if (includeCurrentUserMessage) {
-            if (currentMessageFollowsToolResult) {
+          // Build the current user message — include attachments as multimodal content
-              rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
+          if (attachments?.length) {
            const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
            const modelAttachments = attachments.filter(
              (attachment) => !isTerminalSelectionAttachment(attachment),
            );
            if (!modelAttachments.length) {
              sdkMessages.push({ role: 'user', content: modelText });
            } else {
              const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
              parts.push({ type: 'text', text: modelText });
              for (const att of modelAttachments) {
                if (att.mediaType.startsWith('image/')) {
                  parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
                } else {
                  parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
                }
              }
              sdkMessages.push({ role: 'user', content: parts });
            }
          }
        } else if (m.role === 'tool' && m.toolResults?.length) {
          sdkMessages.push({
            role: 'tool',
            content: m.toolResults.map(tr => {
              const toolCall = toolCallByToolResult.get(tr);
              return {
                type: 'tool-result' as const,
                toolCallId: tr.toolCallId,
                toolName: toolCall?.name ?? 'unknown',
                output: { type: 'text' as const, value: buildHistoricalToolResultReplayText(tr, toolCall) },
              };
            }),
          });
        }
        previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
      }
      // Build the current user message — include attachments as multimodal content
      if (attachments?.length) {
        const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
        const modelAttachments = attachments.filter(
          (attachment) => !isTerminalSelectionAttachment(attachment),
        );
        if (!modelAttachments.length) {
          sdkMessages.push({ role: 'user', content: modelText });
        } else {
        const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
        parts.push({ type: 'text', text: modelText });
        for (const att of modelAttachments) {
          if (att.mediaType.startsWith('image/')) {
            parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
          } else {
-            parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
+            sdkMessages.push({ role: 'user', content: trimmed });
          }
        }
-        sdkMessages.push({ role: 'user', content: parts });
+
        openAIChatAssistantFieldsByMessage = nextFieldsByMessage;
        return sdkMessages;
      };
      const sdkMessages = buildSdkMessages(currentSession?.messages ?? [], true);
      const collectToolResultsAfterMessage = (
        messages: ChatMessage[],
        messageId: string,
      ): Set<ToolResult> => {
        const results = new Set<ToolResult>();
        let afterMessage = false;
        for (const message of messages) {
          if (message.id === messageId) {
            afterMessage = true;
            continue;
          }
          if (!afterMessage || message.role !== 'tool' || !message.toolResults?.length) continue;
          for (const result of message.toolResults) {
            results.add(result);
          }
        }
-      } else {
+        return results;
-        sdkMessages.push({ role: 'user', content: trimmed });
+      };
      }
      // Create model with placeholder API key — the main process injects the real
      // decrypted key when the HTTP request is proxied through IPC, so plaintext
@@ -958,20 +1013,12 @@ export function useAIChatStreaming({
        defaultContextWindow: DEFAULT_CONTEXT_WINDOW_TOKENS,
      });
      const outputReserveTokens = Math.min(4096, Math.ceil(contextWindow * 0.05));
-      const requestReserveTokens = outputReserveTokens + estimateUnknownTokens({
+      const getRequestReserveTokens = () => outputReserveTokens + estimateUnknownTokens({
        systemPrompt,
        toolNames: Object.keys(tools),
        openAIChatAssistantFields: Array.from(openAIChatAssistantFieldsByMessage.values()),
      });
      const payloadReservedBytes = estimateUtf8Bytes({
        system: systemPrompt,
        tools: Object.keys(tools),
      });
      const applyRequestPayloadBudget = (messages: ModelMessage[]) => fitMessagesToRequestPayloadBudget({
        messages,
        reservedBytes: payloadReservedBytes,
      });
      const summarizeForCompaction = async (messagesToSummarize: ModelMessage[]) => {
        updateLastMessage(sessionId, msg => ({ ...msg, statusText: 'Compacting earlier context...' }));
        const result = await generateText({
@@ -999,64 +1046,64 @@ export function useAIChatStreaming({
        );
        return pruned;
      };
-      const compactAndBudgetMessages = async (
+      const compactMessages = async (
        messages: ModelMessage[],
        {
          force = false,
          statusText,
          trimLog,
          fallbackLog,
          compressForRequestTooLargeRetry = false,
          compressionLog,
        }: {
          force?: boolean;
          statusText?: string;
          trimLog: string;
          fallbackLog: string;
          compressForRequestTooLargeRetry?: boolean;
          compressionLog?: string;
        },
      ): Promise<ModelMessage[]> => {
        const compressRetryMessages = (candidateMessages: ModelMessage[], log?: string): ModelMessage[] => {
          if (!compressForRequestTooLargeRetry) return candidateMessages;
          const compressed = compressMessagesForRequestTooLargeRetry(candidateMessages);
          if (compressed.didAdjust && log) {
            console.warn(log);
          }
          return compressed.messages;
        };
        try {
          if (statusText) {
            updateLastMessage(sessionId, msg => ({ ...msg, statusText }));
          }
          const inputMessages = compressRetryMessages(messages, compressionLog);
          const compacted = await prepareContextCompaction({
-            messages,
+            messages: inputMessages,
            contextWindow,
-            reservedTokens: requestReserveTokens,
+            reservedTokens: getRequestReserveTokens(),
            thresholdRatio: force ? 0 : undefined,
            protectRecentMessages: DEFAULT_PROTECT_RECENT_MESSAGES,
            summarize: summarizeForCompaction,
          });
          let nextMessages = force && !compacted.didCompact
-            ? keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES)
+            ? keepRecentContextMessages(inputMessages, DEFAULT_PROTECT_RECENT_MESSAGES)
            : compacted.messages;
-          const budgetResult = applyRequestPayloadBudget(nextMessages);
+          return compressRetryMessages(nextMessages);
          if (budgetResult.didAdjust) {
            console.warn(`${trimLog} ${budgetResult.estimatedBytes} bytes.`);
            nextMessages = budgetResult.messages;
          }
          return nextMessages;
        } catch (err) {
          if (abortController.signal.aborted) throw err;
          console.warn(fallbackLog, err);
-          const fallbackBudget = applyRequestPayloadBudget(
+          const fallbackMessages = keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES);
-            keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES),
+          if (!compressForRequestTooLargeRetry) {
-          );
+            return fallbackMessages;
          if (fallbackBudget.didAdjust) {
            console.warn(
              `[Catty] Request payload trimmed to ${fallbackBudget.estimatedBytes} bytes after compaction fallback.`,
            );
          }
-          return fallbackBudget.messages;
+          const compressed = compressMessagesForRequestTooLargeRetry(fallbackMessages);
          if (compressed.didAdjust) {
            console.warn('[Catty] Request content compressed after compaction fallback.');
          }
          return compressed.messages;
        }
      };
-      const payloadBudgetResult = applyRequestPayloadBudget(sdkMessages);
+      let messagesForStream = sdkMessages;
-      let messagesForStream = payloadBudgetResult.messages;
+      messagesForStream = await compactMessages(messagesForStream, {
      if (payloadBudgetResult.didAdjust) {
        console.warn(
          `[Catty] Request payload trimmed to ${payloadBudgetResult.estimatedBytes} bytes to avoid HTTP 413.`,
        );
      }
      messagesForStream = await compactAndBudgetMessages(messagesForStream, {
        trimLog: '[Catty] Request payload re-trimmed after context compaction to',
        fallbackLog: '[Catty] Context compaction failed; falling back to recent messages only:',
      });
@@ -1080,23 +1127,50 @@ export function useAIChatStreaming({
        }
        console.warn('[Catty] Request hit HTTP 413; forcing context compaction and retrying once.', streamErr);
-        updateMessageById(sessionId, assistantMsgId, msg => ({
+        const statusText = 'Request was too large. Compacting context and retrying...';
-          ...msg,
+        const hadToolProgress = hadToolProgressBeforeRequestTooLarge(streamErr);
-          content: '',
+        let retryBaseMessages = messagesForStream;
-          thinking: undefined,
+        let retryAssistantMsgId = assistantMsgId;
-          thinkingDurationMs: undefined,
+        if (hadToolProgress) {
-          providerContinuation: undefined,
+          const latestSession = latestAISessionsSnapshot?.find(session => session.id === sessionId);
-          toolCalls: undefined,
+          if (latestSession) {
-          errorInfo: undefined,
+            retryBaseMessages = buildSdkMessages(latestSession.messages, false, {
-          executionStatus: undefined,
+              preserveTerminalToolResults: collectToolResultsAfterMessage(
-          pendingApproval: undefined,
+                latestSession.messages,
-          statusText: 'Request was too large. Compacting context and retrying...',
+                assistantMsgId,
-        }));
+              ),
-        const retryMessages = prepareMessagesForStream(await compactAndBudgetMessages(messagesForStream, {
+            });
          }
          retryAssistantMsgId = generateId();
          addMessageToSession(sessionId, {
            id: retryAssistantMsgId,
            role: 'assistant',
            content: '',
            timestamp: Date.now(),
            model: activeModelId || context.activeProvider?.defaultModel || '',
            providerId: context.activeProvider?.providerId,
            statusText,
          });
        } else {
          updateMessageById(sessionId, assistantMsgId, msg => ({
            ...msg,
            content: '',
            thinking: undefined,
            thinkingDurationMs: undefined,
            providerContinuation: undefined,
            toolCalls: undefined,
            errorInfo: undefined,
            executionStatus: undefined,
            pendingApproval: undefined,
            statusText,
          }));
        }
        const retryMessages = prepareMessagesForStream(await compactMessages(retryBaseMessages, {
          force: true,
-          statusText: 'Request was too large. Compacting context and retrying...',
+          statusText,
          trimLog: '[Catty] Request payload trimmed after forced context compaction to',
          fallbackLog: '[Catty] Forced context compaction after 413 failed; falling back to recent messages only:',
          compressForRequestTooLargeRetry: true,
          compressionLog: '[Catty] Request content compressed after forced context compaction.',
        }));
        await processCattyStream(
@@ -1106,7 +1180,7 @@ export function useAIChatStreaming({
          tools,
          retryMessages,
          abortController.signal,
-          assistantMsgId,
+          retryAssistantMsgId,
          context.activeProvider?.advancedParams,
          continuationContext,
        );
@@ -1123,7 +1197,7 @@ export function useAIChatStreaming({
    }
  }, [
    processCattyStream, reportStreamError, setStreamingForScope,
-    updateLastMessage, updateMessageById,
+    addMessageToSession, updateLastMessage, updateMessageById,
  ]);
  return {
--- a/infrastructure/ai/cattyRequestTooLargeRetry.test.ts
+++ b/infrastructure/ai/cattyRequestTooLargeRetry.test.ts
@@ -0,0 +1,29 @@
 import test from "node:test";
 import assert from "node:assert/strict";
 import {
  createCattyRequestTooLargeRetryError,
  hadToolProgressBeforeRequestTooLarge,
 } from "./cattyRequestTooLargeRetry.ts";
 // The wrapper must carry over the source error's HTTP details, keep the source
 // as `cause`, and report that tool progress had happened.
 test("createCattyRequestTooLargeRetryError marks 413 retry errors after tool progress", () => {
  const original = new Error("HTTP 413 Request Entity Too Large");
  Object.assign(original, { status: 413, responseBody: "<html>too large</html>" });
  const wrapped = createCattyRequestTooLargeRetryError(original, true);
  assert.equal(wrapped.statusCode, 413);
  assert.equal(wrapped.status, 413);
  assert.equal(wrapped.responseBody, "<html>too large</html>");
  assert.equal(wrapped.cause, original);
  assert.equal(hadToolProgressBeforeRequestTooLarge(wrapped), true);
 });
 // Both an explicitly unmarked retry error and a plain Error count as "no progress".
 test("hadToolProgressBeforeRequestTooLarge is false when no tool progress was recorded", () => {
  const withoutProgress = createCattyRequestTooLargeRetryError("HTTP 413", false);
  const unmarked = new Error("HTTP 413");
  for (const candidate of [withoutProgress, unmarked]) {
    assert.equal(hadToolProgressBeforeRequestTooLarge(candidate), false);
  }
 });
--- a/infrastructure/ai/cattyRequestTooLargeRetry.ts
+++ b/infrastructure/ai/cattyRequestTooLargeRetry.ts
@@ -0,0 +1,34 @@
 /**
  * Shape of the error produced by `createCattyRequestTooLargeRetryError`.
  *
  * Every extra field is optional, so any `Error` can be probed for them.
  */
 export type CattyRequestTooLargeRetryError = Error & {
  /** Copy of the `hadToolProgress` flag passed when the error was created. */
  cattyHadToolProgress?: boolean;
  /** Always set to 413 by the factory. */
  statusCode?: number;
  /** Numeric `status` copied from the source error, when it had one. */
  status?: number;
  /** String `responseBody` copied from the source error, when it had one. */
  responseBody?: string;
  /**
   * The value the retry error was created from. Declared here as well so the
   * assignment type-checks when the lib target predates ES2022's `Error.cause`.
   */
  cause?: unknown;
 };
 /** Picks a human-readable message for the wrapped error. */
 function describeRequestTooLargeSource(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (error && typeof error === 'object') {
    const { message } = error as { message?: unknown };
    // Error-like plain objects would otherwise stringify to "[object Object]".
    if (typeof message === 'string' && message) return message;
  }
  return String(error ?? 'Request too large');
 }
 /**
  * Wraps an arbitrary thrown/streamed value in a retry error tagged as HTTP 413.
  *
  * @param error - The original failure; kept on the result as `cause`. May be an
  *   `Error`, an error-like object, a string, or nullish.
  * @param hadToolProgress - Stored on the result as `cattyHadToolProgress` and
  *   read back by `hadToolProgressBeforeRequestTooLarge`.
  * @returns A new `Error` named `CattyRequestTooLargeRetryError` with
  *   `statusCode` 413, plus `status` / `responseBody` copied from `error` when
  *   they have the expected primitive types.
  */
 export function createCattyRequestTooLargeRetryError(
  error: unknown,
  hadToolProgress: boolean,
 ): CattyRequestTooLargeRetryError {
  const retryError = new Error(describeRequestTooLargeSource(error)) as CattyRequestTooLargeRetryError;
  retryError.name = 'CattyRequestTooLargeRetryError';
  retryError.cause = error;
  retryError.cattyHadToolProgress = hadToolProgress;
  // The status code is forced rather than copied from the source.
  retryError.statusCode = 413;
  if (error && typeof error === 'object') {
    const source = error as Record<string, unknown>;
    if (typeof source.status === 'number') retryError.status = source.status;
    if (typeof source.responseBody === 'string') retryError.responseBody = source.responseBody;
  }
  return retryError;
 }
 /**
  * Reports whether `error` carries a truthy `cattyHadToolProgress` marker.
  *
  * @param error - Any caught value; non-objects and nullish values yield `false`.
  * @returns `true` only for an object whose `cattyHadToolProgress` is truthy.
  */
 export function hadToolProgressBeforeRequestTooLarge(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) return false;
  const { cattyHadToolProgress } = error as { cattyHadToolProgress?: boolean };
  return Boolean(cattyHadToolProgress);
 }
--- a/infrastructure/ai/requestPayloadBudget.test.ts
+++ b/infrastructure/ai/requestPayloadBudget.test.ts
@@ -1,233 +0,0 @@
 import test from "node:test";
 import assert from "node:assert/strict";
 import type { ModelMessage } from "ai";
 import {
  DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
  compressVerboseText,
  estimateUtf8Bytes,
  fitMessagesToRequestPayloadBudget,
  truncateTextWithHeadAndTail,
 } from "./requestPayloadBudget.ts";
 test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
  const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
  const output = compressVerboseText(input);
  // Five newlines collapse to three and the run of four identical lines is
  // capped at three. Pin the exact output: counting split("\nsame\n") pieces
  // gives 3 with or without deduplication, so it could never fail.
  assert.equal(output, "line1\n\n\nline2\nsame\nsame\nsame\nline3");
 });
 test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
  const maxChars = 2_000;
  const leading = "A".repeat(500);
  const middle = "B".repeat(20_000);
  const trailing = "C".repeat(500);
  const truncated = truncateTextWithHeadAndTail(`${leading}${middle}${trailing}`, maxChars);
  // Both ends survive, the middle is replaced by the marker, and the limit holds.
  assert.ok(truncated.startsWith("AAA"));
  assert.ok(truncated.endsWith("CCC"));
  assert.ok(truncated.includes("[... output truncated for request size ...]"));
  assert.ok(truncated.length <= maxChars);
 });
 test("fitMessagesToRequestPayloadBudget truncates verbose tool results before dropping recent turns", () => {
  const maxPayloadBytes = 20_000;
  const toolCallId = "call-1";
  const toolName = "terminal_execute";
  const history: ModelMessage[] = [
    { role: "user", content: "run build" },
    {
      role: "assistant",
      content: [{
        type: "tool-call",
        toolCallId,
        toolName,
        input: { command: "npm run build" },
      }],
    },
    {
      role: "tool",
      content: [{
        type: "tool-result",
        toolCallId,
        toolName,
        output: { type: "text", value: "X".repeat(200_000) },
      }],
    },
    { role: "user", content: "what failed?" },
  ];
  const fitted = fitMessagesToRequestPayloadBudget({
    messages: history,
    maxPayloadBytes,
    reservedBytes: 2_000,
    maxToolResultChars: 4_000,
    protectRecentMessages: 4,
  });
  // All four turns survive; only the oversized tool output is shortened.
  assert.equal(fitted.messages.length, 4);
  const toolMessage = fitted.messages[2];
  assert.equal(toolMessage.role, "tool");
  assert.ok(Array.isArray(toolMessage.content));
  const toolPart = toolMessage.content[0] as { output?: { value?: string } };
  const keptChars = toolPart.output?.value?.length ?? 0;
  assert.ok(keptChars < 5_000);
  assert.ok(fitted.estimatedBytes <= maxPayloadBytes);
 });
 test("fitMessagesToRequestPayloadBudget drops older turns when truncation alone is insufficient", () => {
  const maxPayloadBytes = 8_000;
  // Twelve question/answer turns with bulky answers, then the live question.
  const olderTurns = Array.from({ length: 12 }, (_, turn) => turn).flatMap((turn): ModelMessage[] => [
    { role: "user", content: `question ${turn}` },
    { role: "assistant", content: `answer ${turn} ${"Z".repeat(20_000)}` },
  ]);
  const messages: ModelMessage[] = [...olderTurns, { role: "user", content: "latest question" }];
  const result = fitMessagesToRequestPayloadBudget({
    messages,
    maxPayloadBytes,
    reservedBytes: 500,
    protectRecentMessages: 4,
    maxMessageTextChars: 2_000,
  });
  const newest = result.messages.at(-1);
  assert.ok(result.messages.length < messages.length);
  assert.equal(newest?.role, "user");
  assert.match(String(newest?.content ?? ""), /latest question/);
  assert.ok(result.estimatedBytes <= maxPayloadBytes);
 });
 test("estimateUtf8Bytes measures JSON payload size in UTF-8 bytes", () => {
  // JSON.stringify yields `{"text":"café"}`: 15 UTF-16 code units, but "é" is
  // two bytes in UTF-8, so the payload is 16 bytes. Asserting the exact value
  // distinguishes a byte count from a code-unit count; `> 8` did not.
  const bytes = estimateUtf8Bytes({ text: "caf\u00e9" });
  assert.equal(bytes, 16);
 });
 test("estimateUtf8Bytes works in renderer-like environments without Buffer", () => {
  type BufferlessGlobal = typeof globalThis & { Buffer?: typeof Buffer };
  const host = globalThis as BufferlessGlobal;
  const savedBuffer = globalThis.Buffer;
  const payload = { text: "caf\u00e9" };
  try {
    // Hide Buffer so the TextEncoder fallback is the only path available.
    host.Buffer = undefined;
    const measured = estimateUtf8Bytes(payload);
    const expected = new TextEncoder().encode(JSON.stringify(payload)).byteLength;
    assert.equal(measured, expected);
  } finally {
    // Always restore the global, even when the assertion throws.
    host.Buffer = savedBuffer;
  }
 });
 test("default payload budget remains a general gateway guard", () => {
  // Guards against the default being changed unintentionally.
  const expectedDefaultBytes = 1_500_000;
  assert.equal(DEFAULT_MAX_REQUEST_PAYLOAD_BYTES, expectedDefaultBytes);
 });
 test("fitMessagesToRequestPayloadBudget preserves current long text when the request is under budget", () => {
  const currentText = "CURRENT ".repeat(4_000);
  const { didAdjust, messages } = fitMessagesToRequestPayloadBudget({
    messages: [{ role: "user", content: currentText }],
    maxPayloadBytes: 100_000,
  });
  // Under budget: nothing is touched and the text comes back verbatim.
  assert.equal(didAdjust, false);
  assert.equal(messages[0].content, currentText);
 });
 test("fitMessagesToRequestPayloadBudget reports didAdjust when initial truncation succeeds", () => {
  const maxPayloadBytes = 20_000;
  const history: ModelMessage[] = [
    { role: "user", content: "run build" },
    {
      role: "tool",
      content: [{
        type: "tool-result",
        toolCallId: "call-1",
        toolName: "terminal_execute",
        output: { type: "text", value: "X".repeat(200_000) },
      }],
    },
  ];
  const { didAdjust, estimatedBytes } = fitMessagesToRequestPayloadBudget({
    messages: history,
    maxPayloadBytes,
    reservedBytes: 2_000,
  });
  assert.equal(didAdjust, true);
  assert.ok(estimatedBytes <= maxPayloadBytes);
 });
 test("fitMessagesToRequestPayloadBudget keeps dropping messages after emergency caps when still over budget", () => {
  const maxPayloadBytes = 5_000;
  // Eight bulky turns; protectRecentMessages is set to 8 so the emergency
  // path has to drop protected messages to fit.
  const messages = Array.from({ length: 8 }, (_, turn) => turn).flatMap((turn): ModelMessage[] => [
    { role: "user", content: `question ${turn} ${"Q".repeat(5_000)}` },
    { role: "assistant", content: `answer ${turn} ${"A".repeat(5_000)}` },
  ]);
  const result = fitMessagesToRequestPayloadBudget({
    messages,
    maxPayloadBytes,
    protectRecentMessages: 8,
    maxMessageTextChars: 2_000,
  });
  assert.ok(result.messages.length < messages.length);
  assert.ok(result.estimatedBytes <= maxPayloadBytes);
 });
 test("fitMessagesToRequestPayloadBudget shrinks a single oversized message for very small budgets", () => {
  const maxPayloadBytes = 1_000;
  const oversized = "Z".repeat(1_000_000);
  const { messages, estimatedBytes } = fitMessagesToRequestPayloadBudget({
    messages: [{ role: "assistant", content: oversized }],
    maxPayloadBytes,
    maxMessageTextChars: 500,
  });
  // The lone message is kept but shrunk until it fits.
  assert.equal(messages.length, 1);
  assert.ok(estimatedBytes <= maxPayloadBytes);
 });
 test("fitMessagesToRequestPayloadBudget returns empty messages when budget is fully reserved", () => {
  // reservedBytes exceeds maxPayloadBytes, so no room is left for messages.
  const { messages, didAdjust, estimatedBytes } = fitMessagesToRequestPayloadBudget({
    messages: [{ role: "user", content: "hello" }],
    maxPayloadBytes: 100,
    reservedBytes: 200,
  });
  assert.deepEqual(messages, []);
  assert.equal(didAdjust, true);
  assert.equal(estimatedBytes, 0);
 });
 test("fitMessagesToRequestPayloadBudget omits latest attachments only when they are still over budget at the last resort", () => {
  const maxPayloadBytes = 20_000;
  const expectedPlaceholder = {
    type: "text",
    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
  };
  const result = fitMessagesToRequestPayloadBudget({
    messages: [{
      role: "user",
      content: [
        { type: "text", text: "please inspect this image" },
        { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
      ],
    }],
    maxPayloadBytes,
  });
  assert.ok(result.estimatedBytes <= maxPayloadBytes);
  assert.equal(result.messages.length, 1);
  // The image part is swapped for a descriptive text placeholder.
  const content = result.messages[0].content;
  assert.ok(Array.isArray(content));
  assert.deepEqual(content[1], expectedPlaceholder);
 });
 test("fitMessagesToRequestPayloadBudget omits older oversized attachment payloads as a last resort", () => {
  const maxPayloadBytes = 20_000;
  const expectedPlaceholder = {
    type: "text",
    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
  };
  const olderImageMessage: ModelMessage = {
    role: "user",
    content: [
      { type: "text", text: "older image" },
      { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
    ],
  };
  const result = fitMessagesToRequestPayloadBudget({
    messages: [olderImageMessage, { role: "user", content: "current question" }],
    maxPayloadBytes,
    protectRecentMessages: 2,
  });
  assert.ok(result.estimatedBytes <= maxPayloadBytes);
  // Both messages are kept; only the older image payload is replaced.
  assert.equal(result.messages.length, 2);
  const content = result.messages[0].content;
  assert.ok(Array.isArray(content));
  assert.deepEqual(content[1], expectedPlaceholder);
 });
--- a/infrastructure/ai/requestPayloadBudget.ts
+++ b/infrastructure/ai/requestPayloadBudget.ts
@@ -1,335 +0,0 @@
 import type { ModelMessage } from "ai";
 import { findSafeCompactionSplitIndex } from "./contextCompaction";
 /** Stay below typical nginx `client_max_body_size` defaults (often 1-2 MB). */
 export const DEFAULT_MAX_REQUEST_PAYLOAD_BYTES = 1_500_000;
 /** Per tool-result text cap before the sliding window drops older turns. */
 export const DEFAULT_MAX_TOOL_RESULT_CHARS = 12_000;
 /** Per plain user/assistant text cap inside a single history message. */
 export const DEFAULT_MAX_MESSAGE_TEXT_CHARS = 24_000;
 /** Keep this many recent messages while trimming payload size. */
 export const DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES = 8;
 /** Default marker `truncateTextWithHeadAndTail` splices between the kept head and tail. */
 const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
 /** Default number of leading characters `truncateTextWithHeadAndTail` keeps. */
 const HEAD_CHARS = 800;
 /** Default number of trailing characters `truncateTextWithHeadAndTail` keeps. */
 const TAIL_CHARS = 4_000;
 /** Options accepted by `fitMessagesToRequestPayloadBudget`. */
 export interface FitMessagesToRequestPayloadBudgetInput {
  /** Conversation to fit; returned as-is when it is already within budget. */
  messages: ModelMessage[];
  /** Upper bound for the payload; defaults to `DEFAULT_MAX_REQUEST_PAYLOAD_BYTES`. */
  maxPayloadBytes?: number;
  /** Bytes subtracted from `maxPayloadBytes` before fitting; defaults to 0. */
  reservedBytes?: number;
  /** Initial character cap for text tool results; defaults to `DEFAULT_MAX_TOOL_RESULT_CHARS`. */
  maxToolResultChars?: number;
  /** Initial character cap for message text; defaults to `DEFAULT_MAX_MESSAGE_TEXT_CHARS`. */
  maxMessageTextChars?: number;
  /** Messages kept while older ones are dropped; defaults to `DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES`. */
  protectRecentMessages?: number;
  /** When true (the default), a trailing user message is left untouched until the final shrink stage. */
  preserveLatestMessage?: boolean;
 }
 /** Outcome returned by `fitMessagesToRequestPayloadBudget`. */
 export interface FitMessagesToRequestPayloadBudgetResult {
  /** The messages to send: the original array when untouched, otherwise an adjusted copy. */
  messages: ModelMessage[];
  /** Whether any truncation or message dropping was applied. */
  didAdjust: boolean;
  /** `estimateUtf8Bytes` of the returned messages (0 when the budget is fully reserved). */
  estimatedBytes: number;
 }
 /**
 * Estimates how many UTF-8 bytes `value` occupies once serialised with the
 * module's JSON-based stringifier.
 */
 export function estimateUtf8Bytes(value: unknown): number {
  return utf8ByteLength(stringifyForByteEstimate(value));
 }
 /**
 * Serialises `value` for byte estimation and always returns a string.
 *
 * Uses `JSON.stringify`; values it cannot represent (`undefined`, functions,
 * symbols) become the empty string, and values it throws on (circular
 * structures, BigInt) fall back to `String(value)`.
 */
 function stringifyForByteEstimate(value: unknown): string {
  try {
    // JSON.stringify is typed as `string` but returns undefined for undefined,
    // functions and symbols; normalise so the declared return type is honest.
    const json: string | undefined = JSON.stringify(value);
    return json ?? "";
  } catch {
    return String(value ?? "");
  }
 }
 /**
 * Returns the UTF-8 byte length of `value` (nullish counts as empty), using
 * `Buffer.byteLength` when a Buffer global exists and `TextEncoder` otherwise.
 */
 function utf8ByteLength(value: string | undefined): number {
  const text = value == null ? "" : value;
  const hasBuffer = typeof Buffer !== "undefined" && typeof Buffer.byteLength === "function";
  if (!hasBuffer) {
    return new TextEncoder().encode(text).byteLength;
  }
  return Buffer.byteLength(text, "utf8");
 }
 /**
 * Shrinks noisy terminal/build output without truncating it.
 * CRLF becomes LF, runs of four or more newlines collapse to three, and any
 * run of identical consecutive lines is capped at three copies.
 */
 export function compressVerboseText(value: string): string {
  if (!value) return value;
  const normalized = value
    .replace(/\r\n/g, "\n")
    .replace(/\n{4,}/g, "\n\n\n");
  const kept: string[] = [];
  // Number of extra copies seen for the line currently being repeated.
  let extraCopies = 0;
  for (const line of normalized.split("\n")) {
    const repeatsPrevious = kept.length > 0 && kept[kept.length - 1] === line;
    extraCopies = repeatsPrevious ? extraCopies + 1 : 0;
    if (extraCopies <= 2) kept.push(line);
  }
  return kept.join("\n");
 }
 /**
 * Shortens `value` to at most `maxChars` characters by keeping its start and
 * end and splicing `marker` between them.
 *
 * - Text already within the limit is returned unchanged.
 * - When the limit is too small to hold the marker plus a little context
 *   (`marker.length + 16` or less), the text is simply cut at the limit.
 * - Otherwise up to `headChars` leading characters are kept and whatever
 *   budget remains (up to `tailChars`) is taken from the end. Whitespace next
 *   to the marker is trimmed.
 *
 * @param value - Text to shorten.
 * @param maxChars - Maximum length of the result; values below 0 count as 0.
 * @param options - Optional overrides for head/tail sizes and the marker.
 * @returns The original text, or a shortened version no longer than `maxChars`.
 */
 export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
 ): string {
  if (value.length <= maxChars) return value;
  // slice(0, negative) keeps everything except the last characters, so a
  // negative limit must be clamped to "keep nothing".
  const limit = Math.max(0, maxChars);
  if (limit <= marker.length + 16) {
    return value.slice(0, limit);
  }
  const budget = limit - marker.length;
  const head = Math.min(Math.max(0, headChars), budget);
  const tail = Math.min(Math.max(0, tailChars), budget - head);
  if (head + tail === 0) {
    return value.slice(0, limit);
  }
  const headText = value.slice(0, head).trimEnd();
  // slice(-0) equals slice(0) and would return the WHOLE string, making the
  // "truncated" text longer than the input; an empty tail needs its own case.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
 }
 /**
 * Returns a copy of `message` whose text and tool-result content has been
 * compressed and capped; the input message is never mutated.
 *
 * - `preserveContent` short-circuits and returns the message untouched.
 * - String content is compressed, then capped at `maxMessageTextChars`.
 * - Array content is processed part by part.
 * - Any other content shape is returned as is.
 */
 export function truncateModelMessageForPayload(
  message: ModelMessage,
  {
    maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
    maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
    omitLargeAttachments = false,
    preserveContent = false,
  }: {
    maxToolResultChars?: number;
    maxMessageTextChars?: number;
    omitLargeAttachments?: boolean;
    preserveContent?: boolean;
  } = {},
 ): ModelMessage {
  if (preserveContent) return message;
  if (typeof message.content === "string") {
    const shortened = truncateTextWithHeadAndTail(
      compressVerboseText(message.content),
      maxMessageTextChars,
    );
    return { ...message, content: shortened };
  }
  if (!Array.isArray(message.content)) return message;
  // The same limits apply to every part of the message.
  const limits = { maxToolResultChars, maxMessageTextChars, omitLargeAttachments };
  const parts = message.content.map((part) => truncateContentPartForPayload(part, limits));
  return { ...message, content: parts };
 }
 /**
 * Shrinks a single content part.
 *
 * Text parts and text-valued tool results are compressed and capped. String
 * image/file payloads are replaced by a placeholder only when
 * `limits.omitLargeAttachments` is set. Everything else is returned unchanged.
 */
 function truncateContentPartForPayload(
  part: unknown,
  limits: {
    maxToolResultChars: number;
    maxMessageTextChars: number;
    omitLargeAttachments: boolean;
  },
 ): unknown {
  if (!part || typeof part !== "object") return part;
  const record = part as Record<string, unknown>;
  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") break;
      const shortened = truncateTextWithHeadAndTail(
        compressVerboseText(record.text),
        limits.maxMessageTextChars,
      );
      return { ...record, text: shortened };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") break;
      const outputRecord = output as Record<string, unknown>;
      // Only plain-text tool output is shortened; other output kinds pass through.
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") break;
      const shortened = truncateTextWithHeadAndTail(
        compressVerboseText(outputRecord.value),
        limits.maxToolResultChars,
      );
      return { ...record, output: { ...outputRecord, value: shortened } };
    }
    case "image": {
      if (limits.omitLargeAttachments && typeof record.image === "string") {
        return omittedAttachmentTextPart("image", record.image, record);
      }
      break;
    }
    case "file": {
      if (limits.omitLargeAttachments && typeof record.data === "string") {
        return omittedAttachmentTextPart("file", record.data, record);
      }
      break;
    }
    default:
      break;
  }
  return part;
 }
 /**
 * Builds the text part that stands in for an omitted image/file payload,
 * listing the filename and media type (when they are strings) and the
 * payload length in characters.
 */
 function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
 ): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") details.push(`filename=${record.filename}`);
  if (typeof record.mediaType === "string") details.push(`mediaType=${record.mediaType}`);
  details.push(`${payload.length} chars`);
  const text = `[${label} attachment omitted to keep the AI request small: ${details.join(", ")}]`;
  return { type: "text", text };
 }
 /**
 * Shrinks a conversation until its estimated JSON size fits the byte budget
 * (`maxPayloadBytes` minus `reservedBytes`), escalating through these stages
 * and returning as soon as one of them fits:
 *
 * 1. No change when the messages are already within budget.
 * 2. Compress/truncate text and tool results with the configured caps.
 * 3. Re-truncate with progressively tighter caps.
 * 4. Drop older messages while more than `protectRecentMessages` remain.
 * 5. Apply emergency caps, omit string image/file payloads, and keep dropping
 *    messages down to a single one.
 * 6. Shrink the caps further (down to 32 characters), no longer sparing the
 *    latest user message.
 *
 * NOTE(review): nothing after stage 6 re-checks the budget, so the returned
 * `estimatedBytes` can still exceed it; callers should not assume a fit.
 */
 export function fitMessagesToRequestPayloadBudget({
  messages,
  maxPayloadBytes = DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
  reservedBytes = 0,
  maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
  maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
  protectRecentMessages = DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES,
  preserveLatestMessage = true,
 }: FitMessagesToRequestPayloadBudgetInput): FitMessagesToRequestPayloadBudgetResult {
  // Bytes available for messages; negative reservations are treated as 0.
  const budget = Math.max(0, maxPayloadBytes - Math.max(0, reservedBytes));
  if (budget === 0) {
    // Everything is reserved: nothing can be sent.
    return { messages: [], didAdjust: messages.length > 0, estimatedBytes: 0 };
  }
  const originalBytes = estimateUtf8Bytes(messages);
  if (originalBytes <= budget) {
    return { messages, didAdjust: false, estimatedBytes: originalBytes };
  }
  // The final message is spared from truncation when it is a user message and
  // preservation is enabled.
  const shouldPreserveMessage = (message: ModelMessage, index: number, list: ModelMessage[]) => (
    preserveLatestMessage && index === list.length - 1 && message.role === "user"
  );
  // Stage 2: first truncation pass with the caller's caps.
  let adjusted = messages.map((message, index) => truncateModelMessageForPayload(message, {
    maxToolResultChars,
    maxMessageTextChars,
    preserveContent: shouldPreserveMessage(message, index, messages),
  }));
  let estimatedBytes = estimateUtf8Bytes(adjusted);
  // Adjustment is detected by comparing byte estimates before and after.
  let didAdjust = estimatedBytes !== originalBytes;
  if (estimatedBytes <= budget) {
    return { messages: adjusted, didAdjust, estimatedBytes };
  }
  // Stage 3: parallel cap ladders; index 0 repeats the caps already applied,
  // so the loop below starts at 1.
  const toolResultCaps = [
    maxToolResultChars,
    Math.floor(maxToolResultChars * 0.6),
    Math.floor(maxToolResultChars * 0.35),
    4_000,
    2_000,
    1_000,
  ];
  const messageTextCaps = [
    maxMessageTextChars,
    Math.floor(maxMessageTextChars * 0.6),
    Math.floor(maxMessageTextChars * 0.35),
    8_000,
    4_000,
    2_000,
  ];
  for (let i = 1; i < toolResultCaps.length; i += 1) {
    adjusted = adjusted.map((message, index) => truncateModelMessageForPayload(message, {
      maxToolResultChars: toolResultCaps[i],
      maxMessageTextChars: messageTextCaps[i],
      preserveContent: shouldPreserveMessage(message, index, adjusted),
    }));
    estimatedBytes = estimateUtf8Bytes(adjusted);
    didAdjust = true;
    if (estimatedBytes <= budget) {
      return { messages: adjusted, didAdjust, estimatedBytes };
    }
  }
  // Stage 4: drop the oldest messages at the index chosen by
  // findSafeCompactionSplitIndex (presumably a boundary that keeps tool
  // call/result pairs together; verify against contextCompaction).
  let working = [...adjusted];
  while (working.length > protectRecentMessages) {
    const splitAt = findSafeCompactionSplitIndex(working, protectRecentMessages);
    // A non-positive split index means nothing more can be dropped here.
    if (splitAt <= 0) break;
    working = working.slice(splitAt);
    estimatedBytes = estimateUtf8Bytes(working);
    didAdjust = true;
    if (estimatedBytes <= budget) {
      return { messages: working, didAdjust, estimatedBytes };
    }
  }
  // Stage 5: emergency caps, and string image/file payloads are replaced by
  // placeholders.
  const emergencyToolCap = 600;
  const emergencyTextCap = 1_200;
  working = working.map((message, index) => truncateModelMessageForPayload(message, {
    maxToolResultChars: emergencyToolCap,
    maxMessageTextChars: emergencyTextCap,
    omitLargeAttachments: true,
    preserveContent: shouldPreserveMessage(message, index, working),
  }));
  estimatedBytes = estimateUtf8Bytes(working);
  didAdjust = true;
  // Shrink the protected window one message per iteration, down to one.
  let emergencyProtect = Math.min(protectRecentMessages, working.length);
  while (estimatedBytes > budget && working.length > 1) {
    emergencyProtect = Math.max(1, emergencyProtect - 1);
    const splitAt = findSafeCompactionSplitIndex(working, emergencyProtect);
    if (splitAt <= 0) {
      // No usable split point: fall back to keeping only the last message.
      working = working.slice(-1);
    } else {
      working = working.slice(splitAt);
    }
    // Re-apply the emergency caps: after dropping, a different message may be
    // last and therefore newly preserved or newly truncated.
    working = working.map((message, index) => truncateModelMessageForPayload(message, {
      maxToolResultChars: emergencyToolCap,
      maxMessageTextChars: emergencyTextCap,
      omitLargeAttachments: true,
      preserveContent: shouldPreserveMessage(message, index, working),
    }));
    estimatedBytes = estimateUtf8Bytes(working);
  }
  // Stage 6: scale both caps by 0.6 per pass until they reach the 32-character
  // floor; the latest message is no longer preserved.
  let finalTextCap = emergencyTextCap;
  let finalToolCap = emergencyToolCap;
  while (estimatedBytes > budget && (finalTextCap > 32 || finalToolCap > 32)) {
    finalTextCap = Math.max(32, Math.floor(finalTextCap * 0.6));
    finalToolCap = Math.max(32, Math.floor(finalToolCap * 0.6));
    working = working.map((message) => truncateModelMessageForPayload(message, {
      maxToolResultChars: finalToolCap,
      maxMessageTextChars: finalTextCap,
      omitLargeAttachments: true,
      preserveContent: false,
    }));
    estimatedBytes = estimateUtf8Bytes(working);
  }
  return { messages: working, didAdjust, estimatedBytes };
 }
--- a/infrastructure/ai/requestPayloadCompression.test.ts
+++ b/infrastructure/ai/requestPayloadCompression.test.ts
@@ -0,0 +1,74 @@
 import test from "node:test";
 import assert from "node:assert/strict";
 import type { ModelMessage } from "ai";
 import {
  compressMessagesForRequestTooLargeRetry,
  compressVerboseText,
  truncateTextWithHeadAndTail,
 } from "./requestPayloadCompression.ts";
 test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
  const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
  const output = compressVerboseText(input);
  // Five newlines collapse to three and the run of four identical lines is
  // capped at three. Pin the exact output: counting split("\nsame\n") pieces
  // gives 3 with or without deduplication, so it could never fail.
  assert.equal(output, "line1\n\n\nline2\nsame\nsame\nsame\nline3");
 });
 test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
  const maxChars = 2_000;
  const leading = "A".repeat(500);
  const middle = "B".repeat(20_000);
  const trailing = "C".repeat(500);
  const truncated = truncateTextWithHeadAndTail(`${leading}${middle}${trailing}`, maxChars);
  // Both ends survive, the middle is replaced by the marker, and the limit holds.
  assert.ok(truncated.startsWith("AAA"));
  assert.ok(truncated.endsWith("CCC"));
  assert.ok(truncated.includes("[... output truncated for request size ...]"));
  assert.ok(truncated.length <= maxChars);
 });
 test("compressMessagesForRequestTooLargeRetry compresses messages without enforcing a byte budget", () => {
  const hugeToolOutput = "X".repeat(200_000);
  const hugeImage = "A".repeat(1_000_000);
  const history: ModelMessage[] = [
    { role: "user", content: "run build" },
    {
      role: "tool",
      content: [{
        type: "tool-result",
        toolCallId: "call-1",
        toolName: "terminal_execute",
        output: { type: "text", value: hugeToolOutput },
      }],
    },
    {
      role: "user",
      content: [
        { type: "text", text: "please inspect this image" },
        { type: "image", image: hugeImage, mediaType: "image/png" },
      ],
    },
  ];
  const result = compressMessagesForRequestTooLargeRetry(history);
  assert.equal(result.didAdjust, true);
  // The result exposes exactly `didAdjust` and `messages`: no byte estimate.
  assert.deepEqual(Object.keys(result).sort(), ["didAdjust", "messages"]);
  // No message is dropped; only contents are shortened.
  assert.equal(result.messages.length, history.length);
  const toolContent = result.messages[1].content;
  assert.ok(Array.isArray(toolContent));
  const toolPart = toolContent[0] as { output?: { value?: string } };
  const keptChars = toolPart.output?.value?.length ?? 0;
  assert.ok(keptChars < 5_000);
  const userContent = result.messages[2].content;
  assert.ok(Array.isArray(userContent));
  assert.deepEqual(userContent[1], {
    type: "text",
    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
  });
 });
 test("compressMessagesForRequestTooLargeRetry reports no adjustment for compact messages", () => {
  // A short message has nothing to compress, so it must come back unchanged.
  const compact: ModelMessage[] = [{ role: "user", content: "hello" }];
  const { didAdjust, messages } = compressMessagesForRequestTooLargeRetry(compact);
  assert.equal(didAdjust, false);
  assert.deepEqual(messages, compact);
 });
--- a/infrastructure/ai/requestPayloadCompression.ts
+++ b/infrastructure/ai/requestPayloadCompression.ts
@@ -0,0 +1,167 @@
 import type { ModelMessage } from "ai";
 /** Character cap for text tool-result output when compressing messages for a retry. */
 const RETRY_MAX_TOOL_RESULT_CHARS = 4_000;
 /** Character cap for string message content and text parts when compressing for a retry. */
 const RETRY_MAX_MESSAGE_TEXT_CHARS = 8_000;
 /** Default marker `truncateTextWithHeadAndTail` splices between the kept head and tail. */
 const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
 /** Default number of leading characters `truncateTextWithHeadAndTail` keeps. */
 const HEAD_CHARS = 800;
 /** Default number of trailing characters `truncateTextWithHeadAndTail` keeps. */
 const TAIL_CHARS = 4_000;
 /** Outcome returned by `compressMessagesForRequestTooLargeRetry`. */
 export interface CompressMessagesForRequestTooLargeRetryResult {
  /** The input array itself when nothing changed, otherwise a new array of compressed messages. */
  messages: ModelMessage[];
  /** True when at least one message was replaced by a compressed copy. */
  didAdjust: boolean;
 }
 /**
 * Shrinks noisy terminal/build output without truncating it.
 * CRLF becomes LF, runs of four or more newlines collapse to three, and any
 * run of identical consecutive lines is capped at three copies.
 */
 export function compressVerboseText(value: string): string {
  if (!value) return value;
  const normalized = value
    .replace(/\r\n/g, "\n")
    .replace(/\n{4,}/g, "\n\n\n");
  const kept: string[] = [];
  // Number of extra copies seen for the line currently being repeated.
  let extraCopies = 0;
  for (const line of normalized.split("\n")) {
    const repeatsPrevious = kept.length > 0 && kept[kept.length - 1] === line;
    extraCopies = repeatsPrevious ? extraCopies + 1 : 0;
    if (extraCopies <= 2) kept.push(line);
  }
  return kept.join("\n");
 }
 /**
 * Shortens `value` to at most `maxChars` characters by keeping its start and
 * end and splicing `marker` between them.
 *
 * - Text already within the limit is returned unchanged.
 * - When the limit is too small to hold the marker plus a little context
 *   (`marker.length + 16` or less), the text is simply cut at the limit.
 * - Otherwise up to `headChars` leading characters are kept and whatever
 *   budget remains (up to `tailChars`) is taken from the end. Whitespace next
 *   to the marker is trimmed.
 *
 * @param value - Text to shorten.
 * @param maxChars - Maximum length of the result; values below 0 count as 0.
 * @param options - Optional overrides for head/tail sizes and the marker.
 * @returns The original text, or a shortened version no longer than `maxChars`.
 */
 export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
 ): string {
  if (value.length <= maxChars) return value;
  // slice(0, negative) keeps everything except the last characters, so a
  // negative limit must be clamped to "keep nothing".
  const limit = Math.max(0, maxChars);
  if (limit <= marker.length + 16) {
    return value.slice(0, limit);
  }
  const budget = limit - marker.length;
  const head = Math.min(Math.max(0, headChars), budget);
  const tail = Math.min(Math.max(0, tailChars), budget - head);
  if (head + tail === 0) {
    return value.slice(0, limit);
  }
  const headText = value.slice(0, head).trimEnd();
  // slice(-0) equals slice(0) and would return the WHOLE string, making the
  // "truncated" text longer than the input; an empty tail needs its own case.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
 }
 /**
 * Compresses every message with fixed character caps, without measuring or
 * enforcing any byte budget and without dropping messages.
 *
 * @returns The original array (same reference) with `didAdjust: false` when
 * no message changed, otherwise a new array with `didAdjust: true`.
 */
 export function compressMessagesForRequestTooLargeRetry(
  messages: ModelMessage[],
 ): CompressMessagesForRequestTooLargeRetryResult {
  const compressedMessages = messages.map((message) => compressModelMessageForRequestRetry(message));
  // The per-message helper returns the same object when nothing changed, so
  // identity comparison is enough to detect an adjustment.
  const didAdjust = compressedMessages.some((message, index) => message !== messages[index]);
  if (!didAdjust) {
    return { messages, didAdjust };
  }
  return { messages: compressedMessages, didAdjust };
 }
 /**
 * Compresses one message for a retry. Returns the very same object when
 * nothing changed so callers can detect adjustments by identity.
 */
 function compressModelMessageForRequestRetry(message: ModelMessage): ModelMessage {
  const original = message.content;
  if (typeof original === "string") {
    const shortened = compressAndTruncateText(original, RETRY_MAX_MESSAGE_TEXT_CHARS);
    if (shortened === original) return message;
    return { ...message, content: shortened };
  }
  if (!Array.isArray(original)) return message;
  const parts = original.map((part) => compressContentPartForRequestRetry(part));
  const changed = parts.some((part, index) => part !== original[index]);
  if (!changed) return message;
  return { ...message, content: parts };
 }
 /**
 * Compresses a single content part for a retry.
 *
 * Text parts and text-valued tool results are compressed and capped, and the
 * same object is returned when the text did not change. String image/file
 * payloads are always replaced by a placeholder text part. Anything else is
 * returned unchanged.
 */
 function compressContentPartForRequestRetry(part: unknown): unknown {
  if (!part || typeof part !== "object") return part;
  const record = part as Record<string, unknown>;
  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") break;
      const text = compressAndTruncateText(record.text, RETRY_MAX_MESSAGE_TEXT_CHARS);
      return text === record.text ? part : { ...record, text };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") break;
      const outputRecord = output as Record<string, unknown>;
      // Only plain-text tool output is shortened; other output kinds pass through.
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") break;
      const value = compressAndTruncateText(outputRecord.value, RETRY_MAX_TOOL_RESULT_CHARS);
      if (value === outputRecord.value) return part;
      return { ...record, output: { ...outputRecord, value } };
    }
    case "image": {
      if (typeof record.image === "string") {
        return omittedAttachmentTextPart("image", record.image, record);
      }
      break;
    }
    case "file": {
      if (typeof record.data === "string") {
        return omittedAttachmentTextPart("file", record.data, record);
      }
      break;
    }
    default:
      break;
  }
  return part;
 }
 /** Collapses verbose output first, then caps the result at `maxChars` characters. */
 function compressAndTruncateText(value: string, maxChars: number): string {
  const collapsed = compressVerboseText(value);
  return truncateTextWithHeadAndTail(collapsed, maxChars);
 }
 /**
 * Builds the text part that stands in for an omitted image/file payload,
 * listing the filename and media type (when they are strings) and the
 * payload length in characters.
 */
 function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
 ): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") details.push(`filename=${record.filename}`);
  if (typeof record.mediaType === "string") details.push(`mediaType=${record.mediaType}`);
  details.push(`${payload.length} chars`);
  const text = `[${label} attachment omitted to keep the AI request small: ${details.join(", ")}]`;
  return { type: "text", text };
 }
--- a/infrastructure/ai/sdk/tools.ts
+++ b/infrastructure/ai/sdk/tools.ts
@@ -15,7 +15,7 @@ import {
 } from '../shared/toolExecutors';
 import { requestApproval } from '../shared/approvalGate';
 import { reserveSessionSlot } from '../shared/sessionExecutionQueue';
-import { truncateTextWithHeadAndTail } from '../requestPayloadBudget';
+import { truncateTextWithHeadAndTail } from '../requestPayloadCompression';
 const MAX_LIVE_TERMINAL_STDOUT_CHARS = 24_000;
 const MAX_LIVE_TERMINAL_STDERR_CHARS = 12_000;