fix(ai): compress Catty requests only after 413 (#1327)

* fix(ai): compress Catty requests only after 413
* fix(ai): retry 413 after tool progress safely
* fix(ai): mark thrown 413 retries after tool progress
* fix(ai): preserve tool results in 413 retry
2026-06-09 13:11:42 +08:00
parent 3bc373dbec
commit 517cbb6cee
10 changed files with 578 additions and 746 deletions
--- a/components/ai/cattyHistoryReplay.test.ts
+++ b/components/ai/cattyHistoryReplay.test.ts
@@ -91,6 +91,23 @@ test("buildHistoricalToolResultReplayText keeps non-terminal tool results intact
  assert.equal(buildHistoricalToolResultReplayText(result, toolCall), "search result summary");
 });

+test("buildHistoricalToolResultReplayText can preserve terminal output for 413 retries", () => {
+  const toolCall: ToolCall = {
+    id: "call-1",
+    name: "terminal_execute",
+    arguments: { command: "npm test" },
+  };
+  const result: ToolResult = {
+    toolCallId: "call-1",
+    content: "real terminal output",
+  };
+
+  assert.equal(
+    buildHistoricalToolResultReplayText(result, toolCall, { preserveTerminalOutput: true }),
+    "real terminal output",
+  );
+});
+
 test("buildHistoricalToolReplayMaps pairs reused tool ids with the nearest preceding call", () => {
  const messages: ChatMessage[] = [
    {
--- a/components/ai/cattyHistoryReplay.ts
+++ b/components/ai/cattyHistoryReplay.ts
@@ -112,9 +112,14 @@ function findLastIndex<T>(items: T[], predicate: (item: T) => boolean): number {
 export function buildHistoricalToolResultReplayText(
  result: ToolResult,
  toolCall?: ToolCall,
+  {
+    preserveTerminalOutput = false,
+  }: {
+    preserveTerminalOutput?: boolean;
+  } = {},
 ): string {
  const toolName = toolCall?.name ?? "unknown";
-  if (!isTerminalToolName(toolName)) {
+  if (!isTerminalToolName(toolName) || preserveTerminalOutput) {
    return result.content;
  }

--- a/components/ai/hooks/useAIChatStreaming.ts
+++ b/components/ai/hooks/useAIChatStreaming.ts
@@ -21,6 +21,7 @@ import type {
  ExternalAgentConfig,
  ProviderAdvancedParams,
  ProviderConfig,
+  ToolResult,
  WebSearchConfig,
 } from '../../../infrastructure/ai/types';
 import { isWebSearchReady } from '../../../infrastructure/ai/types';
@@ -36,9 +37,12 @@ import {
  resolveContextWindow,
 } from '../../../infrastructure/ai/contextCompaction';
 import {
-  estimateUtf8Bytes,
-  fitMessagesToRequestPayloadBudget,
-} from '../../../infrastructure/ai/requestPayloadBudget';
+  compressMessagesForRequestTooLargeRetry,
+} from '../../../infrastructure/ai/requestPayloadCompression';
+import {
+  createCattyRequestTooLargeRetryError,
+  hadToolProgressBeforeRequestTooLarge,
+} from '../../../infrastructure/ai/cattyRequestTooLargeRetry';
 import { createModelFromConfig } from '../../../infrastructure/ai/sdk/providers';
 import { createCattyTools } from '../../../infrastructure/ai/sdk/tools';
 import type { ExecutorContext } from '../../../infrastructure/ai/cattyAgent/executor';
@@ -50,6 +54,7 @@ import {
  buildPromptWithTerminalSelectionAttachments,
  isTerminalSelectionAttachment,
 } from '../../../application/state/terminalSelectionAttachment';
+import { latestAISessionsSnapshot } from '../../../application/state/aiStateSnapshots';
 import {
  buildHistoricalToolReplayMaps,
  buildHistoricalToolResultReplayText,
@@ -343,7 +348,7 @@ export function useAIChatStreaming({
    // Track the current assistant message ID so updates target the correct message
    let activeMsgId = currentAssistantMsgId;
    let lastAddedRole: 'assistant' | 'tool' = 'assistant';
-    let hasRetryUnsafeToolProgress = false;
+    let hadToolProgress = false;
    const reader = result.fullStream.getReader();

    // -- Text-delta batching: accumulate deltas and flush periodically --
@@ -419,7 +424,16 @@ export function useAIChatStreaming({

    try {
    while (true) {
-      const { done, value } = await reader.read();
+      let readResult: ReadableStreamReadResult<unknown>;
+      try {
+        readResult = await reader.read();
+      } catch (readErr) {
+        if (isRequestTooLargeError(readErr)) {
+          throw createCattyRequestTooLargeRetryError(readErr, hadToolProgress);
+        }
+        throw readErr;
+      }
+      const { done, value } = readResult;
      if (done) break;
      // Use the StreamChunk union for type narrowing instead of unsafe casts
      const chunk = value as StreamChunk;
@@ -486,7 +500,7 @@ export function useAIChatStreaming({
          cancelPendingFlush();
          flushText();
          const typedChunk = chunk as ToolCallChunk;
-          hasRetryUnsafeToolProgress = true;
+          hadToolProgress = true;
          const messageId = ensureAssistantMessage();
          const providerOptions = normalizeProviderContinuationOptions(typedChunk.providerMetadata);
          updateMessageById(streamSessionId, messageId, msg => ({
@@ -512,7 +526,7 @@ export function useAIChatStreaming({
          cancelPendingFlush();
          flushText();
          const typedChunk = chunk as ToolResultChunk;
-          hasRetryUnsafeToolProgress = true;
+          hadToolProgress = true;
          // Mark the assistant message's tool execution as completed
          updateMessageById(streamSessionId, activeMsgId, msg =>
            msg.role === 'assistant' && msg.executionStatus === 'running'
@@ -559,10 +573,13 @@ export function useAIChatStreaming({
            console.warn('[Catty] suppressed SDK stream state error:', typedChunk.error);
            break;
          }
-          if (isRequestTooLargeError(typedChunk.error) && !hasRetryUnsafeToolProgress) {
+          if (isRequestTooLargeError(typedChunk.error)) {
            cancelPendingFlush();
            flushText();
-            throw typedChunk.error;
+            throw createCattyRequestTooLargeRetryError(
+              typedChunk.error,
+              hadToolProgress,
+            );
          }
          cancelPendingFlush();
          flushText();
@@ -796,44 +813,86 @@ export function useAIChatStreaming({
    };

    try {
-      // Issue #5: Build SDK messages including tool-call and tool-result messages
-      // so the LLM maintains full conversation context
-      const allMessages = currentSession?.messages ?? [];
+      let openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
+      const buildSdkMessages = (
+        allMessages: ChatMessage[],
+        includeCurrentUserMessage: boolean,
+        {
+          preserveTerminalToolResults = new Set<ToolResult>(),
+        }: {
+          preserveTerminalToolResults?: ReadonlySet<ToolResult>;
+        } = {},
+      ): Array<ModelMessage> => {
+        const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
+        const nextFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
+        const sdkMessages: Array<ModelMessage> = [];
+        let previousHistoryMessageWasToolResult = false;

-      const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
-
-      const sdkMessages: Array<ModelMessage> = [];
-      const openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
-      let previousHistoryMessageWasToolResult = false;
-      for (const m of allMessages) {
-        const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
-        if (m.role === 'user') {
-          // Historical attachments are replayed as placeholders so screenshots,
-          // files, and terminal selections do not balloon every follow-up request.
-          const messageAttachments = m.attachments ?? m.images;
-          sdkMessages.push({
-            role: 'user',
-            content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
-          });
-        } else if (m.role === 'assistant') {
-          const activeContinuation = isProviderContinuationForSource(
-            m.providerContinuation,
-            continuationContext.source,
-          )
-            ? m.providerContinuation
-            : undefined;
-          const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
-            m,
-            continuationContext.source,
-          );
-          if (m.toolCalls?.length) {
-            // Only include tool calls that have matching results
-            const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
-            const resolvedCalls = resolvedToolCalls
-              ? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
-              : [];
-            const contentParts: AssistantContentPart[] = [];
-            if (resolvedCalls.length > 0) {
+        for (const m of allMessages) {
+          const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
+          if (m.role === 'user') {
+            // Historical attachments are replayed as placeholders so screenshots,
+            // files, and terminal selections do not balloon every follow-up request.
+            const messageAttachments = m.attachments ?? m.images;
+            sdkMessages.push({
+              role: 'user',
+              content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
+            });
+          } else if (m.role === 'assistant') {
+            const activeContinuation = isProviderContinuationForSource(
+              m.providerContinuation,
+              continuationContext.source,
+            )
+              ? m.providerContinuation
+              : undefined;
+            const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
+              m,
+              continuationContext.source,
+            );
+            if (m.toolCalls?.length) {
+              // Only include tool calls that have matching results
+              const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
+              const resolvedCalls = resolvedToolCalls
+                ? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
+                : [];
+              const contentParts: AssistantContentPart[] = [];
+              if (resolvedCalls.length > 0) {
+                for (const part of activeContinuation?.reasoningParts ?? []) {
+                  if (!part.text && !part.providerOptions) continue;
+                  contentParts.push({
+                    type: 'reasoning' as const,
+                    text: part.text,
+                    ...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
+                  });
+                }
+              }
+              if (m.content) {
+                contentParts.push({
+                  type: 'text' as const,
+                  text: m.content,
+                  ...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
+                });
+              }
+              for (const tc of resolvedCalls) {
+                const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
+                contentParts.push({
+                  type: 'tool-call' as const,
+                  toolCallId: tc.id,
+                  toolName: tc.name,
+                  input: tc.arguments ?? {},
+                  ...(providerOptions ? { providerOptions } : {}),
+                });
+              }
+              // If all tool calls were orphaned, just include the text content
+              if (contentParts.length > 0) {
+                const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
+                sdkMessages.push(message);
+                if (resolvedCalls.length > 0) {
+                  rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
+                }
+              }
+            } else if (m.content) {
+              const contentParts: AssistantContentPart[] = [];
              for (const part of activeContinuation?.reasoningParts ?? []) {
                if (!part.text && !part.providerOptions) continue;
                contentParts.push({
@@ -842,95 +901,91 @@ export function useAIChatStreaming({
                  ...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
                });
              }
-            }
-            if (m.content) {
              contentParts.push({
                type: 'text' as const,
                text: m.content,
                ...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
              });
-            }
-            for (const tc of resolvedCalls) {
-              const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
-              contentParts.push({
-                type: 'tool-call' as const,
-                toolCallId: tc.id,
-                toolName: tc.name,
-                input: tc.arguments ?? {},
-                ...(providerOptions ? { providerOptions } : {}),
-              });
-            }
-            // If all tool calls were orphaned, just include the text content
-            if (contentParts.length > 0) {
-              const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
+              const message: ModelMessage = {
+                role: 'assistant',
+                content: toAssistantModelContent(contentParts),
+              };
              sdkMessages.push(message);
-              if (resolvedCalls.length > 0) {
-                rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
+              if (currentMessageFollowsToolResult) {
+                rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
              }
            }
-          } else if (m.content) {
-            const contentParts: AssistantContentPart[] = [];
-            for (const part of activeContinuation?.reasoningParts ?? []) {
-              if (!part.text && !part.providerOptions) continue;
-              contentParts.push({
-                type: 'reasoning' as const,
-                text: part.text,
-                ...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
-              });
-            }
-            contentParts.push({
-              type: 'text' as const,
-              text: m.content,
-              ...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
+          } else if (m.role === 'tool' && m.toolResults?.length) {
+            sdkMessages.push({
+              role: 'tool',
+              content: m.toolResults.map(tr => {
+                const toolCall = toolCallByToolResult.get(tr);
+                return {
+                  type: 'tool-result' as const,
+                  toolCallId: tr.toolCallId,
+                  toolName: toolCall?.name ?? 'unknown',
+                  output: {
+                    type: 'text' as const,
+                    value: buildHistoricalToolResultReplayText(tr, toolCall, {
+                      preserveTerminalOutput: preserveTerminalToolResults.has(tr),
+                    }),
+                  },
+                };
+              }),
            });
-            const message: ModelMessage = {
-              role: 'assistant',
-              content: toAssistantModelContent(contentParts),
-            };
-            sdkMessages.push(message);
-            if (currentMessageFollowsToolResult) {
-              rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
+          }
+          previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
+        }
+
+        if (includeCurrentUserMessage) {
+          // Build the current user message — include attachments as multimodal content
+          if (attachments?.length) {
+            const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
+            const modelAttachments = attachments.filter(
+              (attachment) => !isTerminalSelectionAttachment(attachment),
+            );
+            if (!modelAttachments.length) {
+              sdkMessages.push({ role: 'user', content: modelText });
+            } else {
+              const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
+              parts.push({ type: 'text', text: modelText });
+              for (const att of modelAttachments) {
+                if (att.mediaType.startsWith('image/')) {
+                  parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
+                } else {
+                  parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
+                }
+              }
+              sdkMessages.push({ role: 'user', content: parts });
            }
-          }
-        } else if (m.role === 'tool' && m.toolResults?.length) {
-          sdkMessages.push({
-            role: 'tool',
-            content: m.toolResults.map(tr => {
-              const toolCall = toolCallByToolResult.get(tr);
-              return {
-                type: 'tool-result' as const,
-                toolCallId: tr.toolCallId,
-                toolName: toolCall?.name ?? 'unknown',
-                output: { type: 'text' as const, value: buildHistoricalToolResultReplayText(tr, toolCall) },
-              };
-            }),
-          });
-        }
-        previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
-      }
-      // Build the current user message — include attachments as multimodal content
-      if (attachments?.length) {
-        const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
-        const modelAttachments = attachments.filter(
-          (attachment) => !isTerminalSelectionAttachment(attachment),
-        );
-        if (!modelAttachments.length) {
-          sdkMessages.push({ role: 'user', content: modelText });
-        } else {
-        const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
-        parts.push({ type: 'text', text: modelText });
-        for (const att of modelAttachments) {
-          if (att.mediaType.startsWith('image/')) {
-            parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
          } else {
-            parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
+            sdkMessages.push({ role: 'user', content: trimmed });
          }
        }
-        sdkMessages.push({ role: 'user', content: parts });
+
+        openAIChatAssistantFieldsByMessage = nextFieldsByMessage;
+        return sdkMessages;
+      };
+
+      const sdkMessages = buildSdkMessages(currentSession?.messages ?? [], true);
+      const collectToolResultsAfterMessage = (
+        messages: ChatMessage[],
+        messageId: string,
+      ): Set<ToolResult> => {
+        const results = new Set<ToolResult>();
+        let afterMessage = false;
+        for (const message of messages) {
+          if (message.id === messageId) {
+            afterMessage = true;
+            continue;
+          }
+          if (!afterMessage || message.role !== 'tool' || !message.toolResults?.length) continue;
+          for (const result of message.toolResults) {
+            results.add(result);
+          }
        }
-      } else {
-        sdkMessages.push({ role: 'user', content: trimmed });
-      }
+        return results;
+      };

      // Create model with placeholder API key — the main process injects the real
      // decrypted key when the HTTP request is proxied through IPC, so plaintext
@@ -958,20 +1013,12 @@ export function useAIChatStreaming({
        defaultContextWindow: DEFAULT_CONTEXT_WINDOW_TOKENS,
      });
      const outputReserveTokens = Math.min(4096, Math.ceil(contextWindow * 0.05));
-      const requestReserveTokens = outputReserveTokens + estimateUnknownTokens({
+      const getRequestReserveTokens = () => outputReserveTokens + estimateUnknownTokens({
        systemPrompt,
        toolNames: Object.keys(tools),
        openAIChatAssistantFields: Array.from(openAIChatAssistantFieldsByMessage.values()),
      });

-      const payloadReservedBytes = estimateUtf8Bytes({
-        system: systemPrompt,
-        tools: Object.keys(tools),
-      });
-      const applyRequestPayloadBudget = (messages: ModelMessage[]) => fitMessagesToRequestPayloadBudget({
-        messages,
-        reservedBytes: payloadReservedBytes,
-      });
      const summarizeForCompaction = async (messagesToSummarize: ModelMessage[]) => {
        updateLastMessage(sessionId, msg => ({ ...msg, statusText: 'Compacting earlier context...' }));
        const result = await generateText({
@@ -999,64 +1046,64 @@ export function useAIChatStreaming({
        );
        return pruned;
      };
-      const compactAndBudgetMessages = async (
+      const compactMessages = async (
        messages: ModelMessage[],
        {
          force = false,
          statusText,
-          trimLog,
          fallbackLog,
+          compressForRequestTooLargeRetry = false,
+          compressionLog,
        }: {
          force?: boolean;
          statusText?: string;
-          trimLog: string;
          fallbackLog: string;
+          compressForRequestTooLargeRetry?: boolean;
+          compressionLog?: string;
        },
      ): Promise<ModelMessage[]> => {
+        const compressRetryMessages = (candidateMessages: ModelMessage[], log?: string): ModelMessage[] => {
+          if (!compressForRequestTooLargeRetry) return candidateMessages;
+          const compressed = compressMessagesForRequestTooLargeRetry(candidateMessages);
+          if (compressed.didAdjust && log) {
+            console.warn(log);
+          }
+          return compressed.messages;
+        };
+
        try {
          if (statusText) {
            updateLastMessage(sessionId, msg => ({ ...msg, statusText }));
          }
+          const inputMessages = compressRetryMessages(messages, compressionLog);
          const compacted = await prepareContextCompaction({
-            messages,
+            messages: inputMessages,
            contextWindow,
-            reservedTokens: requestReserveTokens,
+            reservedTokens: getRequestReserveTokens(),
            thresholdRatio: force ? 0 : undefined,
            protectRecentMessages: DEFAULT_PROTECT_RECENT_MESSAGES,
            summarize: summarizeForCompaction,
          });
          let nextMessages = force && !compacted.didCompact
-            ? keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES)
+            ? keepRecentContextMessages(inputMessages, DEFAULT_PROTECT_RECENT_MESSAGES)
            : compacted.messages;
-          const budgetResult = applyRequestPayloadBudget(nextMessages);
-          if (budgetResult.didAdjust) {
-            console.warn(`${trimLog} ${budgetResult.estimatedBytes} bytes.`);
-            nextMessages = budgetResult.messages;
-          }
-          return nextMessages;
+          return compressRetryMessages(nextMessages);
        } catch (err) {
          if (abortController.signal.aborted) throw err;
          console.warn(fallbackLog, err);
-          const fallbackBudget = applyRequestPayloadBudget(
-            keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES),
-          );
-          if (fallbackBudget.didAdjust) {
-            console.warn(
-              `[Catty] Request payload trimmed to ${fallbackBudget.estimatedBytes} bytes after compaction fallback.`,
-            );
+          const fallbackMessages = keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES);
+          if (!compressForRequestTooLargeRetry) {
+            return fallbackMessages;
          }
-          return fallbackBudget.messages;
+          const compressed = compressMessagesForRequestTooLargeRetry(fallbackMessages);
+          if (compressed.didAdjust) {
+            console.warn('[Catty] Request content compressed after compaction fallback.');
+          }
+          return compressed.messages;
        }
      };
-      const payloadBudgetResult = applyRequestPayloadBudget(sdkMessages);
-      let messagesForStream = payloadBudgetResult.messages;
-      if (payloadBudgetResult.didAdjust) {
-        console.warn(
-          `[Catty] Request payload trimmed to ${payloadBudgetResult.estimatedBytes} bytes to avoid HTTP 413.`,
-        );
-      }
-      messagesForStream = await compactAndBudgetMessages(messagesForStream, {
-        trimLog: '[Catty] Request payload re-trimmed after context compaction to',
+      let messagesForStream = sdkMessages;
+      messagesForStream = await compactMessages(messagesForStream, {
        fallbackLog: '[Catty] Context compaction failed; falling back to recent messages only:',
      });

@@ -1080,23 +1127,50 @@ export function useAIChatStreaming({
        }

        console.warn('[Catty] Request hit HTTP 413; forcing context compaction and retrying once.', streamErr);
-        updateMessageById(sessionId, assistantMsgId, msg => ({
-          ...msg,
-          content: '',
-          thinking: undefined,
-          thinkingDurationMs: undefined,
-          providerContinuation: undefined,
-          toolCalls: undefined,
-          errorInfo: undefined,
-          executionStatus: undefined,
-          pendingApproval: undefined,
-          statusText: 'Request was too large. Compacting context and retrying...',
-        }));
-        const retryMessages = prepareMessagesForStream(await compactAndBudgetMessages(messagesForStream, {
+        const statusText = 'Request was too large. Compacting context and retrying...';
+        const hadToolProgress = hadToolProgressBeforeRequestTooLarge(streamErr);
+        let retryBaseMessages = messagesForStream;
+        let retryAssistantMsgId = assistantMsgId;
+        if (hadToolProgress) {
+          const latestSession = latestAISessionsSnapshot?.find(session => session.id === sessionId);
+          if (latestSession) {
+            retryBaseMessages = buildSdkMessages(latestSession.messages, false, {
+              preserveTerminalToolResults: collectToolResultsAfterMessage(
+                latestSession.messages,
+                assistantMsgId,
+              ),
+            });
+          }
+          retryAssistantMsgId = generateId();
+          addMessageToSession(sessionId, {
+            id: retryAssistantMsgId,
+            role: 'assistant',
+            content: '',
+            timestamp: Date.now(),
+            model: activeModelId || context.activeProvider?.defaultModel || '',
+            providerId: context.activeProvider?.providerId,
+            statusText,
+          });
+        } else {
+          updateMessageById(sessionId, assistantMsgId, msg => ({
+            ...msg,
+            content: '',
+            thinking: undefined,
+            thinkingDurationMs: undefined,
+            providerContinuation: undefined,
+            toolCalls: undefined,
+            errorInfo: undefined,
+            executionStatus: undefined,
+            pendingApproval: undefined,
+            statusText,
+          }));
+        }
+        const retryMessages = prepareMessagesForStream(await compactMessages(retryBaseMessages, {
          force: true,
-          statusText: 'Request was too large. Compacting context and retrying...',
-          trimLog: '[Catty] Request payload trimmed after forced context compaction to',
+          statusText,
          fallbackLog: '[Catty] Forced context compaction after 413 failed; falling back to recent messages only:',
+          compressForRequestTooLargeRetry: true,
+          compressionLog: '[Catty] Request content compressed after forced context compaction.',
        }));

        await processCattyStream(
@@ -1106,7 +1180,7 @@ export function useAIChatStreaming({
          tools,
          retryMessages,
          abortController.signal,
-          assistantMsgId,
+          retryAssistantMsgId,
          context.activeProvider?.advancedParams,
          continuationContext,
        );
@@ -1123,7 +1197,7 @@ export function useAIChatStreaming({
    }
  }, [
    processCattyStream, reportStreamError, setStreamingForScope,
-    updateLastMessage, updateMessageById,
+    addMessageToSession, updateLastMessage, updateMessageById,
  ]);

  return {
--- a/infrastructure/ai/cattyRequestTooLargeRetry.test.ts
+++ b/infrastructure/ai/cattyRequestTooLargeRetry.test.ts
@@ -0,0 +1,29 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import {
+  createCattyRequestTooLargeRetryError,
+  hadToolProgressBeforeRequestTooLarge,
+} from "./cattyRequestTooLargeRetry.ts";
+
+test("createCattyRequestTooLargeRetryError marks 413 retry errors after tool progress", () => {
+  const source = Object.assign(new Error("HTTP 413 Request Entity Too Large"), {
+    status: 413,
+    responseBody: "<html>too large</html>",
+  });
+
+  const retryError = createCattyRequestTooLargeRetryError(source, true);
+
+  assert.equal(retryError.statusCode, 413);
+  assert.equal(retryError.status, 413);
+  assert.equal(retryError.responseBody, "<html>too large</html>");
+  assert.equal(retryError.cause, source);
+  assert.equal(hadToolProgressBeforeRequestTooLarge(retryError), true);
+});
+
+test("hadToolProgressBeforeRequestTooLarge is false when no tool progress was recorded", () => {
+  const retryError = createCattyRequestTooLargeRetryError("HTTP 413", false);
+
+  assert.equal(hadToolProgressBeforeRequestTooLarge(retryError), false);
+  assert.equal(hadToolProgressBeforeRequestTooLarge(new Error("HTTP 413")), false);
+});
--- a/infrastructure/ai/cattyRequestTooLargeRetry.ts
+++ b/infrastructure/ai/cattyRequestTooLargeRetry.ts
@@ -0,0 +1,34 @@
+export type CattyRequestTooLargeRetryError = Error & {
+  cattyHadToolProgress?: boolean;
+  statusCode?: number;
+  status?: number;
+  responseBody?: string;
+};
+
+export function createCattyRequestTooLargeRetryError(
+  error: unknown,
+  hadToolProgress: boolean,
+): CattyRequestTooLargeRetryError {
+  const message = error instanceof Error
+    ? error.message
+    : String(error ?? 'Request too large');
+  const retryError = new Error(message) as CattyRequestTooLargeRetryError;
+  retryError.name = 'CattyRequestTooLargeRetryError';
+  retryError.cause = error;
+  retryError.cattyHadToolProgress = hadToolProgress;
+  retryError.statusCode = 413;
+  if (error && typeof error === 'object') {
+    const source = error as Record<string, unknown>;
+    if (typeof source.status === 'number') retryError.status = source.status;
+    if (typeof source.responseBody === 'string') retryError.responseBody = source.responseBody;
+  }
+  return retryError;
+}
+
+export function hadToolProgressBeforeRequestTooLarge(error: unknown): boolean {
+  return !!(
+    error &&
+    typeof error === 'object' &&
+    (error as { cattyHadToolProgress?: boolean }).cattyHadToolProgress
+  );
+}
--- a/infrastructure/ai/requestPayloadBudget.test.ts
+++ b/infrastructure/ai/requestPayloadBudget.test.ts
@@ -1,233 +0,0 @@
-import test from "node:test";
-import assert from "node:assert/strict";
-import type { ModelMessage } from "ai";
-
-import {
-  DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
-  compressVerboseText,
-  estimateUtf8Bytes,
-  fitMessagesToRequestPayloadBudget,
-  truncateTextWithHeadAndTail,
-} from "./requestPayloadBudget.ts";
-
-test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
-  const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
-  const output = compressVerboseText(input);
-  assert.match(output, /line1\n\n\nline2/);
-  assert.ok(output.split("\nsame\n").length <= 3);
-});
-
-test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
-  const value = `${"A".repeat(500)}${"B".repeat(20_000)}${"C".repeat(500)}`;
-  const truncated = truncateTextWithHeadAndTail(value, 2_000);
-  assert.ok(truncated.startsWith("AAA"));
-  assert.ok(truncated.includes("[... output truncated for request size ...]"));
-  assert.ok(truncated.endsWith("CCC"));
-  assert.ok(truncated.length <= 2_000);
-});
-
-test("fitMessagesToRequestPayloadBudget truncates verbose tool results before dropping recent turns", () => {
-  const messages: ModelMessage[] = [
-    { role: "user", content: "run build" },
-    {
-      role: "assistant",
-      content: [{
-        type: "tool-call",
-        toolCallId: "call-1",
-        toolName: "terminal_execute",
-        input: { command: "npm run build" },
-      }],
-    },
-    {
-      role: "tool",
-      content: [{
-        type: "tool-result",
-        toolCallId: "call-1",
-        toolName: "terminal_execute",
-        output: { type: "text", value: "X".repeat(200_000) },
-      }],
-    },
-    { role: "user", content: "what failed?" },
-  ];
-
-  const result = fitMessagesToRequestPayloadBudget({
-    messages,
-    maxPayloadBytes: 20_000,
-    reservedBytes: 2_000,
-    maxToolResultChars: 4_000,
-    protectRecentMessages: 4,
-  });
-
-  assert.equal(result.messages.length, 4);
-  const toolMessage = result.messages[2];
-  assert.equal(toolMessage.role, "tool");
-  assert.ok(Array.isArray(toolMessage.content));
-  const toolPart = toolMessage.content[0] as { output?: { value?: string } };
-  assert.ok((toolPart.output?.value?.length ?? 0) < 5_000);
-  assert.ok(result.estimatedBytes <= 20_000);
-});
-
-test("fitMessagesToRequestPayloadBudget drops older turns when truncation alone is insufficient", () => {
-  const messages: ModelMessage[] = [];
-  for (let turn = 0; turn < 12; turn += 1) {
-    messages.push({ role: "user", content: `question ${turn}` });
-    messages.push({ role: "assistant", content: `answer ${turn} ${"Z".repeat(20_000)}` });
-  }
-  messages.push({ role: "user", content: "latest question" });
-
-  const result = fitMessagesToRequestPayloadBudget({
-    messages,
-    maxPayloadBytes: 8_000,
-    reservedBytes: 500,
-    protectRecentMessages: 4,
-    maxMessageTextChars: 2_000,
-  });
-
-  assert.ok(result.messages.length < messages.length);
-  assert.equal(result.messages.at(-1)?.role, "user");
-  assert.match(String(result.messages.at(-1)?.content ?? ""), /latest question/);
-  assert.ok(result.estimatedBytes <= 8_000);
-});
-
-test("estimateUtf8Bytes measures JSON payload size in UTF-8 bytes", () => {
-  const bytes = estimateUtf8Bytes({ text: "caf\u00e9" });
-  assert.ok(bytes > 8);
-});
-
-test("estimateUtf8Bytes works in renderer-like environments without Buffer", () => {
-  const originalBuffer = globalThis.Buffer;
-  try {
-    (globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = undefined;
-    assert.equal(estimateUtf8Bytes({ text: "caf\u00e9" }), new TextEncoder().encode(JSON.stringify({ text: "caf\u00e9" })).byteLength);
-  } finally {
-    (globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = originalBuffer;
-  }
-});
-
-test("default payload budget remains a general gateway guard", () => {
-  assert.equal(DEFAULT_MAX_REQUEST_PAYLOAD_BYTES, 1_500_000);
-});
-
-test("fitMessagesToRequestPayloadBudget preserves current long text when the request is under budget", () => {
-  const currentText = "CURRENT ".repeat(4_000);
-  const result = fitMessagesToRequestPayloadBudget({
-    messages: [{ role: "user", content: currentText }],
-    maxPayloadBytes: 100_000,
-  });
-
-  assert.equal(result.didAdjust, false);
-  assert.equal(result.messages[0].content, currentText);
-});
-
-test("fitMessagesToRequestPayloadBudget reports didAdjust when initial truncation succeeds", () => {
-  const messages: ModelMessage[] = [
-    { role: "user", content: "run build" },
-    {
-      role: "tool",
-      content: [{
-        type: "tool-result",
-        toolCallId: "call-1",
-        toolName: "terminal_execute",
-        output: { type: "text", value: "X".repeat(200_000) },
-      }],
-    },
-  ];
-
-  const result = fitMessagesToRequestPayloadBudget({
-    messages,
-    maxPayloadBytes: 20_000,
-    reservedBytes: 2_000,
-  });
-
-  assert.equal(result.didAdjust, true);
-  assert.ok(result.estimatedBytes <= 20_000);
-});
-
-test("fitMessagesToRequestPayloadBudget keeps dropping messages after emergency caps when still over budget", () => {
-  const messages: ModelMessage[] = [];
-  for (let turn = 0; turn < 8; turn += 1) {
-    messages.push({ role: "user", content: `question ${turn} ${"Q".repeat(5_000)}` });
-    messages.push({ role: "assistant", content: `answer ${turn} ${"A".repeat(5_000)}` });
-  }
-
-  const result = fitMessagesToRequestPayloadBudget({
-    messages,
-    maxPayloadBytes: 5_000,
-    protectRecentMessages: 8,
-    maxMessageTextChars: 2_000,
-  });
-
-  assert.ok(result.messages.length < messages.length);
-  assert.ok(result.estimatedBytes <= 5_000);
-});
-
-test("fitMessagesToRequestPayloadBudget shrinks a single oversized message for very small budgets", () => {
-  const result = fitMessagesToRequestPayloadBudget({
-    messages: [{ role: "assistant", content: "Z".repeat(1_000_000) }],
-    maxPayloadBytes: 1_000,
-    maxMessageTextChars: 500,
-  });
-
-  assert.equal(result.messages.length, 1);
-  assert.ok(result.estimatedBytes <= 1_000);
-});
-
-test("fitMessagesToRequestPayloadBudget returns empty messages when budget is fully reserved", () => {
-  const result = fitMessagesToRequestPayloadBudget({
-    messages: [{ role: "user", content: "hello" }],
-    maxPayloadBytes: 100,
-    reservedBytes: 200,
-  });
-
-  assert.deepEqual(result.messages, []);
-  assert.equal(result.didAdjust, true);
-  assert.equal(result.estimatedBytes, 0);
-});
-
-test("fitMessagesToRequestPayloadBudget omits latest attachments only when they are still over budget at the last resort", () => {
-  const result = fitMessagesToRequestPayloadBudget({
-    messages: [{
-      role: "user",
-      content: [
-        { type: "text", text: "please inspect this image" },
-        { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
-      ],
-    }],
-    maxPayloadBytes: 20_000,
-  });
-
-  assert.ok(result.estimatedBytes <= 20_000);
-  assert.equal(result.messages.length, 1);
-  const content = result.messages[0].content;
-  assert.ok(Array.isArray(content));
-  assert.deepEqual(content[1], {
-    type: "text",
-    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
-  });
-});
-
-test("fitMessagesToRequestPayloadBudget omits older oversized attachment payloads as a last resort", () => {
-  const result = fitMessagesToRequestPayloadBudget({
-    messages: [
-      {
-        role: "user",
-        content: [
-          { type: "text", text: "older image" },
-          { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
-        ],
-      },
-      { role: "user", content: "current question" },
-    ],
-    maxPayloadBytes: 20_000,
-    protectRecentMessages: 2,
-  });
-
-  assert.ok(result.estimatedBytes <= 20_000);
-  assert.equal(result.messages.length, 2);
-  const content = result.messages[0].content;
-  assert.ok(Array.isArray(content));
-  assert.deepEqual(content[1], {
-    type: "text",
-    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
-  });
-});
--- a/infrastructure/ai/requestPayloadBudget.ts
+++ b/infrastructure/ai/requestPayloadBudget.ts
@@ -1,335 +0,0 @@
-import type { ModelMessage } from "ai";
-import { findSafeCompactionSplitIndex } from "./contextCompaction";
-
-/** Stay below typical nginx `client_max_body_size` defaults (often 1-2 MB). */
-export const DEFAULT_MAX_REQUEST_PAYLOAD_BYTES = 1_500_000;
-/** Per tool-result text cap before the sliding window drops older turns. */
-export const DEFAULT_MAX_TOOL_RESULT_CHARS = 12_000;
-/** Per plain user/assistant text cap inside a single history message. */
-export const DEFAULT_MAX_MESSAGE_TEXT_CHARS = 24_000;
-/** Keep this many recent messages while trimming payload size. */
-export const DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES = 8;
-
-const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
-const HEAD_CHARS = 800;
-const TAIL_CHARS = 4_000;
-
-export interface FitMessagesToRequestPayloadBudgetInput {
-  messages: ModelMessage[];
-  maxPayloadBytes?: number;
-  reservedBytes?: number;
-  maxToolResultChars?: number;
-  maxMessageTextChars?: number;
-  protectRecentMessages?: number;
-  preserveLatestMessage?: boolean;
-}
-
-export interface FitMessagesToRequestPayloadBudgetResult {
-  messages: ModelMessage[];
-  didAdjust: boolean;
-  estimatedBytes: number;
-}
-
-export function estimateUtf8Bytes(value: unknown): number {
-  const text = stringifyForByteEstimate(value);
-  return utf8ByteLength(text);
-}
-
-function stringifyForByteEstimate(value: unknown): string {
-  try {
-    return JSON.stringify(value);
-  } catch {
-    return String(value ?? "");
-  }
-}
-
-function utf8ByteLength(value: string | undefined): number {
-  const text = value ?? "";
-  if (typeof Buffer !== "undefined" && typeof Buffer.byteLength === "function") {
-    return Buffer.byteLength(text, "utf8");
-  }
-  return new TextEncoder().encode(text).byteLength;
-}
-
-/**
- * Collapse noisy terminal/build output before measuring payload size.
- * Keeps semantics while removing repeated blank lines and long duplicate runs.
- */
-export function compressVerboseText(value: string): string {
-  if (!value) return value;
-
-  let compressed = value.replace(/\r\n/g, "\n");
-  compressed = compressed.replace(/\n{4,}/g, "\n\n\n");
-
-  const lines = compressed.split("\n");
-  const deduped: string[] = [];
-  let repeatCount = 0;
-  for (const line of lines) {
-    const previous = deduped[deduped.length - 1];
-    if (previous === line) {
-      repeatCount += 1;
-      if (repeatCount <= 2) deduped.push(line);
-      continue;
-    }
-    repeatCount = 0;
-    deduped.push(line);
-  }
-
-  return deduped.join("\n");
-}
-
-export function truncateTextWithHeadAndTail(
-  value: string,
-  maxChars: number,
-  {
-    headChars = HEAD_CHARS,
-    tailChars = TAIL_CHARS,
-    marker = TRUNCATION_MARKER,
-  }: {
-    headChars?: number;
-    tailChars?: number;
-    marker?: string;
-  } = {},
-): string {
-  if (value.length <= maxChars) return value;
-  if (maxChars <= marker.length + 16) {
-    return value.slice(0, maxChars);
-  }
-
-  const budget = maxChars - marker.length;
-  let head = Math.min(headChars, budget);
-  let tail = Math.min(tailChars, Math.max(0, budget - head));
-  if (head + tail > budget) {
-    tail = Math.max(0, budget - head);
-  }
-  if (head + tail >= value.length) {
-    return value.slice(0, maxChars);
-  }
-  if (head + tail <= 0) {
-    return value.slice(0, maxChars);
-  }
-
-  return `${value.slice(0, head).trimEnd()}${marker}${value.slice(-tail).trimStart()}`;
-}
-
-export function truncateModelMessageForPayload(
-  message: ModelMessage,
-  {
-    maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
-    maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
-    omitLargeAttachments = false,
-    preserveContent = false,
-  }: {
-    maxToolResultChars?: number;
-    maxMessageTextChars?: number;
-    omitLargeAttachments?: boolean;
-    preserveContent?: boolean;
-  } = {},
-): ModelMessage {
-  if (preserveContent) return message;
-
-  if (typeof message.content === "string") {
-    const compressed = compressVerboseText(message.content);
-    return {
-      ...message,
-      content: truncateTextWithHeadAndTail(compressed, maxMessageTextChars),
-    };
-  }
-
-  if (!Array.isArray(message.content)) return message;
-
-  return {
-    ...message,
-    content: message.content.map((part) => truncateContentPartForPayload(part, {
-      maxToolResultChars,
-      maxMessageTextChars,
-      omitLargeAttachments,
-    })),
-  };
-}
-
-function truncateContentPartForPayload(
-  part: unknown,
-  limits: {
-    maxToolResultChars: number;
-    maxMessageTextChars: number;
-    omitLargeAttachments: boolean;
-  },
-): unknown {
-  if (!part || typeof part !== "object") return part;
-  const record = part as Record<string, unknown>;
-  const type = record.type;
-
-  if (type === "text" && typeof record.text === "string") {
-    const compressed = compressVerboseText(record.text);
-    return {
-      ...record,
-      text: truncateTextWithHeadAndTail(compressed, limits.maxMessageTextChars),
-    };
-  }
-
-  if (type === "tool-result") {
-    const output = record.output;
-    if (output && typeof output === "object") {
-      const outputRecord = output as Record<string, unknown>;
-      if (outputRecord.type === "text" && typeof outputRecord.value === "string") {
-        const compressed = compressVerboseText(outputRecord.value);
-        return {
-          ...record,
-          output: {
-            ...outputRecord,
-            value: truncateTextWithHeadAndTail(compressed, limits.maxToolResultChars),
-          },
-        };
-      }
-    }
-  }
-
-  if (limits.omitLargeAttachments && type === "image" && typeof record.image === "string") {
-    return omittedAttachmentTextPart("image", record.image, record);
-  }
-
-  if (limits.omitLargeAttachments && type === "file" && typeof record.data === "string") {
-    return omittedAttachmentTextPart("file", record.data, record);
-  }
-
-  return part;
-}
-
-function omittedAttachmentTextPart(
-  label: "image" | "file",
-  payload: string,
-  record: Record<string, unknown>,
-): { type: "text"; text: string } {
-  const details = [
-    typeof record.filename === "string" ? `filename=${record.filename}` : undefined,
-    typeof record.mediaType === "string" ? `mediaType=${record.mediaType}` : undefined,
-    `${payload.length} chars`,
-  ].filter(Boolean).join(", ");
-
-  return {
-    type: "text",
-    text: `[${label} attachment omitted to keep the AI request small: ${details}]`,
-  };
-}
-
-export function fitMessagesToRequestPayloadBudget({
-  messages,
-  maxPayloadBytes = DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
-  reservedBytes = 0,
-  maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
-  maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
-  protectRecentMessages = DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES,
-  preserveLatestMessage = true,
-}: FitMessagesToRequestPayloadBudgetInput): FitMessagesToRequestPayloadBudgetResult {
-  const budget = Math.max(0, maxPayloadBytes - Math.max(0, reservedBytes));
-  if (budget === 0) {
-    return { messages: [], didAdjust: messages.length > 0, estimatedBytes: 0 };
-  }
-  const originalBytes = estimateUtf8Bytes(messages);
-  if (originalBytes <= budget) {
-    return { messages, didAdjust: false, estimatedBytes: originalBytes };
-  }
-
-  const shouldPreserveMessage = (message: ModelMessage, index: number, list: ModelMessage[]) => (
-    preserveLatestMessage && index === list.length - 1 && message.role === "user"
-  );
-
-  let adjusted = messages.map((message, index) => truncateModelMessageForPayload(message, {
-    maxToolResultChars,
-    maxMessageTextChars,
-    preserveContent: shouldPreserveMessage(message, index, messages),
-  }));
-  let estimatedBytes = estimateUtf8Bytes(adjusted);
-  let didAdjust = estimatedBytes !== originalBytes;
-  if (estimatedBytes <= budget) {
-    return { messages: adjusted, didAdjust, estimatedBytes };
-  }
-
-  const toolResultCaps = [
-    maxToolResultChars,
-    Math.floor(maxToolResultChars * 0.6),
-    Math.floor(maxToolResultChars * 0.35),
-    4_000,
-    2_000,
-    1_000,
-  ];
-  const messageTextCaps = [
-    maxMessageTextChars,
-    Math.floor(maxMessageTextChars * 0.6),
-    Math.floor(maxMessageTextChars * 0.35),
-    8_000,
-    4_000,
-    2_000,
-  ];
-
-  for (let i = 1; i < toolResultCaps.length; i += 1) {
-    adjusted = adjusted.map((message, index) => truncateModelMessageForPayload(message, {
-      maxToolResultChars: toolResultCaps[i],
-      maxMessageTextChars: messageTextCaps[i],
-      preserveContent: shouldPreserveMessage(message, index, adjusted),
-    }));
-    estimatedBytes = estimateUtf8Bytes(adjusted);
-    didAdjust = true;
-    if (estimatedBytes <= budget) {
-      return { messages: adjusted, didAdjust, estimatedBytes };
-    }
-  }
-
-  let working = [...adjusted];
-  while (working.length > protectRecentMessages) {
-    const splitAt = findSafeCompactionSplitIndex(working, protectRecentMessages);
-    if (splitAt <= 0) break;
-    working = working.slice(splitAt);
-    estimatedBytes = estimateUtf8Bytes(working);
-    didAdjust = true;
-    if (estimatedBytes <= budget) {
-      return { messages: working, didAdjust, estimatedBytes };
-    }
-  }
-
-  const emergencyToolCap = 600;
-  const emergencyTextCap = 1_200;
-  working = working.map((message, index) => truncateModelMessageForPayload(message, {
-    maxToolResultChars: emergencyToolCap,
-    maxMessageTextChars: emergencyTextCap,
-    omitLargeAttachments: true,
-    preserveContent: shouldPreserveMessage(message, index, working),
-  }));
-  estimatedBytes = estimateUtf8Bytes(working);
-  didAdjust = true;
-
-  let emergencyProtect = Math.min(protectRecentMessages, working.length);
-  while (estimatedBytes > budget && working.length > 1) {
-    emergencyProtect = Math.max(1, emergencyProtect - 1);
-    const splitAt = findSafeCompactionSplitIndex(working, emergencyProtect);
-    if (splitAt <= 0) {
-      working = working.slice(-1);
-    } else {
-      working = working.slice(splitAt);
-    }
-    working = working.map((message, index) => truncateModelMessageForPayload(message, {
-      maxToolResultChars: emergencyToolCap,
-      maxMessageTextChars: emergencyTextCap,
-      omitLargeAttachments: true,
-      preserveContent: shouldPreserveMessage(message, index, working),
-    }));
-    estimatedBytes = estimateUtf8Bytes(working);
-  }
-
-  let finalTextCap = emergencyTextCap;
-  let finalToolCap = emergencyToolCap;
-  while (estimatedBytes > budget && (finalTextCap > 32 || finalToolCap > 32)) {
-    finalTextCap = Math.max(32, Math.floor(finalTextCap * 0.6));
-    finalToolCap = Math.max(32, Math.floor(finalToolCap * 0.6));
-    working = working.map((message) => truncateModelMessageForPayload(message, {
-      maxToolResultChars: finalToolCap,
-      maxMessageTextChars: finalTextCap,
-      omitLargeAttachments: true,
-      preserveContent: false,
-    }));
-    estimatedBytes = estimateUtf8Bytes(working);
-  }
-
-  return { messages: working, didAdjust, estimatedBytes };
-}
--- a/infrastructure/ai/requestPayloadCompression.test.ts
+++ b/infrastructure/ai/requestPayloadCompression.test.ts
@@ -0,0 +1,74 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import type { ModelMessage } from "ai";
+
+import {
+  compressMessagesForRequestTooLargeRetry,
+  compressVerboseText,
+  truncateTextWithHeadAndTail,
+} from "./requestPayloadCompression.ts";
+
+test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
+  const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
+  // Pin the exact result: 5 newlines shrink to 3, and only three of the four "same" lines survive.
+  // (Splitting on "\nsame\n" yields 3 parts even for the uncompressed input, so it proves nothing.)
+  assert.equal(compressVerboseText(input), "line1\n\n\nline2\nsame\nsame\nsame\nline3");
+});
+
+test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
+  // 500 "A"s, then 20,000 "B"s, then 500 "C"s, cut down to a 2,000-character limit.
+  const truncated = truncateTextWithHeadAndTail(["A".repeat(500), "B".repeat(20_000), "C".repeat(500)].join(""), 2_000);
+  assert.ok(truncated.startsWith("AAA"));
+  assert.ok(truncated.includes("[... output truncated for request size ...]"));
+  assert.ok(truncated.endsWith("CCC"));
+  assert.ok(truncated.length <= 2_000);
+});
+
+test("compressMessagesForRequestTooLargeRetry compresses messages without enforcing a byte budget", () => {
+  const messages: ModelMessage[] = [
+    { role: "user", content: "run build" },
+    {
+      role: "tool",
+      content: [{
+        type: "tool-result",
+        toolCallId: "call-1",
+        toolName: "terminal_execute",
+        output: { type: "text", value: "X".repeat(200_000) },
+      }],
+    },
+    {
+      role: "user",
+      content: [
+        { type: "text", text: "please inspect this image" },
+        { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
+      ],
+    },
+  ];
+  // Input: a short prompt, a 200,000-character tool result, and a user turn with a 1,000,000-character image.
+  const result = compressMessagesForRequestTooLargeRetry(messages);
+  // The result exposes only `messages` and `didAdjust`, and no message is dropped.
+  assert.equal(result.didAdjust, true);
+  assert.deepEqual(Object.keys(result).sort(), ["didAdjust", "messages"]);
+  assert.equal(result.messages.length, messages.length);
+  // The tool output must come back shorter than 5,000 characters.
+  const toolContent = result.messages[1].content;
+  assert.ok(Array.isArray(toolContent));
+  const toolPart = toolContent[0] as { output?: { value?: string } };
+  assert.ok((toolPart.output?.value?.length ?? 0) < 5_000);
+  // The image part is swapped for a text placeholder describing what was omitted.
+  const userContent = result.messages[2].content;
+  assert.ok(Array.isArray(userContent));
+  assert.deepEqual(userContent[1], {
+    type: "text",
+    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
+  });
+});
+
+test("compressMessagesForRequestTooLargeRetry reports no adjustment for compact messages", () => {
+  const messages: ModelMessage[] = [{ role: "user", content: "hello" }];
+  // A short plain-text message has nothing to compress or truncate.
+  const result = compressMessagesForRequestTooLargeRetry(messages);
+
+  assert.equal(result.didAdjust, false);
+  assert.deepEqual(result.messages, messages);
+});
--- a/infrastructure/ai/requestPayloadCompression.ts
+++ b/infrastructure/ai/requestPayloadCompression.ts
@@ -0,0 +1,167 @@
+import type { ModelMessage } from "ai";
+
+const RETRY_MAX_TOOL_RESULT_CHARS = 4_000; // cap applied to text tool-result values on retry
+const RETRY_MAX_MESSAGE_TEXT_CHARS = 8_000; // cap applied to string content and text parts on retry
+const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n"; // default marker placed between the kept head and tail
+const HEAD_CHARS = 800; // default number of leading characters kept by truncateTextWithHeadAndTail
+const TAIL_CHARS = 4_000; // default number of trailing characters kept by truncateTextWithHeadAndTail
+
+export interface CompressMessagesForRequestTooLargeRetryResult { // returned by compressMessagesForRequestTooLargeRetry
+  messages: ModelMessage[]; // the input array itself when nothing changed, otherwise a new array
+  didAdjust: boolean; // true when at least one message was replaced
+}
+
+/**
+ * Shrinks noisy terminal/build output.
+ *
+ * CRLF becomes LF, runs of four or more newlines collapse to three, and a line
+ * repeated back-to-back is kept at most three times in a row.
+ *
+ * @param value Raw text; an empty string is returned unchanged.
+ * @returns The compacted text, with lines joined by "\n".
+ */
+export function compressVerboseText(value: string): string {
+  if (value.length === 0) return value;
+
+  // Normalise line endings first so both later steps only deal with "\n".
+  const normalized = value.replace(/\r\n/g, "\n").replace(/\n{4,}/g, "\n\n\n");
+  const kept: string[] = [];
+  // Number of consecutive duplicates seen since the first occurrence of a line.
+  let extraCopies = 0;
+  for (const line of normalized.split("\n")) {
+    const isRepeat = kept.length > 0 && kept[kept.length - 1] === line;
+    extraCopies = isRepeat ? extraCopies + 1 : 0;
+    // The first occurrence plus two duplicates survive; further copies are dropped.
+    if (extraCopies <= 2) kept.push(line);
+  }
+
+  return kept.join("\n");
+}
+
+/**
+ * Shortens `value` to at most `maxChars` characters by keeping its head and tail
+ * around `marker`; limits too small for that get a plain prefix instead.
+ *
+ * @param value Text to shorten; returned unchanged when it already fits.
+ * @param maxChars Maximum length of the returned string.
+ * @returns The original text, a hard-cut prefix, or `head + marker + tail`.
+ */
+export function truncateTextWithHeadAndTail(
+  value: string,
+  maxChars: number,
+  {
+    headChars = HEAD_CHARS,
+    tailChars = TAIL_CHARS,
+    marker = TRUNCATION_MARKER,
+  }: { headChars?: number; tailChars?: number; marker?: string } = {},
+): string {
+  if (value.length <= maxChars) return value;
+  // Too little room for the marker plus useful context: fall back to a prefix.
+  if (maxChars <= marker.length + 16) return value.slice(0, maxChars);
+
+  const budget = maxChars - marker.length;
+  const head = Math.max(0, Math.min(headChars, budget));
+  const tail = Math.max(0, Math.min(tailChars, budget - head));
+  if (head + tail === 0 || head + tail >= value.length) {
+    return value.slice(0, maxChars);
+  }
+
+  // `slice(-0)` is `slice(0)` and would append the whole input, so an empty
+  // tail (the head used up the entire budget) must be handled explicitly.
+  const ending = tail > 0 ? value.slice(-tail).trimStart() : "";
+  return `${value.slice(0, head).trimEnd()}${marker}${ending}`;
+}
+
+/**
+ * Compresses each message for the retry that follows an HTTP 413 response.
+ *
+ * @param messages Messages of the rejected request; never mutated.
+ * @returns The compressed messages plus `didAdjust`; when nothing changed,
+ *   `messages` is the input array itself.
+ */
+export function compressMessagesForRequestTooLargeRetry(
+  messages: ModelMessage[],
+): CompressMessagesForRequestTooLargeRetryResult {
+  const next = messages.map((message) => compressModelMessageForRequestRetry(message));
+  // The helper returns the same object when untouched, so identity comparison detects edits.
+  const didAdjust = next.some((message, index) => message !== messages[index]);
+  return { messages: didAdjust ? next : messages, didAdjust };
+}
+
+/**
+ * Compresses one message: string content is compacted and truncated, array
+ * content is processed part by part. Returns the same object when nothing changed.
+ */
+function compressModelMessageForRequestRetry(message: ModelMessage): ModelMessage {
+  const { content } = message;
+  if (typeof content === "string") {
+    const shortened = compressAndTruncateText(content, RETRY_MAX_MESSAGE_TEXT_CHARS);
+    return shortened === content ? message : { ...message, content: shortened };
+  }
+  if (!Array.isArray(content)) return message;
+
+  const parts = content.map((part) => compressContentPartForRequestRetry(part));
+  // Identity comparison works because untouched parts are returned as-is.
+  const changed = parts.some((part, index) => part !== content[index]);
+  return changed ? { ...message, content: parts } : message;
+}
+
+/**
+ * Compresses a single content part for the retry request.
+ *
+ * Text parts and text tool-result outputs are compacted and truncated; image and
+ * file parts whose payload is a string are replaced by a short text placeholder.
+ * Anything else, and any part that is already compact, is returned as-is.
+ */
+function compressContentPartForRequestRetry(part: unknown): unknown {
+  if (typeof part !== "object" || part === null) return part;
+  const record = part as Record<string, unknown>;
+
+  switch (record.type) {
+    case "text": {
+      if (typeof record.text !== "string") return part;
+      const text = compressAndTruncateText(record.text, RETRY_MAX_MESSAGE_TEXT_CHARS);
+      return text === record.text ? part : { ...record, text };
+    }
+    case "tool-result": {
+      const output = record.output;
+      if (typeof output !== "object" || output === null) return part;
+      const outputRecord = output as Record<string, unknown>;
+      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") return part;
+      const value = compressAndTruncateText(outputRecord.value, RETRY_MAX_TOOL_RESULT_CHARS);
+      return value === outputRecord.value ? part : { ...record, output: { ...outputRecord, value } };
+    }
+    // Attachments are dropped outright; only a description of them is kept.
+    case "image":
+      return typeof record.image === "string"
+        ? omittedAttachmentTextPart("image", record.image, record)
+        : part;
+    case "file":
+      return typeof record.data === "string"
+        ? omittedAttachmentTextPart("file", record.data, record)
+        : part;
+    default:
+      return part;
+  }
+}
+
+function compressAndTruncateText(value: string, maxChars: number): string { // compact first, then cap at maxChars
+  return truncateTextWithHeadAndTail(compressVerboseText(value), maxChars);
+}
+
+/**
+ * Builds the text part that stands in for a dropped image/file attachment.
+ */
+function omittedAttachmentTextPart(
+  label: "image" | "file",
+  payload: string,
+  record: Record<string, unknown>,
+): { type: "text"; text: string } {
+  const details: string[] = [];
+  // Only string-valued metadata is reported; the payload size is always included.
+  if (typeof record.filename === "string") details.push(`filename=${record.filename}`);
+  if (typeof record.mediaType === "string") details.push(`mediaType=${record.mediaType}`);
+  details.push(`${payload.length} chars`);
+  const text = `[${label} attachment omitted to keep the AI request small: ${details.join(", ")}]`;
+  return { type: "text", text };
+}
--- a/infrastructure/ai/sdk/tools.ts
+++ b/infrastructure/ai/sdk/tools.ts
@@ -15,7 +15,7 @@ import {
 } from '../shared/toolExecutors';
 import { requestApproval } from '../shared/approvalGate';
 import { reserveSessionSlot } from '../shared/sessionExecutionQueue';
-import { truncateTextWithHeadAndTail } from '../requestPayloadBudget';
+import { truncateTextWithHeadAndTail } from '../requestPayloadCompression';

 const MAX_LIVE_TERMINAL_STDOUT_CHARS = 24_000;
 const MAX_LIVE_TERMINAL_STDERR_CHARS = 12_000;