fix(ai): compress Catty requests only after 413 (#1327)
Some checks failed
build-packages / dedupe push run (push) Has been cancelled
build-packages / dedupe result (push) Has been cancelled
build-packages / resolve bundled mosh-client (push) Has been cancelled
build-packages / resolve bundled et-client (push) Has been cancelled
build-packages / build-macos (push) Has been cancelled
build-packages / build-windows (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-x64' || 'build-linux-x64' }} (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-arm64' || 'build-linux-arm64' }} (push) Has been cancelled
build-packages / release (push) Has been cancelled
build-packages / bump homebrew tap (push) Has been cancelled

* fix(ai): compress Catty requests only after 413

* fix(ai): retry 413 after tool progress safely

* fix(ai): mark thrown 413 retries after tool progress

* fix(ai): preserve tool results in 413 retry
This commit is contained in:
陈大猫
2026-06-09 13:11:42 +08:00
committed by GitHub
parent 3bc373dbec
commit 517cbb6cee
10 changed files with 578 additions and 746 deletions

View File

@@ -91,6 +91,23 @@ test("buildHistoricalToolResultReplayText keeps non-terminal tool results intact
assert.equal(buildHistoricalToolResultReplayText(result, toolCall), "search result summary");
});
// With `preserveTerminalOutput: true`, a terminal tool result must be replayed
// verbatim.
test("buildHistoricalToolResultReplayText can preserve terminal output for 413 retries", () => {
  const terminalOutput = "real terminal output";
  const terminalCall: ToolCall = {
    id: "call-1",
    name: "terminal_execute",
    arguments: { command: "npm test" },
  };
  const terminalResult: ToolResult = {
    toolCallId: "call-1",
    content: terminalOutput,
  };

  const replayed = buildHistoricalToolResultReplayText(terminalResult, terminalCall, {
    preserveTerminalOutput: true,
  });

  assert.equal(replayed, terminalOutput);
});
test("buildHistoricalToolReplayMaps pairs reused tool ids with the nearest preceding call", () => {
const messages: ChatMessage[] = [
{

View File

@@ -112,9 +112,14 @@ function findLastIndex<T>(items: T[], predicate: (item: T) => boolean): number {
export function buildHistoricalToolResultReplayText(
result: ToolResult,
toolCall?: ToolCall,
{
preserveTerminalOutput = false,
}: {
preserveTerminalOutput?: boolean;
} = {},
): string {
const toolName = toolCall?.name ?? "unknown";
if (!isTerminalToolName(toolName)) {
if (!isTerminalToolName(toolName) || preserveTerminalOutput) {
return result.content;
}

View File

@@ -21,6 +21,7 @@ import type {
ExternalAgentConfig,
ProviderAdvancedParams,
ProviderConfig,
ToolResult,
WebSearchConfig,
} from '../../../infrastructure/ai/types';
import { isWebSearchReady } from '../../../infrastructure/ai/types';
@@ -36,9 +37,12 @@ import {
resolveContextWindow,
} from '../../../infrastructure/ai/contextCompaction';
import {
estimateUtf8Bytes,
fitMessagesToRequestPayloadBudget,
} from '../../../infrastructure/ai/requestPayloadBudget';
compressMessagesForRequestTooLargeRetry,
} from '../../../infrastructure/ai/requestPayloadCompression';
import {
createCattyRequestTooLargeRetryError,
hadToolProgressBeforeRequestTooLarge,
} from '../../../infrastructure/ai/cattyRequestTooLargeRetry';
import { createModelFromConfig } from '../../../infrastructure/ai/sdk/providers';
import { createCattyTools } from '../../../infrastructure/ai/sdk/tools';
import type { ExecutorContext } from '../../../infrastructure/ai/cattyAgent/executor';
@@ -50,6 +54,7 @@ import {
buildPromptWithTerminalSelectionAttachments,
isTerminalSelectionAttachment,
} from '../../../application/state/terminalSelectionAttachment';
import { latestAISessionsSnapshot } from '../../../application/state/aiStateSnapshots';
import {
buildHistoricalToolReplayMaps,
buildHistoricalToolResultReplayText,
@@ -343,7 +348,7 @@ export function useAIChatStreaming({
// Track the current assistant message ID so updates target the correct message
let activeMsgId = currentAssistantMsgId;
let lastAddedRole: 'assistant' | 'tool' = 'assistant';
let hasRetryUnsafeToolProgress = false;
let hadToolProgress = false;
const reader = result.fullStream.getReader();
// -- Text-delta batching: accumulate deltas and flush periodically --
@@ -419,7 +424,16 @@ export function useAIChatStreaming({
try {
while (true) {
const { done, value } = await reader.read();
let readResult: ReadableStreamReadResult<unknown>;
try {
readResult = await reader.read();
} catch (readErr) {
if (isRequestTooLargeError(readErr)) {
throw createCattyRequestTooLargeRetryError(readErr, hadToolProgress);
}
throw readErr;
}
const { done, value } = readResult;
if (done) break;
// Use the StreamChunk union for type narrowing instead of unsafe casts
const chunk = value as StreamChunk;
@@ -486,7 +500,7 @@ export function useAIChatStreaming({
cancelPendingFlush();
flushText();
const typedChunk = chunk as ToolCallChunk;
hasRetryUnsafeToolProgress = true;
hadToolProgress = true;
const messageId = ensureAssistantMessage();
const providerOptions = normalizeProviderContinuationOptions(typedChunk.providerMetadata);
updateMessageById(streamSessionId, messageId, msg => ({
@@ -512,7 +526,7 @@ export function useAIChatStreaming({
cancelPendingFlush();
flushText();
const typedChunk = chunk as ToolResultChunk;
hasRetryUnsafeToolProgress = true;
hadToolProgress = true;
// Mark the assistant message's tool execution as completed
updateMessageById(streamSessionId, activeMsgId, msg =>
msg.role === 'assistant' && msg.executionStatus === 'running'
@@ -559,10 +573,13 @@ export function useAIChatStreaming({
console.warn('[Catty] suppressed SDK stream state error:', typedChunk.error);
break;
}
if (isRequestTooLargeError(typedChunk.error) && !hasRetryUnsafeToolProgress) {
if (isRequestTooLargeError(typedChunk.error)) {
cancelPendingFlush();
flushText();
throw typedChunk.error;
throw createCattyRequestTooLargeRetryError(
typedChunk.error,
hadToolProgress,
);
}
cancelPendingFlush();
flushText();
@@ -796,44 +813,86 @@ export function useAIChatStreaming({
};
try {
// Issue #5: Build SDK messages including tool-call and tool-result messages
// so the LLM maintains full conversation context
const allMessages = currentSession?.messages ?? [];
let openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
const buildSdkMessages = (
allMessages: ChatMessage[],
includeCurrentUserMessage: boolean,
{
preserveTerminalToolResults = new Set<ToolResult>(),
}: {
preserveTerminalToolResults?: ReadonlySet<ToolResult>;
} = {},
): Array<ModelMessage> => {
const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
const nextFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
const sdkMessages: Array<ModelMessage> = [];
let previousHistoryMessageWasToolResult = false;
const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
const sdkMessages: Array<ModelMessage> = [];
const openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
let previousHistoryMessageWasToolResult = false;
for (const m of allMessages) {
const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
if (m.role === 'user') {
// Historical attachments are replayed as placeholders so screenshots,
// files, and terminal selections do not balloon every follow-up request.
const messageAttachments = m.attachments ?? m.images;
sdkMessages.push({
role: 'user',
content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
});
} else if (m.role === 'assistant') {
const activeContinuation = isProviderContinuationForSource(
m.providerContinuation,
continuationContext.source,
)
? m.providerContinuation
: undefined;
const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
m,
continuationContext.source,
);
if (m.toolCalls?.length) {
// Only include tool calls that have matching results
const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
const resolvedCalls = resolvedToolCalls
? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
: [];
const contentParts: AssistantContentPart[] = [];
if (resolvedCalls.length > 0) {
for (const m of allMessages) {
const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
if (m.role === 'user') {
// Historical attachments are replayed as placeholders so screenshots,
// files, and terminal selections do not balloon every follow-up request.
const messageAttachments = m.attachments ?? m.images;
sdkMessages.push({
role: 'user',
content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
});
} else if (m.role === 'assistant') {
const activeContinuation = isProviderContinuationForSource(
m.providerContinuation,
continuationContext.source,
)
? m.providerContinuation
: undefined;
const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
m,
continuationContext.source,
);
if (m.toolCalls?.length) {
// Only include tool calls that have matching results
const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
const resolvedCalls = resolvedToolCalls
? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
: [];
const contentParts: AssistantContentPart[] = [];
if (resolvedCalls.length > 0) {
for (const part of activeContinuation?.reasoningParts ?? []) {
if (!part.text && !part.providerOptions) continue;
contentParts.push({
type: 'reasoning' as const,
text: part.text,
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
});
}
}
if (m.content) {
contentParts.push({
type: 'text' as const,
text: m.content,
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
});
}
for (const tc of resolvedCalls) {
const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
contentParts.push({
type: 'tool-call' as const,
toolCallId: tc.id,
toolName: tc.name,
input: tc.arguments ?? {},
...(providerOptions ? { providerOptions } : {}),
});
}
// If all tool calls were orphaned, just include the text content
if (contentParts.length > 0) {
const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
sdkMessages.push(message);
if (resolvedCalls.length > 0) {
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
}
}
} else if (m.content) {
const contentParts: AssistantContentPart[] = [];
for (const part of activeContinuation?.reasoningParts ?? []) {
if (!part.text && !part.providerOptions) continue;
contentParts.push({
@@ -842,95 +901,91 @@ export function useAIChatStreaming({
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
});
}
}
if (m.content) {
contentParts.push({
type: 'text' as const,
text: m.content,
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
});
}
for (const tc of resolvedCalls) {
const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
contentParts.push({
type: 'tool-call' as const,
toolCallId: tc.id,
toolName: tc.name,
input: tc.arguments ?? {},
...(providerOptions ? { providerOptions } : {}),
});
}
// If all tool calls were orphaned, just include the text content
if (contentParts.length > 0) {
const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
const message: ModelMessage = {
role: 'assistant',
content: toAssistantModelContent(contentParts),
};
sdkMessages.push(message);
if (resolvedCalls.length > 0) {
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
if (currentMessageFollowsToolResult) {
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
}
}
} else if (m.content) {
const contentParts: AssistantContentPart[] = [];
for (const part of activeContinuation?.reasoningParts ?? []) {
if (!part.text && !part.providerOptions) continue;
contentParts.push({
type: 'reasoning' as const,
text: part.text,
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
});
}
contentParts.push({
type: 'text' as const,
text: m.content,
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
} else if (m.role === 'tool' && m.toolResults?.length) {
sdkMessages.push({
role: 'tool',
content: m.toolResults.map(tr => {
const toolCall = toolCallByToolResult.get(tr);
return {
type: 'tool-result' as const,
toolCallId: tr.toolCallId,
toolName: toolCall?.name ?? 'unknown',
output: {
type: 'text' as const,
value: buildHistoricalToolResultReplayText(tr, toolCall, {
preserveTerminalOutput: preserveTerminalToolResults.has(tr),
}),
},
};
}),
});
const message: ModelMessage = {
role: 'assistant',
content: toAssistantModelContent(contentParts),
};
sdkMessages.push(message);
if (currentMessageFollowsToolResult) {
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
}
previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
}
if (includeCurrentUserMessage) {
// Build the current user message — include attachments as multimodal content
if (attachments?.length) {
const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
const modelAttachments = attachments.filter(
(attachment) => !isTerminalSelectionAttachment(attachment),
);
if (!modelAttachments.length) {
sdkMessages.push({ role: 'user', content: modelText });
} else {
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
parts.push({ type: 'text', text: modelText });
for (const att of modelAttachments) {
if (att.mediaType.startsWith('image/')) {
parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
} else {
parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
}
}
sdkMessages.push({ role: 'user', content: parts });
}
}
} else if (m.role === 'tool' && m.toolResults?.length) {
sdkMessages.push({
role: 'tool',
content: m.toolResults.map(tr => {
const toolCall = toolCallByToolResult.get(tr);
return {
type: 'tool-result' as const,
toolCallId: tr.toolCallId,
toolName: toolCall?.name ?? 'unknown',
output: { type: 'text' as const, value: buildHistoricalToolResultReplayText(tr, toolCall) },
};
}),
});
}
previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
}
// Build the current user message — include attachments as multimodal content
if (attachments?.length) {
const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
const modelAttachments = attachments.filter(
(attachment) => !isTerminalSelectionAttachment(attachment),
);
if (!modelAttachments.length) {
sdkMessages.push({ role: 'user', content: modelText });
} else {
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
parts.push({ type: 'text', text: modelText });
for (const att of modelAttachments) {
if (att.mediaType.startsWith('image/')) {
parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
} else {
parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
sdkMessages.push({ role: 'user', content: trimmed });
}
}
sdkMessages.push({ role: 'user', content: parts });
openAIChatAssistantFieldsByMessage = nextFieldsByMessage;
return sdkMessages;
};
const sdkMessages = buildSdkMessages(currentSession?.messages ?? [], true);
const collectToolResultsAfterMessage = (
messages: ChatMessage[],
messageId: string,
): Set<ToolResult> => {
const results = new Set<ToolResult>();
let afterMessage = false;
for (const message of messages) {
if (message.id === messageId) {
afterMessage = true;
continue;
}
if (!afterMessage || message.role !== 'tool' || !message.toolResults?.length) continue;
for (const result of message.toolResults) {
results.add(result);
}
}
} else {
sdkMessages.push({ role: 'user', content: trimmed });
}
return results;
};
// Create model with placeholder API key — the main process injects the real
// decrypted key when the HTTP request is proxied through IPC, so plaintext
@@ -958,20 +1013,12 @@ export function useAIChatStreaming({
defaultContextWindow: DEFAULT_CONTEXT_WINDOW_TOKENS,
});
const outputReserveTokens = Math.min(4096, Math.ceil(contextWindow * 0.05));
const requestReserveTokens = outputReserveTokens + estimateUnknownTokens({
const getRequestReserveTokens = () => outputReserveTokens + estimateUnknownTokens({
systemPrompt,
toolNames: Object.keys(tools),
openAIChatAssistantFields: Array.from(openAIChatAssistantFieldsByMessage.values()),
});
const payloadReservedBytes = estimateUtf8Bytes({
system: systemPrompt,
tools: Object.keys(tools),
});
const applyRequestPayloadBudget = (messages: ModelMessage[]) => fitMessagesToRequestPayloadBudget({
messages,
reservedBytes: payloadReservedBytes,
});
const summarizeForCompaction = async (messagesToSummarize: ModelMessage[]) => {
updateLastMessage(sessionId, msg => ({ ...msg, statusText: 'Compacting earlier context...' }));
const result = await generateText({
@@ -999,64 +1046,64 @@ export function useAIChatStreaming({
);
return pruned;
};
const compactAndBudgetMessages = async (
const compactMessages = async (
messages: ModelMessage[],
{
force = false,
statusText,
trimLog,
fallbackLog,
compressForRequestTooLargeRetry = false,
compressionLog,
}: {
force?: boolean;
statusText?: string;
trimLog: string;
fallbackLog: string;
compressForRequestTooLargeRetry?: boolean;
compressionLog?: string;
},
): Promise<ModelMessage[]> => {
const compressRetryMessages = (candidateMessages: ModelMessage[], log?: string): ModelMessage[] => {
if (!compressForRequestTooLargeRetry) return candidateMessages;
const compressed = compressMessagesForRequestTooLargeRetry(candidateMessages);
if (compressed.didAdjust && log) {
console.warn(log);
}
return compressed.messages;
};
try {
if (statusText) {
updateLastMessage(sessionId, msg => ({ ...msg, statusText }));
}
const inputMessages = compressRetryMessages(messages, compressionLog);
const compacted = await prepareContextCompaction({
messages,
messages: inputMessages,
contextWindow,
reservedTokens: requestReserveTokens,
reservedTokens: getRequestReserveTokens(),
thresholdRatio: force ? 0 : undefined,
protectRecentMessages: DEFAULT_PROTECT_RECENT_MESSAGES,
summarize: summarizeForCompaction,
});
let nextMessages = force && !compacted.didCompact
? keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES)
? keepRecentContextMessages(inputMessages, DEFAULT_PROTECT_RECENT_MESSAGES)
: compacted.messages;
const budgetResult = applyRequestPayloadBudget(nextMessages);
if (budgetResult.didAdjust) {
console.warn(`${trimLog} ${budgetResult.estimatedBytes} bytes.`);
nextMessages = budgetResult.messages;
}
return nextMessages;
return compressRetryMessages(nextMessages);
} catch (err) {
if (abortController.signal.aborted) throw err;
console.warn(fallbackLog, err);
const fallbackBudget = applyRequestPayloadBudget(
keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES),
);
if (fallbackBudget.didAdjust) {
console.warn(
`[Catty] Request payload trimmed to ${fallbackBudget.estimatedBytes} bytes after compaction fallback.`,
);
const fallbackMessages = keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES);
if (!compressForRequestTooLargeRetry) {
return fallbackMessages;
}
return fallbackBudget.messages;
const compressed = compressMessagesForRequestTooLargeRetry(fallbackMessages);
if (compressed.didAdjust) {
console.warn('[Catty] Request content compressed after compaction fallback.');
}
return compressed.messages;
}
};
const payloadBudgetResult = applyRequestPayloadBudget(sdkMessages);
let messagesForStream = payloadBudgetResult.messages;
if (payloadBudgetResult.didAdjust) {
console.warn(
`[Catty] Request payload trimmed to ${payloadBudgetResult.estimatedBytes} bytes to avoid HTTP 413.`,
);
}
messagesForStream = await compactAndBudgetMessages(messagesForStream, {
trimLog: '[Catty] Request payload re-trimmed after context compaction to',
let messagesForStream = sdkMessages;
messagesForStream = await compactMessages(messagesForStream, {
fallbackLog: '[Catty] Context compaction failed; falling back to recent messages only:',
});
@@ -1080,23 +1127,50 @@ export function useAIChatStreaming({
}
console.warn('[Catty] Request hit HTTP 413; forcing context compaction and retrying once.', streamErr);
updateMessageById(sessionId, assistantMsgId, msg => ({
...msg,
content: '',
thinking: undefined,
thinkingDurationMs: undefined,
providerContinuation: undefined,
toolCalls: undefined,
errorInfo: undefined,
executionStatus: undefined,
pendingApproval: undefined,
statusText: 'Request was too large. Compacting context and retrying...',
}));
const retryMessages = prepareMessagesForStream(await compactAndBudgetMessages(messagesForStream, {
const statusText = 'Request was too large. Compacting context and retrying...';
const hadToolProgress = hadToolProgressBeforeRequestTooLarge(streamErr);
let retryBaseMessages = messagesForStream;
let retryAssistantMsgId = assistantMsgId;
if (hadToolProgress) {
const latestSession = latestAISessionsSnapshot?.find(session => session.id === sessionId);
if (latestSession) {
retryBaseMessages = buildSdkMessages(latestSession.messages, false, {
preserveTerminalToolResults: collectToolResultsAfterMessage(
latestSession.messages,
assistantMsgId,
),
});
}
retryAssistantMsgId = generateId();
addMessageToSession(sessionId, {
id: retryAssistantMsgId,
role: 'assistant',
content: '',
timestamp: Date.now(),
model: activeModelId || context.activeProvider?.defaultModel || '',
providerId: context.activeProvider?.providerId,
statusText,
});
} else {
updateMessageById(sessionId, assistantMsgId, msg => ({
...msg,
content: '',
thinking: undefined,
thinkingDurationMs: undefined,
providerContinuation: undefined,
toolCalls: undefined,
errorInfo: undefined,
executionStatus: undefined,
pendingApproval: undefined,
statusText,
}));
}
const retryMessages = prepareMessagesForStream(await compactMessages(retryBaseMessages, {
force: true,
statusText: 'Request was too large. Compacting context and retrying...',
trimLog: '[Catty] Request payload trimmed after forced context compaction to',
statusText,
fallbackLog: '[Catty] Forced context compaction after 413 failed; falling back to recent messages only:',
compressForRequestTooLargeRetry: true,
compressionLog: '[Catty] Request content compressed after forced context compaction.',
}));
await processCattyStream(
@@ -1106,7 +1180,7 @@ export function useAIChatStreaming({
tools,
retryMessages,
abortController.signal,
assistantMsgId,
retryAssistantMsgId,
context.activeProvider?.advancedParams,
continuationContext,
);
@@ -1123,7 +1197,7 @@ export function useAIChatStreaming({
}
}, [
processCattyStream, reportStreamError, setStreamingForScope,
updateLastMessage, updateMessageById,
addMessageToSession, updateLastMessage, updateMessageById,
]);
return {

View File

@@ -0,0 +1,29 @@
import test from "node:test";
import assert from "node:assert/strict";
import {
createCattyRequestTooLargeRetryError,
hadToolProgressBeforeRequestTooLarge,
} from "./cattyRequestTooLargeRetry.ts";
// The retry error must carry the 413 status, the original status/body fields,
// the original error as `cause`, and the tool-progress marker.
test("createCattyRequestTooLargeRetryError marks 413 retry errors after tool progress", () => {
  const responseBody = "<html>too large</html>";
  const original = Object.assign(new Error("HTTP 413 Request Entity Too Large"), {
    status: 413,
    responseBody,
  });

  const wrapped = createCattyRequestTooLargeRetryError(original, true);

  assert.equal(wrapped.statusCode, 413);
  assert.equal(wrapped.status, 413);
  assert.equal(wrapped.responseBody, responseBody);
  assert.equal(wrapped.cause, original);
  assert.equal(hadToolProgressBeforeRequestTooLarge(wrapped), true);
});
// Neither a retry error created without tool progress nor an ordinary Error
// may be reported as having tool progress.
test("hadToolProgressBeforeRequestTooLarge is false when no tool progress was recorded", () => {
  const withoutProgress: unknown[] = [
    createCattyRequestTooLargeRetryError("HTTP 413", false),
    new Error("HTTP 413"),
  ];
  for (const candidate of withoutProgress) {
    assert.equal(hadToolProgressBeforeRequestTooLarge(candidate), false);
  }
});

View File

@@ -0,0 +1,34 @@
/**
 * Error shape produced by `createCattyRequestTooLargeRetryError`.
 *
 * It is a regular `Error` decorated with the fields needed to decide how to
 * retry a request that was rejected as too large.
 */
export type CattyRequestTooLargeRetryError = Error & {
  /** The value that was wrapped (declared here so the type also works with pre-ES2022 libs). */
  cause?: unknown;
  /** Whether tool progress had been recorded before the failure. */
  cattyHadToolProgress?: boolean;
  /** Always set to 413 by the factory. */
  statusCode?: number;
  /** Copied from the wrapped value when it has a numeric `status`. */
  status?: number;
  /** Copied from the wrapped value when it has a string `responseBody`. */
  responseBody?: string;
};

/**
 * Wraps an arbitrary thrown value in a `CattyRequestTooLargeRetryError`.
 *
 * The message is taken, in order of preference, from:
 *   1. `error.message` when `error` is an `Error`;
 *   2. a non-empty string `message` property when `error` is a plain object
 *      (previously such objects were stringified to "[object Object]");
 *   3. the generic text "Request too large" for other objects and for
 *      `null`/`undefined`;
 *   4. `String(error)` for primitives.
 *
 * @param error - The value that was thrown or reported by the stream.
 * @param hadToolProgress - Stored on the result as `cattyHadToolProgress`.
 * @returns A new error with `statusCode` 413, `cause` set to `error`, and
 *   `status` / `responseBody` copied over when present with the right type.
 */
export function createCattyRequestTooLargeRetryError(
  error: unknown,
  hadToolProgress: boolean,
): CattyRequestTooLargeRetryError {
  const fallbackMessage = 'Request too large';
  const source: Record<string, unknown> | undefined =
    error !== null && typeof error === 'object'
      ? (error as Record<string, unknown>)
      : undefined;

  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else if (source) {
    // Plain objects: prefer their own message over "[object Object]".
    const candidate = source.message;
    message = typeof candidate === 'string' && candidate.length > 0
      ? candidate
      : fallbackMessage;
  } else {
    message = String(error ?? fallbackMessage);
  }

  const retryError = new Error(message) as CattyRequestTooLargeRetryError;
  retryError.name = 'CattyRequestTooLargeRetryError';
  retryError.cause = error;
  retryError.cattyHadToolProgress = hadToolProgress;
  retryError.statusCode = 413;

  if (source) {
    const status = source.status;
    if (typeof status === 'number') retryError.status = status;
    const responseBody = source.responseBody;
    if (typeof responseBody === 'string') retryError.responseBody = responseBody;
  }
  return retryError;
}
/**
 * Reports whether `error` carries a truthy `cattyHadToolProgress` marker.
 *
 * @param error - Any caught value; non-objects and `null` yield `false`.
 * @returns `true` only for an object whose `cattyHadToolProgress` is truthy.
 */
export function hadToolProgressBeforeRequestTooLarge(error: unknown): boolean {
  if (!error || typeof error !== 'object') {
    return false;
  }
  const { cattyHadToolProgress } = error as { cattyHadToolProgress?: boolean };
  return Boolean(cattyHadToolProgress);
}

View File

@@ -1,233 +0,0 @@
import test from "node:test";
import assert from "node:assert/strict";
import type { ModelMessage } from "ai";
import {
DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
compressVerboseText,
estimateUtf8Bytes,
fitMessagesToRequestPayloadBudget,
truncateTextWithHeadAndTail,
} from "./requestPayloadBudget.ts";
test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
const output = compressVerboseText(input);
assert.match(output, /line1\n\n\nline2/);
assert.ok(output.split("\nsame\n").length <= 3);
});
test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
const value = `${"A".repeat(500)}${"B".repeat(20_000)}${"C".repeat(500)}`;
const truncated = truncateTextWithHeadAndTail(value, 2_000);
assert.ok(truncated.startsWith("AAA"));
assert.ok(truncated.includes("[... output truncated for request size ...]"));
assert.ok(truncated.endsWith("CCC"));
assert.ok(truncated.length <= 2_000);
});
test("fitMessagesToRequestPayloadBudget truncates verbose tool results before dropping recent turns", () => {
const messages: ModelMessage[] = [
{ role: "user", content: "run build" },
{
role: "assistant",
content: [{
type: "tool-call",
toolCallId: "call-1",
toolName: "terminal_execute",
input: { command: "npm run build" },
}],
},
{
role: "tool",
content: [{
type: "tool-result",
toolCallId: "call-1",
toolName: "terminal_execute",
output: { type: "text", value: "X".repeat(200_000) },
}],
},
{ role: "user", content: "what failed?" },
];
const result = fitMessagesToRequestPayloadBudget({
messages,
maxPayloadBytes: 20_000,
reservedBytes: 2_000,
maxToolResultChars: 4_000,
protectRecentMessages: 4,
});
assert.equal(result.messages.length, 4);
const toolMessage = result.messages[2];
assert.equal(toolMessage.role, "tool");
assert.ok(Array.isArray(toolMessage.content));
const toolPart = toolMessage.content[0] as { output?: { value?: string } };
assert.ok((toolPart.output?.value?.length ?? 0) < 5_000);
assert.ok(result.estimatedBytes <= 20_000);
});
test("fitMessagesToRequestPayloadBudget drops older turns when truncation alone is insufficient", () => {
const messages: ModelMessage[] = [];
for (let turn = 0; turn < 12; turn += 1) {
messages.push({ role: "user", content: `question ${turn}` });
messages.push({ role: "assistant", content: `answer ${turn} ${"Z".repeat(20_000)}` });
}
messages.push({ role: "user", content: "latest question" });
const result = fitMessagesToRequestPayloadBudget({
messages,
maxPayloadBytes: 8_000,
reservedBytes: 500,
protectRecentMessages: 4,
maxMessageTextChars: 2_000,
});
assert.ok(result.messages.length < messages.length);
assert.equal(result.messages.at(-1)?.role, "user");
assert.match(String(result.messages.at(-1)?.content ?? ""), /latest question/);
assert.ok(result.estimatedBytes <= 8_000);
});
test("estimateUtf8Bytes measures JSON payload size in UTF-8 bytes", () => {
const bytes = estimateUtf8Bytes({ text: "caf\u00e9" });
assert.ok(bytes > 8);
});
test("estimateUtf8Bytes works in renderer-like environments without Buffer", () => {
const originalBuffer = globalThis.Buffer;
try {
(globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = undefined;
assert.equal(estimateUtf8Bytes({ text: "caf\u00e9" }), new TextEncoder().encode(JSON.stringify({ text: "caf\u00e9" })).byteLength);
} finally {
(globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = originalBuffer;
}
});
test("default payload budget remains a general gateway guard", () => {
assert.equal(DEFAULT_MAX_REQUEST_PAYLOAD_BYTES, 1_500_000);
});
test("fitMessagesToRequestPayloadBudget preserves current long text when the request is under budget", () => {
const currentText = "CURRENT ".repeat(4_000);
const result = fitMessagesToRequestPayloadBudget({
messages: [{ role: "user", content: currentText }],
maxPayloadBytes: 100_000,
});
assert.equal(result.didAdjust, false);
assert.equal(result.messages[0].content, currentText);
});
test("fitMessagesToRequestPayloadBudget reports didAdjust when initial truncation succeeds", () => {
const messages: ModelMessage[] = [
{ role: "user", content: "run build" },
{
role: "tool",
content: [{
type: "tool-result",
toolCallId: "call-1",
toolName: "terminal_execute",
output: { type: "text", value: "X".repeat(200_000) },
}],
},
];
const result = fitMessagesToRequestPayloadBudget({
messages,
maxPayloadBytes: 20_000,
reservedBytes: 2_000,
});
assert.equal(result.didAdjust, true);
assert.ok(result.estimatedBytes <= 20_000);
});
test("fitMessagesToRequestPayloadBudget keeps dropping messages after emergency caps when still over budget", () => {
const messages: ModelMessage[] = [];
for (let turn = 0; turn < 8; turn += 1) {
messages.push({ role: "user", content: `question ${turn} ${"Q".repeat(5_000)}` });
messages.push({ role: "assistant", content: `answer ${turn} ${"A".repeat(5_000)}` });
}
const result = fitMessagesToRequestPayloadBudget({
messages,
maxPayloadBytes: 5_000,
protectRecentMessages: 8,
maxMessageTextChars: 2_000,
});
assert.ok(result.messages.length < messages.length);
assert.ok(result.estimatedBytes <= 5_000);
});
test("fitMessagesToRequestPayloadBudget shrinks a single oversized message for very small budgets", () => {
const result = fitMessagesToRequestPayloadBudget({
messages: [{ role: "assistant", content: "Z".repeat(1_000_000) }],
maxPayloadBytes: 1_000,
maxMessageTextChars: 500,
});
assert.equal(result.messages.length, 1);
assert.ok(result.estimatedBytes <= 1_000);
});
test("fitMessagesToRequestPayloadBudget returns empty messages when budget is fully reserved", () => {
const result = fitMessagesToRequestPayloadBudget({
messages: [{ role: "user", content: "hello" }],
maxPayloadBytes: 100,
reservedBytes: 200,
});
assert.deepEqual(result.messages, []);
assert.equal(result.didAdjust, true);
assert.equal(result.estimatedBytes, 0);
});
test("fitMessagesToRequestPayloadBudget omits latest attachments only when they are still over budget at the last resort", () => {
const result = fitMessagesToRequestPayloadBudget({
messages: [{
role: "user",
content: [
{ type: "text", text: "please inspect this image" },
{ type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
],
}],
maxPayloadBytes: 20_000,
});
assert.ok(result.estimatedBytes <= 20_000);
assert.equal(result.messages.length, 1);
const content = result.messages[0].content;
assert.ok(Array.isArray(content));
assert.deepEqual(content[1], {
type: "text",
text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
});
});
// Both messages survive; only the older message's image payload becomes a placeholder.
test("fitMessagesToRequestPayloadBudget omits older oversized attachment payloads as a last resort", () => {
  const { messages: fitted, estimatedBytes } = fitMessagesToRequestPayloadBudget({
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "older image" },
          { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
        ],
      },
      { role: "user", content: "current question" },
    ],
    maxPayloadBytes: 20_000,
    protectRecentMessages: 2,
  });

  assert.ok(estimatedBytes <= 20_000);
  assert.equal(fitted.length, 2);

  const olderParts = fitted[0].content;
  assert.ok(Array.isArray(olderParts));
  assert.deepEqual(olderParts[1], {
    type: "text",
    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
  });
});

View File

@@ -1,335 +0,0 @@
import type { ModelMessage } from "ai";
import { findSafeCompactionSplitIndex } from "./contextCompaction";
/** Stay below typical nginx `client_max_body_size` defaults (often 1-2 MB). */
export const DEFAULT_MAX_REQUEST_PAYLOAD_BYTES = 1_500_000;
/** Per tool-result text cap before the sliding window drops older turns. */
export const DEFAULT_MAX_TOOL_RESULT_CHARS = 12_000;
/** Per plain user/assistant text cap inside a single history message. */
export const DEFAULT_MAX_MESSAGE_TEXT_CHARS = 24_000;
/** Keep this many recent messages while trimming payload size. */
export const DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES = 8;
/** Default text placed between the kept head and tail by `truncateTextWithHeadAndTail`. */
const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
/** Default number of leading characters `truncateTextWithHeadAndTail` tries to keep. */
const HEAD_CHARS = 800;
/** Default number of trailing characters `truncateTextWithHeadAndTail` tries to keep. */
const TAIL_CHARS = 4_000;
/** Options accepted by `fitMessagesToRequestPayloadBudget`. */
export interface FitMessagesToRequestPayloadBudgetInput {
/** Conversation history to fit into the budget. */
messages: ModelMessage[];
/** Byte ceiling for the whole request; defaults to `DEFAULT_MAX_REQUEST_PAYLOAD_BYTES`. */
maxPayloadBytes?: number;
/** Bytes subtracted from `maxPayloadBytes` before fitting messages; negative values count as 0. */
reservedBytes?: number;
/** Starting character cap for text tool-result outputs; defaults to `DEFAULT_MAX_TOOL_RESULT_CHARS`. */
maxToolResultChars?: number;
/** Starting character cap for plain message text; defaults to `DEFAULT_MAX_MESSAGE_TEXT_CHARS`. */
maxMessageTextChars?: number;
/**
 * Older messages are only dropped in the first drop stage while more than this many remain.
 * The value is also forwarded to `findSafeCompactionSplitIndex`.
 */
protectRecentMessages?: number;
/**
 * When true (the default), a final message with role "user" is left untouched by
 * every pass except the last-resort shrink loop.
 */
preserveLatestMessage?: boolean;
}
/** Outcome of `fitMessagesToRequestPayloadBudget`. */
export interface FitMessagesToRequestPayloadBudgetResult {
/** The fitted messages; the original array itself when it already fit the budget. */
messages: ModelMessage[];
/** Whether the returned messages differ in estimated size or count from the input. */
didAdjust: boolean;
/** Estimated UTF-8 byte size of `messages` (0 when the budget was fully reserved). */
estimatedBytes: number;
}
/**
 * Estimate how many UTF-8 bytes `value` occupies once serialized as text.
 *
 * @param value - Any value; it is stringified before being measured.
 * @returns The byte length of the stringified value.
 */
export function estimateUtf8Bytes(value: unknown): number {
  return utf8ByteLength(stringifyForByteEstimate(value));
}
/**
 * Serialize `value` to text for size estimation.
 *
 * Uses JSON when possible and falls back to `String(value)` when JSON
 * serialization throws (for example on circular structures).
 *
 * @param value - Any value.
 * @returns The serialized text; always a string.
 */
function stringifyForByteEstimate(value: unknown): string {
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols, so coerce that case to "" to honor the declared return type.
    const json: string | undefined = JSON.stringify(value);
    return json ?? "";
  } catch {
    return String(value ?? "");
  }
}
/**
 * Count the UTF-8 bytes in `value`, treating a missing value as the empty string.
 *
 * Uses `Buffer.byteLength` when a `Buffer` global is available and
 * `TextEncoder` otherwise.
 */
function utf8ByteLength(value: string | undefined): number {
  const text = value == null ? "" : value;
  const hasBuffer = typeof Buffer !== "undefined" && typeof Buffer.byteLength === "function";
  return hasBuffer
    ? Buffer.byteLength(text, "utf8")
    : new TextEncoder().encode(text).byteLength;
}
/**
 * Collapse noisy terminal/build output before measuring payload size.
 *
 * Normalizes CRLF to LF, squeezes runs of four or more newlines down to three,
 * and keeps at most three consecutive copies of any identical line.
 *
 * @param value - Raw text; empty input is returned unchanged.
 * @returns The compacted text.
 */
export function compressVerboseText(value: string): string {
  if (!value) return value;

  const normalized = value.replace(/\r\n/g, "\n").replace(/\n{4,}/g, "\n\n\n");
  const kept: string[] = [];
  let extraRepeats = 0;

  normalized.split("\n").forEach((line) => {
    const repeatsPrevious = kept.length > 0 && kept[kept.length - 1] === line;
    if (!repeatsPrevious) {
      extraRepeats = 0;
      kept.push(line);
      return;
    }
    // The first copy is already stored; allow two more before dropping the rest.
    extraRepeats += 1;
    if (extraRepeats <= 2) kept.push(line);
  });

  return kept.join("\n");
}
/**
 * Shorten `value` to at most `maxChars` characters (for non-negative `maxChars`),
 * keeping its beginning and, when room allows, its end, joined by `marker`.
 *
 * The head is served first: it gets up to `headChars` of the space left after
 * the marker, and the tail gets up to `tailChars` of whatever remains. Trailing
 * whitespace of the head and leading whitespace of the tail are trimmed.
 *
 * @param value - Text to shorten; returned unchanged when it already fits.
 * @param maxChars - Maximum length of the result.
 * @param options - Optional `headChars`, `tailChars` and `marker` overrides.
 * @returns The original or shortened text.
 */
export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
): string {
  if (value.length <= maxChars) return value;
  // Not enough room for the marker plus a useful excerpt: hard-cut instead.
  if (maxChars <= marker.length + 16) {
    return value.slice(0, maxChars);
  }
  const budget = maxChars - marker.length;
  const head = Math.min(headChars, budget);
  const tail = Math.min(tailChars, Math.max(0, budget - head));
  if (head + tail >= value.length || head + tail <= 0) {
    return value.slice(0, maxChars);
  }
  const headText = value.slice(0, head).trimEnd();
  // `slice(-0)` equals `slice(0)` and would append the ENTIRE input, making the
  // result longer than the original; an empty tail must therefore be explicit.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
}
/**
 * Return a copy of `message` whose text content is compacted and capped.
 *
 * String content is compacted and truncated to `maxMessageTextChars`; array
 * content is processed part by part. Messages with any other content shape,
 * or with `preserveContent` set, are returned as-is.
 *
 * @param message - The message to shrink.
 * @param options - Character caps plus the `omitLargeAttachments` and `preserveContent` switches.
 * @returns The same message object when untouched, otherwise a shallow copy with new content.
 */
export function truncateModelMessageForPayload(
  message: ModelMessage,
  options: {
    maxToolResultChars?: number;
    maxMessageTextChars?: number;
    omitLargeAttachments?: boolean;
    preserveContent?: boolean;
  } = {},
): ModelMessage {
  const {
    maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
    maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
    omitLargeAttachments = false,
    preserveContent = false,
  } = options;

  if (preserveContent) return message;

  if (typeof message.content === "string") {
    const compacted = compressVerboseText(message.content);
    const shortened = truncateTextWithHeadAndTail(compacted, maxMessageTextChars);
    return { ...message, content: shortened };
  }

  if (!Array.isArray(message.content)) return message;

  const parts = message.content.map((part) => truncateContentPartForPayload(part, {
    maxToolResultChars,
    maxMessageTextChars,
    omitLargeAttachments,
  }));
  return { ...message, content: parts };
}
/**
 * Shrink one content part of a message.
 *
 * - `text` parts: text is compacted and capped at `maxMessageTextChars`.
 * - `tool-result` parts with a text output: the value is compacted and capped
 *   at `maxToolResultChars`.
 * - `image` / `file` parts with string payloads: replaced by a text placeholder,
 *   but only when `omitLargeAttachments` is set.
 *
 * Anything else (including non-object parts) is returned unchanged.
 */
function truncateContentPartForPayload(
  part: unknown,
  limits: {
    maxToolResultChars: number;
    maxMessageTextChars: number;
    omitLargeAttachments: boolean;
  },
): unknown {
  if (!part || typeof part !== "object") return part;
  const record = part as Record<string, unknown>;

  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") return part;
      const compacted = compressVerboseText(record.text);
      return {
        ...record,
        text: truncateTextWithHeadAndTail(compacted, limits.maxMessageTextChars),
      };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") return part;
      const outputRecord = output as Record<string, unknown>;
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") return part;
      const compacted = compressVerboseText(outputRecord.value);
      return {
        ...record,
        output: {
          ...outputRecord,
          value: truncateTextWithHeadAndTail(compacted, limits.maxToolResultChars),
        },
      };
    }
    case "image":
      return limits.omitLargeAttachments && typeof record.image === "string"
        ? omittedAttachmentTextPart("image", record.image, record)
        : part;
    case "file":
      return limits.omitLargeAttachments && typeof record.data === "string"
        ? omittedAttachmentTextPart("file", record.data, record)
        : part;
    default:
      return part;
  }
}
/**
 * Build the text part that stands in for an omitted attachment.
 *
 * The placeholder lists the attachment's `filename` and `mediaType` (each only
 * when it is a string on `record`) followed by the payload length in characters.
 *
 * @param label - Attachment kind shown in the placeholder.
 * @param payload - The omitted payload; only its length is reported.
 * @param record - The original content part, read for `filename` / `mediaType`.
 */
function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") details.push(`filename=${record.filename}`);
  if (typeof record.mediaType === "string") details.push(`mediaType=${record.mediaType}`);
  details.push(`${payload.length} chars`);

  const summary = details.join(", ");
  return {
    type: "text",
    text: `[${label} attachment omitted to keep the AI request small: ${summary}]`,
  };
}
/**
 * Shrink a conversation until its estimated serialized size fits a byte budget.
 *
 * The budget is `maxPayloadBytes - reservedBytes` (never below 0). Stages run in
 * order and the function returns as soon as the estimate fits:
 *   1. truncate every message with the caller-supplied caps;
 *   2. re-truncate with progressively tighter caps;
 *   3. drop the oldest messages at the split chosen by `findSafeCompactionSplitIndex`;
 *   4. apply emergency caps, replace image/file payloads with text placeholders,
 *      and keep dropping messages with a shrinking protection window;
 *   5. repeatedly shrink the caps (down to 32 chars), now also truncating the
 *      latest user message.
 *
 * The result of stage 5 is returned without a final check, so it can still
 * exceed the budget when nothing more can be trimmed.
 *
 * @returns The fitted messages, whether anything was changed, and the size estimate.
 */
export function fitMessagesToRequestPayloadBudget({
messages,
maxPayloadBytes = DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
reservedBytes = 0,
maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
protectRecentMessages = DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES,
preserveLatestMessage = true,
}: FitMessagesToRequestPayloadBudgetInput): FitMessagesToRequestPayloadBudgetResult {
// Negative reservations are ignored; an over-reserved budget clamps to 0.
const budget = Math.max(0, maxPayloadBytes - Math.max(0, reservedBytes));
if (budget === 0) {
// Nothing can be sent at all; report an adjustment only if there was something to drop.
return { messages: [], didAdjust: messages.length > 0, estimatedBytes: 0 };
}
const originalBytes = estimateUtf8Bytes(messages);
if (originalBytes <= budget) {
// Fast path: the input array itself is returned, untouched.
return { messages, didAdjust: false, estimatedBytes: originalBytes };
}
// The final message is exempt from truncation when it is a user message and
// preservation is enabled (until the last-resort loop at the end).
const shouldPreserveMessage = (message: ModelMessage, index: number, list: ModelMessage[]) => (
preserveLatestMessage && index === list.length - 1 && message.role === "user"
);
// Stage 1: truncate with the caller-supplied caps.
let adjusted = messages.map((message, index) => truncateModelMessageForPayload(message, {
maxToolResultChars,
maxMessageTextChars,
preserveContent: shouldPreserveMessage(message, index, messages),
}));
let estimatedBytes = estimateUtf8Bytes(adjusted);
let didAdjust = estimatedBytes !== originalBytes;
if (estimatedBytes <= budget) {
return { messages: adjusted, didAdjust, estimatedBytes };
}
// Stage 2: cap ladders. Index 0 repeats the caps already applied in stage 1,
// which is why the loop below starts at index 1.
const toolResultCaps = [
maxToolResultChars,
Math.floor(maxToolResultChars * 0.6),
Math.floor(maxToolResultChars * 0.35),
4_000,
2_000,
1_000,
];
const messageTextCaps = [
maxMessageTextChars,
Math.floor(maxMessageTextChars * 0.6),
Math.floor(maxMessageTextChars * 0.35),
8_000,
4_000,
2_000,
];
for (let i = 1; i < toolResultCaps.length; i += 1) {
adjusted = adjusted.map((message, index) => truncateModelMessageForPayload(message, {
maxToolResultChars: toolResultCaps[i],
maxMessageTextChars: messageTextCaps[i],
preserveContent: shouldPreserveMessage(message, index, adjusted),
}));
estimatedBytes = estimateUtf8Bytes(adjusted);
didAdjust = true;
if (estimatedBytes <= budget) {
return { messages: adjusted, didAdjust, estimatedBytes };
}
}
// Stage 3: drop the oldest messages while more than `protectRecentMessages` remain.
// NOTE(review): the split index comes from `findSafeCompactionSplitIndex`, defined
// elsewhere; presumably it avoids splitting related messages - confirm.
let working = [...adjusted];
while (working.length > protectRecentMessages) {
const splitAt = findSafeCompactionSplitIndex(working, protectRecentMessages);
// No usable split point: stop dropping and fall through to the emergency stage.
if (splitAt <= 0) break;
working = working.slice(splitAt);
estimatedBytes = estimateUtf8Bytes(working);
didAdjust = true;
if (estimatedBytes <= budget) {
return { messages: working, didAdjust, estimatedBytes };
}
}
// Stage 4: emergency caps; image/file payloads become text placeholders.
const emergencyToolCap = 600;
const emergencyTextCap = 1_200;
working = working.map((message, index) => truncateModelMessageForPayload(message, {
maxToolResultChars: emergencyToolCap,
maxMessageTextChars: emergencyTextCap,
omitLargeAttachments: true,
preserveContent: shouldPreserveMessage(message, index, working),
}));
estimatedBytes = estimateUtf8Bytes(working);
didAdjust = true;
// Shrink the protection window by one per iteration (never below 1) and keep
// dropping older messages until the estimate fits or one message is left.
let emergencyProtect = Math.min(protectRecentMessages, working.length);
while (estimatedBytes > budget && working.length > 1) {
emergencyProtect = Math.max(1, emergencyProtect - 1);
const splitAt = findSafeCompactionSplitIndex(working, emergencyProtect);
if (splitAt <= 0) {
// No split point available: keep only the final message.
working = working.slice(-1);
} else {
working = working.slice(splitAt);
}
working = working.map((message, index) => truncateModelMessageForPayload(message, {
maxToolResultChars: emergencyToolCap,
maxMessageTextChars: emergencyTextCap,
omitLargeAttachments: true,
preserveContent: shouldPreserveMessage(message, index, working),
}));
estimatedBytes = estimateUtf8Bytes(working);
}
// Stage 5: last resort. Both caps shrink by 40% per pass down to a floor of 32,
// and `preserveContent: false` means the latest user message is truncated too.
let finalTextCap = emergencyTextCap;
let finalToolCap = emergencyToolCap;
while (estimatedBytes > budget && (finalTextCap > 32 || finalToolCap > 32)) {
finalTextCap = Math.max(32, Math.floor(finalTextCap * 0.6));
finalToolCap = Math.max(32, Math.floor(finalToolCap * 0.6));
working = working.map((message) => truncateModelMessageForPayload(message, {
maxToolResultChars: finalToolCap,
maxMessageTextChars: finalTextCap,
omitLargeAttachments: true,
preserveContent: false,
}));
estimatedBytes = estimateUtf8Bytes(working);
}
// Returned unconditionally: may still be over budget if the caps bottomed out.
return { messages: working, didAdjust, estimatedBytes };
}

View File

@@ -0,0 +1,74 @@
import test from "node:test";
import assert from "node:assert/strict";
import type { ModelMessage } from "ai";
import {
compressMessagesForRequestTooLargeRetry,
compressVerboseText,
truncateTextWithHeadAndTail,
} from "./requestPayloadCompression.ts";
// Pins the exact output. The previous `split("\nsame\n").length <= 3` check also
// held for the un-deduplicated input, so it could not detect a missing dedup.
test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
  const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
  const output = compressVerboseText(input);

  // Five newlines collapse to three; four identical lines collapse to three.
  assert.equal(output, "line1\n\n\nline2\nsame\nsame\nsame\nline3");
  assert.equal(output.split("\n").filter((line) => line === "same").length, 3);
});
// Head (A...), marker and tail (...C) must all survive, within the character cap.
test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
  const maxChars = 2_000;
  const longOutput = ["A".repeat(500), "B".repeat(20_000), "C".repeat(500)].join("");

  const shortened = truncateTextWithHeadAndTail(longOutput, maxChars);

  assert.ok(shortened.startsWith("AAA"));
  assert.ok(shortened.includes("[... output truncated for request size ...]"));
  assert.ok(shortened.endsWith("CCC"));
  assert.ok(shortened.length <= maxChars);
});
// No message is dropped: tool output is shortened and the image becomes a placeholder.
test("compressMessagesForRequestTooLargeRetry compresses messages without enforcing a byte budget", () => {
  const history: ModelMessage[] = [
    { role: "user", content: "run build" },
    {
      role: "tool",
      content: [{
        type: "tool-result",
        toolCallId: "call-1",
        toolName: "terminal_execute",
        output: { type: "text", value: "X".repeat(200_000) },
      }],
    },
    {
      role: "user",
      content: [
        { type: "text", text: "please inspect this image" },
        { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
      ],
    },
  ];

  const result = compressMessagesForRequestTooLargeRetry(history);

  assert.equal(result.didAdjust, true);
  // The result shape carries no byte estimate, only these two keys.
  assert.deepEqual(Object.keys(result).sort(), ["didAdjust", "messages"]);
  assert.equal(result.messages.length, history.length);

  const toolParts = result.messages[1].content;
  assert.ok(Array.isArray(toolParts));
  const toolResult = toolParts[0] as { output?: { value?: string } };
  assert.ok((toolResult.output?.value?.length ?? 0) < 5_000);

  const userParts = result.messages[2].content;
  assert.ok(Array.isArray(userParts));
  assert.deepEqual(userParts[1], {
    type: "text",
    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
  });
});
// Already-compact input must come back flagged as unchanged.
test("compressMessagesForRequestTooLargeRetry reports no adjustment for compact messages", () => {
  const compact: ModelMessage[] = [{ role: "user", content: "hello" }];

  const { didAdjust, messages: returned } = compressMessagesForRequestTooLargeRetry(compact);

  assert.equal(didAdjust, false);
  assert.deepEqual(returned, compact);
});

View File

@@ -0,0 +1,167 @@
import type { ModelMessage } from "ai";
/** Character cap applied to text tool-result outputs when compressing for a retry. */
const RETRY_MAX_TOOL_RESULT_CHARS = 4_000;
/** Character cap applied to string message content and text parts when compressing for a retry. */
const RETRY_MAX_MESSAGE_TEXT_CHARS = 8_000;
/** Default text placed between the kept head and tail by `truncateTextWithHeadAndTail`. */
const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
/** Default number of leading characters `truncateTextWithHeadAndTail` tries to keep. */
const HEAD_CHARS = 800;
/** Default number of trailing characters `truncateTextWithHeadAndTail` tries to keep. */
const TAIL_CHARS = 4_000;
/** Outcome of `compressMessagesForRequestTooLargeRetry`. */
export interface CompressMessagesForRequestTooLargeRetryResult {
/** The compressed messages; the input array itself when nothing changed. */
messages: ModelMessage[];
/** True when at least one message was replaced by a compressed copy. */
didAdjust: boolean;
}
/**
 * Collapse noisy terminal/build output.
 *
 * Converts CRLF to LF, reduces runs of four or more newlines to three, and
 * keeps no more than three consecutive copies of an identical line.
 *
 * @param value - Raw text; empty input is returned unchanged.
 * @returns The compacted text.
 */
export function compressVerboseText(value: string): string {
  if (!value) return value;

  const text = value.replace(/\r\n/g, "\n").replace(/\n{4,}/g, "\n\n\n");
  const output: string[] = [];
  let previousLine: string | undefined;
  let runLength = 0;

  for (const line of text.split("\n")) {
    // Length of the current run of identical lines, counting this one.
    runLength = line === previousLine ? runLength + 1 : 1;
    previousLine = line;
    if (runLength <= 3) output.push(line);
  }

  return output.join("\n");
}
/**
 * Shorten `value` to at most `maxChars` characters (for non-negative `maxChars`),
 * keeping its beginning and, when room allows, its end, joined by `marker`.
 *
 * The head is served first: it gets up to `headChars` of the space left after
 * the marker, and the tail gets up to `tailChars` of whatever remains. Trailing
 * whitespace of the head and leading whitespace of the tail are trimmed.
 *
 * @param value - Text to shorten; returned unchanged when it already fits.
 * @param maxChars - Maximum length of the result.
 * @param options - Optional `headChars`, `tailChars` and `marker` overrides.
 * @returns The original or shortened text.
 */
export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
): string {
  if (value.length <= maxChars) return value;
  // Not enough room for the marker plus a useful excerpt: hard-cut instead.
  if (maxChars <= marker.length + 16) {
    return value.slice(0, maxChars);
  }
  const budget = maxChars - marker.length;
  const head = Math.min(headChars, budget);
  const tail = Math.min(tailChars, Math.max(0, budget - head));
  if (head + tail >= value.length || head + tail <= 0) {
    return value.slice(0, maxChars);
  }
  const headText = value.slice(0, head).trimEnd();
  // `slice(-0)` equals `slice(0)` and would append the ENTIRE input, making the
  // result longer than the original; an empty tail must therefore be explicit.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
}
/**
 * Compress every message in a conversation for a retry.
 *
 * Each message is compressed independently; no messages are dropped and no
 * byte budget is enforced.
 *
 * @param messages - The conversation to compress.
 * @returns The compressed messages (or the input array itself when nothing
 *   changed) together with a flag saying whether any message was replaced.
 */
export function compressMessagesForRequestTooLargeRetry(
  messages: ModelMessage[],
): CompressMessagesForRequestTooLargeRetryResult {
  const compressedMessages = messages.map((message) => compressModelMessageForRequestRetry(message));
  // An untouched message comes back as the same object, so identity detects change.
  const didAdjust = compressedMessages.some((candidate, index) => candidate !== messages[index]);

  return {
    messages: didAdjust ? compressedMessages : messages,
    didAdjust,
  };
}
/**
 * Compress a single message for a retry.
 *
 * String content is compacted and capped at `RETRY_MAX_MESSAGE_TEXT_CHARS`;
 * array content is compressed part by part. The original message object is
 * returned whenever nothing changed, so callers can detect changes by identity.
 */
function compressModelMessageForRequestRetry(message: ModelMessage): ModelMessage {
  if (typeof message.content === "string") {
    const shortened = compressAndTruncateText(message.content, RETRY_MAX_MESSAGE_TEXT_CHARS);
    if (shortened === message.content) return message;
    return { ...message, content: shortened };
  }

  if (!Array.isArray(message.content)) return message;

  const originalParts = message.content;
  const content = originalParts.map((part) => compressContentPartForRequestRetry(part));
  const changed = content.some((part, index) => part !== originalParts[index]);
  return changed ? { ...message, content } : message;
}
/**
 * Compress one content part for a retry.
 *
 * - `text` parts: text is compacted and capped at `RETRY_MAX_MESSAGE_TEXT_CHARS`.
 * - `tool-result` parts with a text output: the value is compacted and capped
 *   at `RETRY_MAX_TOOL_RESULT_CHARS`.
 * - `image` / `file` parts with string payloads: always replaced by a text placeholder.
 *
 * The original part object is returned when nothing changed.
 */
function compressContentPartForRequestRetry(part: unknown): unknown {
  if (!part || typeof part !== "object") return part;
  const record = part as Record<string, unknown>;

  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") return part;
      const text = compressAndTruncateText(record.text, RETRY_MAX_MESSAGE_TEXT_CHARS);
      return text === record.text ? part : { ...record, text };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") return part;
      const outputRecord = output as Record<string, unknown>;
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") return part;
      const value = compressAndTruncateText(outputRecord.value, RETRY_MAX_TOOL_RESULT_CHARS);
      if (value === outputRecord.value) return part;
      return { ...record, output: { ...outputRecord, value } };
    }
    case "image":
      return typeof record.image === "string"
        ? omittedAttachmentTextPart("image", record.image, record)
        : part;
    case "file":
      return typeof record.data === "string"
        ? omittedAttachmentTextPart("file", record.data, record)
        : part;
    default:
      return part;
  }
}
/** Compact `value` with `compressVerboseText`, then cap it at `maxChars` keeping head and tail. */
function compressAndTruncateText(value: string, maxChars: number): string {
  const compacted = compressVerboseText(value);
  return truncateTextWithHeadAndTail(compacted, maxChars);
}
/**
 * Build the text part that stands in for an omitted attachment.
 *
 * The placeholder lists the attachment's `filename` and `mediaType` (each only
 * when it is a string on `record`) followed by the payload length in characters.
 *
 * @param label - Attachment kind shown in the placeholder.
 * @param payload - The omitted payload; only its length is reported.
 * @param record - The original content part, read for `filename` / `mediaType`.
 */
function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
): { type: "text"; text: string } {
  const described = (["filename", "mediaType"] as const).flatMap((key) => {
    const fieldValue = record[key];
    return typeof fieldValue === "string" ? [`${key}=${fieldValue}`] : [];
  });
  const details = [...described, `${payload.length} chars`].join(", ");

  return {
    type: "text",
    text: `[${label} attachment omitted to keep the AI request small: ${details}]`,
  };
}

View File

@@ -15,7 +15,7 @@ import {
} from '../shared/toolExecutors';
import { requestApproval } from '../shared/approvalGate';
import { reserveSessionSlot } from '../shared/sessionExecutionQueue';
import { truncateTextWithHeadAndTail } from '../requestPayloadBudget';
import { truncateTextWithHeadAndTail } from '../requestPayloadCompression';
// Character caps for live terminal stdout / stderr.
// NOTE(review): their use is outside this view - presumably passed to
// truncateTextWithHeadAndTail as `maxChars`; confirm against the callers.
const MAX_LIVE_TERMINAL_STDOUT_CHARS = 24_000;
const MAX_LIVE_TERMINAL_STDERR_CHARS = 12_000;