fix(ai): compress Catty requests only after 413 (#1327)
Some checks failed
build-packages / dedupe push run (push) Has been cancelled
build-packages / dedupe result (push) Has been cancelled
build-packages / resolve bundled mosh-client (push) Has been cancelled
build-packages / resolve bundled et-client (push) Has been cancelled
build-packages / build-macos (push) Has been cancelled
build-packages / build-windows (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-x64' || 'build-linux-x64' }} (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-arm64' || 'build-linux-arm64' }} (push) Has been cancelled
build-packages / release (push) Has been cancelled
build-packages / bump homebrew tap (push) Has been cancelled
Some checks failed
build-packages / dedupe push run (push) Has been cancelled
build-packages / dedupe result (push) Has been cancelled
build-packages / resolve bundled mosh-client (push) Has been cancelled
build-packages / resolve bundled et-client (push) Has been cancelled
build-packages / build-macos (push) Has been cancelled
build-packages / build-windows (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-x64' || 'build-linux-x64' }} (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-arm64' || 'build-linux-arm64' }} (push) Has been cancelled
build-packages / release (push) Has been cancelled
build-packages / bump homebrew tap (push) Has been cancelled
* fix(ai): compress Catty requests only after 413
* fix(ai): retry 413 after tool progress safely
* fix(ai): mark thrown 413 retries after tool progress
* fix(ai): preserve tool results in 413 retry
This commit is contained in:
@@ -91,6 +91,23 @@ test("buildHistoricalToolResultReplayText keeps non-terminal tool results intact
|
||||
assert.equal(buildHistoricalToolResultReplayText(result, toolCall), "search result summary");
|
||||
});
|
||||
|
||||
// Checks the opt-in path of buildHistoricalToolResultReplayText: when the
// caller passes `preserveTerminalOutput: true`, the result of a
// `terminal_execute` tool call is expected to come back with its content
// unchanged.
test("buildHistoricalToolResultReplayText can preserve terminal output for 413 retries", () => {
|
||||
const toolCall: ToolCall = {
|
||||
id: "call-1",
|
||||
name: "terminal_execute",
|
||||
arguments: { command: "npm test" },
|
||||
};
|
||||
// The result is paired with the call above through the shared id "call-1".
const result: ToolResult = {
|
||||
toolCallId: "call-1",
|
||||
content: "real terminal output",
|
||||
};
|
||||
|
||||
// The replay text must equal the original content byte for byte.
assert.equal(
|
||||
buildHistoricalToolResultReplayText(result, toolCall, { preserveTerminalOutput: true }),
|
||||
"real terminal output",
|
||||
);
|
||||
});
|
||||
|
||||
test("buildHistoricalToolReplayMaps pairs reused tool ids with the nearest preceding call", () => {
|
||||
const messages: ChatMessage[] = [
|
||||
{
|
||||
|
||||
@@ -112,9 +112,14 @@ function findLastIndex<T>(items: T[], predicate: (item: T) => boolean): number {
|
||||
export function buildHistoricalToolResultReplayText(
|
||||
result: ToolResult,
|
||||
toolCall?: ToolCall,
|
||||
{
|
||||
preserveTerminalOutput = false,
|
||||
}: {
|
||||
preserveTerminalOutput?: boolean;
|
||||
} = {},
|
||||
): string {
|
||||
const toolName = toolCall?.name ?? "unknown";
|
||||
if (!isTerminalToolName(toolName)) {
|
||||
if (!isTerminalToolName(toolName) || preserveTerminalOutput) {
|
||||
return result.content;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,6 +21,7 @@ import type {
|
||||
ExternalAgentConfig,
|
||||
ProviderAdvancedParams,
|
||||
ProviderConfig,
|
||||
ToolResult,
|
||||
WebSearchConfig,
|
||||
} from '../../../infrastructure/ai/types';
|
||||
import { isWebSearchReady } from '../../../infrastructure/ai/types';
|
||||
@@ -36,9 +37,12 @@ import {
|
||||
resolveContextWindow,
|
||||
} from '../../../infrastructure/ai/contextCompaction';
|
||||
import {
|
||||
estimateUtf8Bytes,
|
||||
fitMessagesToRequestPayloadBudget,
|
||||
} from '../../../infrastructure/ai/requestPayloadBudget';
|
||||
compressMessagesForRequestTooLargeRetry,
|
||||
} from '../../../infrastructure/ai/requestPayloadCompression';
|
||||
import {
|
||||
createCattyRequestTooLargeRetryError,
|
||||
hadToolProgressBeforeRequestTooLarge,
|
||||
} from '../../../infrastructure/ai/cattyRequestTooLargeRetry';
|
||||
import { createModelFromConfig } from '../../../infrastructure/ai/sdk/providers';
|
||||
import { createCattyTools } from '../../../infrastructure/ai/sdk/tools';
|
||||
import type { ExecutorContext } from '../../../infrastructure/ai/cattyAgent/executor';
|
||||
@@ -50,6 +54,7 @@ import {
|
||||
buildPromptWithTerminalSelectionAttachments,
|
||||
isTerminalSelectionAttachment,
|
||||
} from '../../../application/state/terminalSelectionAttachment';
|
||||
import { latestAISessionsSnapshot } from '../../../application/state/aiStateSnapshots';
|
||||
import {
|
||||
buildHistoricalToolReplayMaps,
|
||||
buildHistoricalToolResultReplayText,
|
||||
@@ -343,7 +348,7 @@ export function useAIChatStreaming({
|
||||
// Track the current assistant message ID so updates target the correct message
|
||||
let activeMsgId = currentAssistantMsgId;
|
||||
let lastAddedRole: 'assistant' | 'tool' = 'assistant';
|
||||
let hasRetryUnsafeToolProgress = false;
|
||||
let hadToolProgress = false;
|
||||
const reader = result.fullStream.getReader();
|
||||
|
||||
// -- Text-delta batching: accumulate deltas and flush periodically --
|
||||
@@ -419,7 +424,16 @@ export function useAIChatStreaming({
|
||||
|
||||
try {
|
||||
while (true) {
|
||||
const { done, value } = await reader.read();
|
||||
let readResult: ReadableStreamReadResult<unknown>;
|
||||
try {
|
||||
readResult = await reader.read();
|
||||
} catch (readErr) {
|
||||
if (isRequestTooLargeError(readErr)) {
|
||||
throw createCattyRequestTooLargeRetryError(readErr, hadToolProgress);
|
||||
}
|
||||
throw readErr;
|
||||
}
|
||||
const { done, value } = readResult;
|
||||
if (done) break;
|
||||
// Use the StreamChunk union for type narrowing instead of unsafe casts
|
||||
const chunk = value as StreamChunk;
|
||||
@@ -486,7 +500,7 @@ export function useAIChatStreaming({
|
||||
cancelPendingFlush();
|
||||
flushText();
|
||||
const typedChunk = chunk as ToolCallChunk;
|
||||
hasRetryUnsafeToolProgress = true;
|
||||
hadToolProgress = true;
|
||||
const messageId = ensureAssistantMessage();
|
||||
const providerOptions = normalizeProviderContinuationOptions(typedChunk.providerMetadata);
|
||||
updateMessageById(streamSessionId, messageId, msg => ({
|
||||
@@ -512,7 +526,7 @@ export function useAIChatStreaming({
|
||||
cancelPendingFlush();
|
||||
flushText();
|
||||
const typedChunk = chunk as ToolResultChunk;
|
||||
hasRetryUnsafeToolProgress = true;
|
||||
hadToolProgress = true;
|
||||
// Mark the assistant message's tool execution as completed
|
||||
updateMessageById(streamSessionId, activeMsgId, msg =>
|
||||
msg.role === 'assistant' && msg.executionStatus === 'running'
|
||||
@@ -559,10 +573,13 @@ export function useAIChatStreaming({
|
||||
console.warn('[Catty] suppressed SDK stream state error:', typedChunk.error);
|
||||
break;
|
||||
}
|
||||
if (isRequestTooLargeError(typedChunk.error) && !hasRetryUnsafeToolProgress) {
|
||||
if (isRequestTooLargeError(typedChunk.error)) {
|
||||
cancelPendingFlush();
|
||||
flushText();
|
||||
throw typedChunk.error;
|
||||
throw createCattyRequestTooLargeRetryError(
|
||||
typedChunk.error,
|
||||
hadToolProgress,
|
||||
);
|
||||
}
|
||||
cancelPendingFlush();
|
||||
flushText();
|
||||
@@ -796,44 +813,86 @@ export function useAIChatStreaming({
|
||||
};
|
||||
|
||||
try {
|
||||
// Issue #5: Build SDK messages including tool-call and tool-result messages
|
||||
// so the LLM maintains full conversation context
|
||||
const allMessages = currentSession?.messages ?? [];
|
||||
let openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
|
||||
const buildSdkMessages = (
|
||||
allMessages: ChatMessage[],
|
||||
includeCurrentUserMessage: boolean,
|
||||
{
|
||||
preserveTerminalToolResults = new Set<ToolResult>(),
|
||||
}: {
|
||||
preserveTerminalToolResults?: ReadonlySet<ToolResult>;
|
||||
} = {},
|
||||
): Array<ModelMessage> => {
|
||||
const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
|
||||
const nextFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
|
||||
const sdkMessages: Array<ModelMessage> = [];
|
||||
let previousHistoryMessageWasToolResult = false;
|
||||
|
||||
const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
|
||||
|
||||
const sdkMessages: Array<ModelMessage> = [];
|
||||
const openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
|
||||
let previousHistoryMessageWasToolResult = false;
|
||||
for (const m of allMessages) {
|
||||
const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
|
||||
if (m.role === 'user') {
|
||||
// Historical attachments are replayed as placeholders so screenshots,
|
||||
// files, and terminal selections do not balloon every follow-up request.
|
||||
const messageAttachments = m.attachments ?? m.images;
|
||||
sdkMessages.push({
|
||||
role: 'user',
|
||||
content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
|
||||
});
|
||||
} else if (m.role === 'assistant') {
|
||||
const activeContinuation = isProviderContinuationForSource(
|
||||
m.providerContinuation,
|
||||
continuationContext.source,
|
||||
)
|
||||
? m.providerContinuation
|
||||
: undefined;
|
||||
const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
|
||||
m,
|
||||
continuationContext.source,
|
||||
);
|
||||
if (m.toolCalls?.length) {
|
||||
// Only include tool calls that have matching results
|
||||
const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
|
||||
const resolvedCalls = resolvedToolCalls
|
||||
? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
|
||||
: [];
|
||||
const contentParts: AssistantContentPart[] = [];
|
||||
if (resolvedCalls.length > 0) {
|
||||
for (const m of allMessages) {
|
||||
const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
|
||||
if (m.role === 'user') {
|
||||
// Historical attachments are replayed as placeholders so screenshots,
|
||||
// files, and terminal selections do not balloon every follow-up request.
|
||||
const messageAttachments = m.attachments ?? m.images;
|
||||
sdkMessages.push({
|
||||
role: 'user',
|
||||
content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
|
||||
});
|
||||
} else if (m.role === 'assistant') {
|
||||
const activeContinuation = isProviderContinuationForSource(
|
||||
m.providerContinuation,
|
||||
continuationContext.source,
|
||||
)
|
||||
? m.providerContinuation
|
||||
: undefined;
|
||||
const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
|
||||
m,
|
||||
continuationContext.source,
|
||||
);
|
||||
if (m.toolCalls?.length) {
|
||||
// Only include tool calls that have matching results
|
||||
const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
|
||||
const resolvedCalls = resolvedToolCalls
|
||||
? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
|
||||
: [];
|
||||
const contentParts: AssistantContentPart[] = [];
|
||||
if (resolvedCalls.length > 0) {
|
||||
for (const part of activeContinuation?.reasoningParts ?? []) {
|
||||
if (!part.text && !part.providerOptions) continue;
|
||||
contentParts.push({
|
||||
type: 'reasoning' as const,
|
||||
text: part.text,
|
||||
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
||||
});
|
||||
}
|
||||
}
|
||||
if (m.content) {
|
||||
contentParts.push({
|
||||
type: 'text' as const,
|
||||
text: m.content,
|
||||
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
||||
});
|
||||
}
|
||||
for (const tc of resolvedCalls) {
|
||||
const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
|
||||
contentParts.push({
|
||||
type: 'tool-call' as const,
|
||||
toolCallId: tc.id,
|
||||
toolName: tc.name,
|
||||
input: tc.arguments ?? {},
|
||||
...(providerOptions ? { providerOptions } : {}),
|
||||
});
|
||||
}
|
||||
// If all tool calls were orphaned, just include the text content
|
||||
if (contentParts.length > 0) {
|
||||
const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
|
||||
sdkMessages.push(message);
|
||||
if (resolvedCalls.length > 0) {
|
||||
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
|
||||
}
|
||||
}
|
||||
} else if (m.content) {
|
||||
const contentParts: AssistantContentPart[] = [];
|
||||
for (const part of activeContinuation?.reasoningParts ?? []) {
|
||||
if (!part.text && !part.providerOptions) continue;
|
||||
contentParts.push({
|
||||
@@ -842,95 +901,91 @@ export function useAIChatStreaming({
|
||||
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
||||
});
|
||||
}
|
||||
}
|
||||
if (m.content) {
|
||||
contentParts.push({
|
||||
type: 'text' as const,
|
||||
text: m.content,
|
||||
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
||||
});
|
||||
}
|
||||
for (const tc of resolvedCalls) {
|
||||
const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
|
||||
contentParts.push({
|
||||
type: 'tool-call' as const,
|
||||
toolCallId: tc.id,
|
||||
toolName: tc.name,
|
||||
input: tc.arguments ?? {},
|
||||
...(providerOptions ? { providerOptions } : {}),
|
||||
});
|
||||
}
|
||||
// If all tool calls were orphaned, just include the text content
|
||||
if (contentParts.length > 0) {
|
||||
const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
|
||||
const message: ModelMessage = {
|
||||
role: 'assistant',
|
||||
content: toAssistantModelContent(contentParts),
|
||||
};
|
||||
sdkMessages.push(message);
|
||||
if (resolvedCalls.length > 0) {
|
||||
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
|
||||
if (currentMessageFollowsToolResult) {
|
||||
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
|
||||
}
|
||||
}
|
||||
} else if (m.content) {
|
||||
const contentParts: AssistantContentPart[] = [];
|
||||
for (const part of activeContinuation?.reasoningParts ?? []) {
|
||||
if (!part.text && !part.providerOptions) continue;
|
||||
contentParts.push({
|
||||
type: 'reasoning' as const,
|
||||
text: part.text,
|
||||
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
||||
});
|
||||
}
|
||||
contentParts.push({
|
||||
type: 'text' as const,
|
||||
text: m.content,
|
||||
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
||||
} else if (m.role === 'tool' && m.toolResults?.length) {
|
||||
sdkMessages.push({
|
||||
role: 'tool',
|
||||
content: m.toolResults.map(tr => {
|
||||
const toolCall = toolCallByToolResult.get(tr);
|
||||
return {
|
||||
type: 'tool-result' as const,
|
||||
toolCallId: tr.toolCallId,
|
||||
toolName: toolCall?.name ?? 'unknown',
|
||||
output: {
|
||||
type: 'text' as const,
|
||||
value: buildHistoricalToolResultReplayText(tr, toolCall, {
|
||||
preserveTerminalOutput: preserveTerminalToolResults.has(tr),
|
||||
}),
|
||||
},
|
||||
};
|
||||
}),
|
||||
});
|
||||
const message: ModelMessage = {
|
||||
role: 'assistant',
|
||||
content: toAssistantModelContent(contentParts),
|
||||
};
|
||||
sdkMessages.push(message);
|
||||
if (currentMessageFollowsToolResult) {
|
||||
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
|
||||
}
|
||||
previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
|
||||
}
|
||||
|
||||
if (includeCurrentUserMessage) {
|
||||
// Build the current user message — include attachments as multimodal content
|
||||
if (attachments?.length) {
|
||||
const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
|
||||
const modelAttachments = attachments.filter(
|
||||
(attachment) => !isTerminalSelectionAttachment(attachment),
|
||||
);
|
||||
if (!modelAttachments.length) {
|
||||
sdkMessages.push({ role: 'user', content: modelText });
|
||||
} else {
|
||||
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
|
||||
parts.push({ type: 'text', text: modelText });
|
||||
for (const att of modelAttachments) {
|
||||
if (att.mediaType.startsWith('image/')) {
|
||||
parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
|
||||
} else {
|
||||
parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
|
||||
}
|
||||
}
|
||||
sdkMessages.push({ role: 'user', content: parts });
|
||||
}
|
||||
}
|
||||
} else if (m.role === 'tool' && m.toolResults?.length) {
|
||||
sdkMessages.push({
|
||||
role: 'tool',
|
||||
content: m.toolResults.map(tr => {
|
||||
const toolCall = toolCallByToolResult.get(tr);
|
||||
return {
|
||||
type: 'tool-result' as const,
|
||||
toolCallId: tr.toolCallId,
|
||||
toolName: toolCall?.name ?? 'unknown',
|
||||
output: { type: 'text' as const, value: buildHistoricalToolResultReplayText(tr, toolCall) },
|
||||
};
|
||||
}),
|
||||
});
|
||||
}
|
||||
previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
|
||||
}
|
||||
// Build the current user message — include attachments as multimodal content
|
||||
if (attachments?.length) {
|
||||
const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
|
||||
const modelAttachments = attachments.filter(
|
||||
(attachment) => !isTerminalSelectionAttachment(attachment),
|
||||
);
|
||||
if (!modelAttachments.length) {
|
||||
sdkMessages.push({ role: 'user', content: modelText });
|
||||
} else {
|
||||
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
|
||||
parts.push({ type: 'text', text: modelText });
|
||||
for (const att of modelAttachments) {
|
||||
if (att.mediaType.startsWith('image/')) {
|
||||
parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
|
||||
} else {
|
||||
parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
|
||||
sdkMessages.push({ role: 'user', content: trimmed });
|
||||
}
|
||||
}
|
||||
sdkMessages.push({ role: 'user', content: parts });
|
||||
|
||||
openAIChatAssistantFieldsByMessage = nextFieldsByMessage;
|
||||
return sdkMessages;
|
||||
};
|
||||
|
||||
const sdkMessages = buildSdkMessages(currentSession?.messages ?? [], true);
|
||||
const collectToolResultsAfterMessage = (
|
||||
messages: ChatMessage[],
|
||||
messageId: string,
|
||||
): Set<ToolResult> => {
|
||||
const results = new Set<ToolResult>();
|
||||
let afterMessage = false;
|
||||
for (const message of messages) {
|
||||
if (message.id === messageId) {
|
||||
afterMessage = true;
|
||||
continue;
|
||||
}
|
||||
if (!afterMessage || message.role !== 'tool' || !message.toolResults?.length) continue;
|
||||
for (const result of message.toolResults) {
|
||||
results.add(result);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sdkMessages.push({ role: 'user', content: trimmed });
|
||||
}
|
||||
return results;
|
||||
};
|
||||
|
||||
// Create model with placeholder API key — the main process injects the real
|
||||
// decrypted key when the HTTP request is proxied through IPC, so plaintext
|
||||
@@ -958,20 +1013,12 @@ export function useAIChatStreaming({
|
||||
defaultContextWindow: DEFAULT_CONTEXT_WINDOW_TOKENS,
|
||||
});
|
||||
const outputReserveTokens = Math.min(4096, Math.ceil(contextWindow * 0.05));
|
||||
const requestReserveTokens = outputReserveTokens + estimateUnknownTokens({
|
||||
const getRequestReserveTokens = () => outputReserveTokens + estimateUnknownTokens({
|
||||
systemPrompt,
|
||||
toolNames: Object.keys(tools),
|
||||
openAIChatAssistantFields: Array.from(openAIChatAssistantFieldsByMessage.values()),
|
||||
});
|
||||
|
||||
const payloadReservedBytes = estimateUtf8Bytes({
|
||||
system: systemPrompt,
|
||||
tools: Object.keys(tools),
|
||||
});
|
||||
const applyRequestPayloadBudget = (messages: ModelMessage[]) => fitMessagesToRequestPayloadBudget({
|
||||
messages,
|
||||
reservedBytes: payloadReservedBytes,
|
||||
});
|
||||
const summarizeForCompaction = async (messagesToSummarize: ModelMessage[]) => {
|
||||
updateLastMessage(sessionId, msg => ({ ...msg, statusText: 'Compacting earlier context...' }));
|
||||
const result = await generateText({
|
||||
@@ -999,64 +1046,64 @@ export function useAIChatStreaming({
|
||||
);
|
||||
return pruned;
|
||||
};
|
||||
const compactAndBudgetMessages = async (
|
||||
const compactMessages = async (
|
||||
messages: ModelMessage[],
|
||||
{
|
||||
force = false,
|
||||
statusText,
|
||||
trimLog,
|
||||
fallbackLog,
|
||||
compressForRequestTooLargeRetry = false,
|
||||
compressionLog,
|
||||
}: {
|
||||
force?: boolean;
|
||||
statusText?: string;
|
||||
trimLog: string;
|
||||
fallbackLog: string;
|
||||
compressForRequestTooLargeRetry?: boolean;
|
||||
compressionLog?: string;
|
||||
},
|
||||
): Promise<ModelMessage[]> => {
|
||||
const compressRetryMessages = (candidateMessages: ModelMessage[], log?: string): ModelMessage[] => {
|
||||
if (!compressForRequestTooLargeRetry) return candidateMessages;
|
||||
const compressed = compressMessagesForRequestTooLargeRetry(candidateMessages);
|
||||
if (compressed.didAdjust && log) {
|
||||
console.warn(log);
|
||||
}
|
||||
return compressed.messages;
|
||||
};
|
||||
|
||||
try {
|
||||
if (statusText) {
|
||||
updateLastMessage(sessionId, msg => ({ ...msg, statusText }));
|
||||
}
|
||||
const inputMessages = compressRetryMessages(messages, compressionLog);
|
||||
const compacted = await prepareContextCompaction({
|
||||
messages,
|
||||
messages: inputMessages,
|
||||
contextWindow,
|
||||
reservedTokens: requestReserveTokens,
|
||||
reservedTokens: getRequestReserveTokens(),
|
||||
thresholdRatio: force ? 0 : undefined,
|
||||
protectRecentMessages: DEFAULT_PROTECT_RECENT_MESSAGES,
|
||||
summarize: summarizeForCompaction,
|
||||
});
|
||||
let nextMessages = force && !compacted.didCompact
|
||||
? keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES)
|
||||
? keepRecentContextMessages(inputMessages, DEFAULT_PROTECT_RECENT_MESSAGES)
|
||||
: compacted.messages;
|
||||
const budgetResult = applyRequestPayloadBudget(nextMessages);
|
||||
if (budgetResult.didAdjust) {
|
||||
console.warn(`${trimLog} ${budgetResult.estimatedBytes} bytes.`);
|
||||
nextMessages = budgetResult.messages;
|
||||
}
|
||||
return nextMessages;
|
||||
return compressRetryMessages(nextMessages);
|
||||
} catch (err) {
|
||||
if (abortController.signal.aborted) throw err;
|
||||
console.warn(fallbackLog, err);
|
||||
const fallbackBudget = applyRequestPayloadBudget(
|
||||
keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES),
|
||||
);
|
||||
if (fallbackBudget.didAdjust) {
|
||||
console.warn(
|
||||
`[Catty] Request payload trimmed to ${fallbackBudget.estimatedBytes} bytes after compaction fallback.`,
|
||||
);
|
||||
const fallbackMessages = keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES);
|
||||
if (!compressForRequestTooLargeRetry) {
|
||||
return fallbackMessages;
|
||||
}
|
||||
return fallbackBudget.messages;
|
||||
const compressed = compressMessagesForRequestTooLargeRetry(fallbackMessages);
|
||||
if (compressed.didAdjust) {
|
||||
console.warn('[Catty] Request content compressed after compaction fallback.');
|
||||
}
|
||||
return compressed.messages;
|
||||
}
|
||||
};
|
||||
const payloadBudgetResult = applyRequestPayloadBudget(sdkMessages);
|
||||
let messagesForStream = payloadBudgetResult.messages;
|
||||
if (payloadBudgetResult.didAdjust) {
|
||||
console.warn(
|
||||
`[Catty] Request payload trimmed to ${payloadBudgetResult.estimatedBytes} bytes to avoid HTTP 413.`,
|
||||
);
|
||||
}
|
||||
messagesForStream = await compactAndBudgetMessages(messagesForStream, {
|
||||
trimLog: '[Catty] Request payload re-trimmed after context compaction to',
|
||||
let messagesForStream = sdkMessages;
|
||||
messagesForStream = await compactMessages(messagesForStream, {
|
||||
fallbackLog: '[Catty] Context compaction failed; falling back to recent messages only:',
|
||||
});
|
||||
|
||||
@@ -1080,23 +1127,50 @@ export function useAIChatStreaming({
|
||||
}
|
||||
|
||||
console.warn('[Catty] Request hit HTTP 413; forcing context compaction and retrying once.', streamErr);
|
||||
updateMessageById(sessionId, assistantMsgId, msg => ({
|
||||
...msg,
|
||||
content: '',
|
||||
thinking: undefined,
|
||||
thinkingDurationMs: undefined,
|
||||
providerContinuation: undefined,
|
||||
toolCalls: undefined,
|
||||
errorInfo: undefined,
|
||||
executionStatus: undefined,
|
||||
pendingApproval: undefined,
|
||||
statusText: 'Request was too large. Compacting context and retrying...',
|
||||
}));
|
||||
const retryMessages = prepareMessagesForStream(await compactAndBudgetMessages(messagesForStream, {
|
||||
const statusText = 'Request was too large. Compacting context and retrying...';
|
||||
const hadToolProgress = hadToolProgressBeforeRequestTooLarge(streamErr);
|
||||
let retryBaseMessages = messagesForStream;
|
||||
let retryAssistantMsgId = assistantMsgId;
|
||||
if (hadToolProgress) {
|
||||
const latestSession = latestAISessionsSnapshot?.find(session => session.id === sessionId);
|
||||
if (latestSession) {
|
||||
retryBaseMessages = buildSdkMessages(latestSession.messages, false, {
|
||||
preserveTerminalToolResults: collectToolResultsAfterMessage(
|
||||
latestSession.messages,
|
||||
assistantMsgId,
|
||||
),
|
||||
});
|
||||
}
|
||||
retryAssistantMsgId = generateId();
|
||||
addMessageToSession(sessionId, {
|
||||
id: retryAssistantMsgId,
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
timestamp: Date.now(),
|
||||
model: activeModelId || context.activeProvider?.defaultModel || '',
|
||||
providerId: context.activeProvider?.providerId,
|
||||
statusText,
|
||||
});
|
||||
} else {
|
||||
updateMessageById(sessionId, assistantMsgId, msg => ({
|
||||
...msg,
|
||||
content: '',
|
||||
thinking: undefined,
|
||||
thinkingDurationMs: undefined,
|
||||
providerContinuation: undefined,
|
||||
toolCalls: undefined,
|
||||
errorInfo: undefined,
|
||||
executionStatus: undefined,
|
||||
pendingApproval: undefined,
|
||||
statusText,
|
||||
}));
|
||||
}
|
||||
const retryMessages = prepareMessagesForStream(await compactMessages(retryBaseMessages, {
|
||||
force: true,
|
||||
statusText: 'Request was too large. Compacting context and retrying...',
|
||||
trimLog: '[Catty] Request payload trimmed after forced context compaction to',
|
||||
statusText,
|
||||
fallbackLog: '[Catty] Forced context compaction after 413 failed; falling back to recent messages only:',
|
||||
compressForRequestTooLargeRetry: true,
|
||||
compressionLog: '[Catty] Request content compressed after forced context compaction.',
|
||||
}));
|
||||
|
||||
await processCattyStream(
|
||||
@@ -1106,7 +1180,7 @@ export function useAIChatStreaming({
|
||||
tools,
|
||||
retryMessages,
|
||||
abortController.signal,
|
||||
assistantMsgId,
|
||||
retryAssistantMsgId,
|
||||
context.activeProvider?.advancedParams,
|
||||
continuationContext,
|
||||
);
|
||||
@@ -1123,7 +1197,7 @@ export function useAIChatStreaming({
|
||||
}
|
||||
}, [
|
||||
processCattyStream, reportStreamError, setStreamingForScope,
|
||||
updateLastMessage, updateMessageById,
|
||||
addMessageToSession, updateLastMessage, updateMessageById,
|
||||
]);
|
||||
|
||||
return {
|
||||
|
||||
29
infrastructure/ai/cattyRequestTooLargeRetry.test.ts
Normal file
29
infrastructure/ai/cattyRequestTooLargeRetry.test.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
import {
|
||||
createCattyRequestTooLargeRetryError,
|
||||
hadToolProgressBeforeRequestTooLarge,
|
||||
} from "./cattyRequestTooLargeRetry.ts";
|
||||
|
||||
// Wrapping an Error that already carries HTTP details should produce a retry
// error that keeps those details, links the source through `cause`, and
// reports that tool progress happened.
test("createCattyRequestTooLargeRetryError marks 413 retry errors after tool progress", () => {
|
||||
// Source error decorated with the `status` and `responseBody` fields the
// factory is expected to copy over.
const source = Object.assign(new Error("HTTP 413 Request Entity Too Large"), {
|
||||
status: 413,
|
||||
responseBody: "<html>too large</html>",
|
||||
});
|
||||
|
||||
const retryError = createCattyRequestTooLargeRetryError(source, true);
|
||||
|
||||
// `statusCode` is set by the factory itself; `status` and `responseBody`
// come from the source error.
assert.equal(retryError.statusCode, 413);
|
||||
assert.equal(retryError.status, 413);
|
||||
assert.equal(retryError.responseBody, "<html>too large</html>");
|
||||
assert.equal(retryError.cause, source);
|
||||
assert.equal(hadToolProgressBeforeRequestTooLarge(retryError), true);
|
||||
});
|
||||
|
||||
// Both a retry error created with `hadToolProgress = false` and a plain Error
// that never went through the factory must report "no tool progress".
test("hadToolProgressBeforeRequestTooLarge is false when no tool progress was recorded", () => {
|
||||
// A non-Error source (a string) is accepted by the factory as well.
const retryError = createCattyRequestTooLargeRetryError("HTTP 413", false);
|
||||
|
||||
assert.equal(hadToolProgressBeforeRequestTooLarge(retryError), false);
|
||||
assert.equal(hadToolProgressBeforeRequestTooLarge(new Error("HTTP 413")), false);
|
||||
});
|
||||
34
infrastructure/ai/cattyRequestTooLargeRetry.ts
Normal file
34
infrastructure/ai/cattyRequestTooLargeRetry.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
/**
 * Error shape used to signal that a request was rejected as too large
 * (HTTP 413) and to carry the information needed to decide how to retry.
 */
export type CattyRequestTooLargeRetryError = Error & {
  /** Caller-supplied flag: whether tool progress was recorded before the 413. */
  cattyHadToolProgress?: boolean;
  /** Always set to 413 by {@link createCattyRequestTooLargeRetryError}. */
  statusCode?: number;
  /** Numeric `status` copied from the source error, when it has one. */
  status?: number;
  /** String `responseBody` copied from the source error, when it has one. */
  responseBody?: string;
  /**
   * The original value that was wrapped. Declared explicitly so the module
   * type-checks even when the configured `lib` predates ES2022 `Error.cause`.
   */
  cause?: unknown;
};

/** Message used when the wrapped value carries no usable text of its own. */
const DEFAULT_REQUEST_TOO_LARGE_MESSAGE = 'Request too large';

/**
 * Derives a human-readable message from an arbitrary thrown/streamed value.
 *
 * - `Error` instances contribute their `message`.
 * - `null` / `undefined` fall back to the default message.
 * - Other objects contribute a non-empty string `message` property if present;
 *   otherwise the default message is used instead of `"[object Object]"`.
 * - Remaining primitives are stringified.
 */
function readRequestTooLargeMessage(error: unknown): string {
  if (error instanceof Error) return error.message;
  if (error == null) return DEFAULT_REQUEST_TOO_LARGE_MESSAGE;
  if (typeof error === 'object') {
    const { message } = error as { message?: unknown };
    return typeof message === 'string' && message.length > 0
      ? message
      : DEFAULT_REQUEST_TOO_LARGE_MESSAGE;
  }
  return String(error);
}

/**
 * Wraps an arbitrary "request too large" failure in a
 * {@link CattyRequestTooLargeRetryError}.
 *
 * @param error - The original failure; may be an `Error`, a plain object, a
 *   string, or nothing at all. It is preserved unchanged on `cause`.
 * @param hadToolProgress - Whether tool progress was recorded before the
 *   failure; stored on `cattyHadToolProgress`.
 * @returns A new error named `CattyRequestTooLargeRetryError` with
 *   `statusCode` 413 and, when the source is an object, its numeric `status`
 *   and string `responseBody` copied over.
 */
export function createCattyRequestTooLargeRetryError(
  error: unknown,
  hadToolProgress: boolean,
): CattyRequestTooLargeRetryError {
  const retryError: CattyRequestTooLargeRetryError = new Error(readRequestTooLargeMessage(error));
  retryError.name = 'CattyRequestTooLargeRetryError';
  retryError.cause = error;
  retryError.cattyHadToolProgress = hadToolProgress;
  retryError.statusCode = 413;

  if (typeof error === 'object' && error !== null) {
    // Only copy fields whose runtime type matches the declared one.
    const { status, responseBody } = error as Record<string, unknown>;
    if (typeof status === 'number') retryError.status = status;
    if (typeof responseBody === 'string') retryError.responseBody = responseBody;
  }

  return retryError;
}

/**
 * Reports whether `error` is an object whose `cattyHadToolProgress` marker is
 * truthy. Any other value (primitives, `null`, unmarked errors) yields `false`.
 */
export function hadToolProgressBeforeRequestTooLarge(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) return false;
  const { cattyHadToolProgress } = error as { cattyHadToolProgress?: boolean };
  return Boolean(cattyHadToolProgress);
}
|
||||
@@ -1,233 +0,0 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import type { ModelMessage } from "ai";
|
||||
|
||||
import {
|
||||
DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
|
||||
compressVerboseText,
|
||||
estimateUtf8Bytes,
|
||||
fitMessagesToRequestPayloadBudget,
|
||||
truncateTextWithHeadAndTail,
|
||||
} from "./requestPayloadBudget.ts";
|
||||
|
||||
test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
|
||||
const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
|
||||
const output = compressVerboseText(input);
|
||||
assert.match(output, /line1\n\n\nline2/);
|
||||
assert.ok(output.split("\nsame\n").length <= 3);
|
||||
});
|
||||
|
||||
test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
|
||||
const value = `${"A".repeat(500)}${"B".repeat(20_000)}${"C".repeat(500)}`;
|
||||
const truncated = truncateTextWithHeadAndTail(value, 2_000);
|
||||
assert.ok(truncated.startsWith("AAA"));
|
||||
assert.ok(truncated.includes("[... output truncated for request size ...]"));
|
||||
assert.ok(truncated.endsWith("CCC"));
|
||||
assert.ok(truncated.length <= 2_000);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget truncates verbose tool results before dropping recent turns", () => {
|
||||
const messages: ModelMessage[] = [
|
||||
{ role: "user", content: "run build" },
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{
|
||||
type: "tool-call",
|
||||
toolCallId: "call-1",
|
||||
toolName: "terminal_execute",
|
||||
input: { command: "npm run build" },
|
||||
}],
|
||||
},
|
||||
{
|
||||
role: "tool",
|
||||
content: [{
|
||||
type: "tool-result",
|
||||
toolCallId: "call-1",
|
||||
toolName: "terminal_execute",
|
||||
output: { type: "text", value: "X".repeat(200_000) },
|
||||
}],
|
||||
},
|
||||
{ role: "user", content: "what failed?" },
|
||||
];
|
||||
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages,
|
||||
maxPayloadBytes: 20_000,
|
||||
reservedBytes: 2_000,
|
||||
maxToolResultChars: 4_000,
|
||||
protectRecentMessages: 4,
|
||||
});
|
||||
|
||||
assert.equal(result.messages.length, 4);
|
||||
const toolMessage = result.messages[2];
|
||||
assert.equal(toolMessage.role, "tool");
|
||||
assert.ok(Array.isArray(toolMessage.content));
|
||||
const toolPart = toolMessage.content[0] as { output?: { value?: string } };
|
||||
assert.ok((toolPart.output?.value?.length ?? 0) < 5_000);
|
||||
assert.ok(result.estimatedBytes <= 20_000);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget drops older turns when truncation alone is insufficient", () => {
|
||||
const messages: ModelMessage[] = [];
|
||||
for (let turn = 0; turn < 12; turn += 1) {
|
||||
messages.push({ role: "user", content: `question ${turn}` });
|
||||
messages.push({ role: "assistant", content: `answer ${turn} ${"Z".repeat(20_000)}` });
|
||||
}
|
||||
messages.push({ role: "user", content: "latest question" });
|
||||
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages,
|
||||
maxPayloadBytes: 8_000,
|
||||
reservedBytes: 500,
|
||||
protectRecentMessages: 4,
|
||||
maxMessageTextChars: 2_000,
|
||||
});
|
||||
|
||||
assert.ok(result.messages.length < messages.length);
|
||||
assert.equal(result.messages.at(-1)?.role, "user");
|
||||
assert.match(String(result.messages.at(-1)?.content ?? ""), /latest question/);
|
||||
assert.ok(result.estimatedBytes <= 8_000);
|
||||
});
|
||||
|
||||
test("estimateUtf8Bytes measures JSON payload size in UTF-8 bytes", () => {
|
||||
const bytes = estimateUtf8Bytes({ text: "caf\u00e9" });
|
||||
assert.ok(bytes > 8);
|
||||
});
|
||||
|
||||
test("estimateUtf8Bytes works in renderer-like environments without Buffer", () => {
|
||||
const originalBuffer = globalThis.Buffer;
|
||||
try {
|
||||
(globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = undefined;
|
||||
assert.equal(estimateUtf8Bytes({ text: "caf\u00e9" }), new TextEncoder().encode(JSON.stringify({ text: "caf\u00e9" })).byteLength);
|
||||
} finally {
|
||||
(globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = originalBuffer;
|
||||
}
|
||||
});
|
||||
|
||||
test("default payload budget remains a general gateway guard", () => {
|
||||
assert.equal(DEFAULT_MAX_REQUEST_PAYLOAD_BYTES, 1_500_000);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget preserves current long text when the request is under budget", () => {
|
||||
const currentText = "CURRENT ".repeat(4_000);
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages: [{ role: "user", content: currentText }],
|
||||
maxPayloadBytes: 100_000,
|
||||
});
|
||||
|
||||
assert.equal(result.didAdjust, false);
|
||||
assert.equal(result.messages[0].content, currentText);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget reports didAdjust when initial truncation succeeds", () => {
|
||||
const messages: ModelMessage[] = [
|
||||
{ role: "user", content: "run build" },
|
||||
{
|
||||
role: "tool",
|
||||
content: [{
|
||||
type: "tool-result",
|
||||
toolCallId: "call-1",
|
||||
toolName: "terminal_execute",
|
||||
output: { type: "text", value: "X".repeat(200_000) },
|
||||
}],
|
||||
},
|
||||
];
|
||||
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages,
|
||||
maxPayloadBytes: 20_000,
|
||||
reservedBytes: 2_000,
|
||||
});
|
||||
|
||||
assert.equal(result.didAdjust, true);
|
||||
assert.ok(result.estimatedBytes <= 20_000);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget keeps dropping messages after emergency caps when still over budget", () => {
|
||||
const messages: ModelMessage[] = [];
|
||||
for (let turn = 0; turn < 8; turn += 1) {
|
||||
messages.push({ role: "user", content: `question ${turn} ${"Q".repeat(5_000)}` });
|
||||
messages.push({ role: "assistant", content: `answer ${turn} ${"A".repeat(5_000)}` });
|
||||
}
|
||||
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages,
|
||||
maxPayloadBytes: 5_000,
|
||||
protectRecentMessages: 8,
|
||||
maxMessageTextChars: 2_000,
|
||||
});
|
||||
|
||||
assert.ok(result.messages.length < messages.length);
|
||||
assert.ok(result.estimatedBytes <= 5_000);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget shrinks a single oversized message for very small budgets", () => {
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages: [{ role: "assistant", content: "Z".repeat(1_000_000) }],
|
||||
maxPayloadBytes: 1_000,
|
||||
maxMessageTextChars: 500,
|
||||
});
|
||||
|
||||
assert.equal(result.messages.length, 1);
|
||||
assert.ok(result.estimatedBytes <= 1_000);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget returns empty messages when budget is fully reserved", () => {
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages: [{ role: "user", content: "hello" }],
|
||||
maxPayloadBytes: 100,
|
||||
reservedBytes: 200,
|
||||
});
|
||||
|
||||
assert.deepEqual(result.messages, []);
|
||||
assert.equal(result.didAdjust, true);
|
||||
assert.equal(result.estimatedBytes, 0);
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget omits latest attachments only when they are still over budget at the last resort", () => {
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages: [{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "please inspect this image" },
|
||||
{ type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
|
||||
],
|
||||
}],
|
||||
maxPayloadBytes: 20_000,
|
||||
});
|
||||
|
||||
assert.ok(result.estimatedBytes <= 20_000);
|
||||
assert.equal(result.messages.length, 1);
|
||||
const content = result.messages[0].content;
|
||||
assert.ok(Array.isArray(content));
|
||||
assert.deepEqual(content[1], {
|
||||
type: "text",
|
||||
text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
|
||||
});
|
||||
});
|
||||
|
||||
test("fitMessagesToRequestPayloadBudget omits older oversized attachment payloads as a last resort", () => {
|
||||
const result = fitMessagesToRequestPayloadBudget({
|
||||
messages: [
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "older image" },
|
||||
{ type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
|
||||
],
|
||||
},
|
||||
{ role: "user", content: "current question" },
|
||||
],
|
||||
maxPayloadBytes: 20_000,
|
||||
protectRecentMessages: 2,
|
||||
});
|
||||
|
||||
assert.ok(result.estimatedBytes <= 20_000);
|
||||
assert.equal(result.messages.length, 2);
|
||||
const content = result.messages[0].content;
|
||||
assert.ok(Array.isArray(content));
|
||||
assert.deepEqual(content[1], {
|
||||
type: "text",
|
||||
text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
|
||||
});
|
||||
});
|
||||
@@ -1,335 +0,0 @@
|
||||
import type { ModelMessage } from "ai";
|
||||
import { findSafeCompactionSplitIndex } from "./contextCompaction";
|
||||
|
||||
/** Stay below typical nginx `client_max_body_size` defaults (often 1-2 MB). */
|
||||
export const DEFAULT_MAX_REQUEST_PAYLOAD_BYTES = 1_500_000;
|
||||
/** Per tool-result text cap before the sliding window drops older turns. */
|
||||
export const DEFAULT_MAX_TOOL_RESULT_CHARS = 12_000;
|
||||
/** Per plain user/assistant text cap inside a single history message. */
|
||||
export const DEFAULT_MAX_MESSAGE_TEXT_CHARS = 24_000;
|
||||
/** Keep this many recent messages while trimming payload size. */
|
||||
export const DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES = 8;
|
||||
|
||||
const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
|
||||
const HEAD_CHARS = 800;
|
||||
const TAIL_CHARS = 4_000;
|
||||
|
||||
export interface FitMessagesToRequestPayloadBudgetInput {
|
||||
messages: ModelMessage[];
|
||||
maxPayloadBytes?: number;
|
||||
reservedBytes?: number;
|
||||
maxToolResultChars?: number;
|
||||
maxMessageTextChars?: number;
|
||||
protectRecentMessages?: number;
|
||||
preserveLatestMessage?: boolean;
|
||||
}
|
||||
|
||||
export interface FitMessagesToRequestPayloadBudgetResult {
|
||||
messages: ModelMessage[];
|
||||
didAdjust: boolean;
|
||||
estimatedBytes: number;
|
||||
}
|
||||
|
||||
/**
 * Estimate how many UTF-8 bytes `value` occupies once serialized for a request.
 *
 * The value is stringified first and the resulting text is measured in bytes.
 */
export function estimateUtf8Bytes(value: unknown): number {
  return utf8ByteLength(stringifyForByteEstimate(value));
}
|
||||
|
||||
/**
 * Serialize a value to text for byte-size estimation.
 *
 * Uses `JSON.stringify`, and falls back to `String(value)` when serialization
 * throws (for example on circular structures or BigInt values).
 *
 * @returns Always a string; values JSON cannot represent at the top level
 *   (`undefined`, functions, symbols) yield the empty string.
 */
function stringifyForByteEstimate(value: unknown): string {
  try {
    // JSON.stringify returns undefined, not a string, for undefined, functions
    // and symbols; coalesce so the declared return type actually holds.
    return JSON.stringify(value) ?? "";
  } catch {
    return String(value ?? "");
  }
}
|
||||
|
||||
/**
 * Count the UTF-8 bytes in `value`, treating `undefined` as the empty string.
 *
 * Uses `Buffer.byteLength` when a global `Buffer` is available and falls back
 * to `TextEncoder` otherwise.
 */
function utf8ByteLength(value: string | undefined): number {
  const text = value ?? "";
  const hasBuffer = typeof Buffer !== "undefined" && typeof Buffer.byteLength === "function";
  return hasBuffer
    ? Buffer.byteLength(text, "utf8")
    : new TextEncoder().encode(text).byteLength;
}
|
||||
|
||||
/**
 * Shrink noisy terminal/build output before payload size is measured.
 *
 * CRLF line endings become LF, runs of four or more newlines collapse to
 * three, and any run of identical consecutive lines keeps only its first
 * three occurrences.
 */
export function compressVerboseText(value: string): string {
  if (!value) return value;

  const normalized = value
    .replace(/\r\n/g, "\n")
    .replace(/\n{4,}/g, "\n\n\n");

  const kept: string[] = [];
  let runLength = 0;
  normalized.split("\n").forEach((line, index, allLines) => {
    // Length of the current run of identical consecutive lines, 1-based.
    runLength = index > 0 && allLines[index - 1] === line ? runLength + 1 : 1;
    if (runLength <= 3) kept.push(line);
  });

  return kept.join("\n");
}
|
||||
|
||||
/**
 * Shorten `value` to at most `maxChars` characters, keeping its beginning and
 * its end with a marker in between.
 *
 * @param value - Text to shorten; returned unchanged when it already fits.
 * @param maxChars - Upper bound on the length of the result.
 * @param options.headChars - Preferred number of leading characters to keep.
 * @param options.tailChars - Preferred number of trailing characters to keep.
 * @param options.marker - Text inserted between the kept head and tail.
 * @returns The shortened text. When `maxChars` leaves no useful room for the
 *   marker, a plain prefix of `value` is returned instead.
 */
export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
): string {
  if (value.length <= maxChars) return value;
  // Clamp so a negative limit yields "" rather than slicing from the end.
  const hardPrefix = (): string => value.slice(0, Math.max(0, maxChars));
  if (maxChars <= marker.length + 16) return hardPrefix();

  const budget = maxChars - marker.length;
  let head = Math.max(0, Math.min(headChars, budget));
  let tail = Math.max(0, Math.min(tailChars, budget - head));
  if (tail === 0 && tailChars > 0 && head > 0) {
    // The head alone would consume the whole budget. Share the budget in the
    // requested head:tail proportion so the end of the text is still kept.
    head = Math.floor((budget * headChars) / (headChars + tailChars));
    tail = Math.min(tailChars, budget - head);
  }

  if (head + tail <= 0 || head + tail >= value.length) return hardPrefix();

  const headText = value.slice(0, head).trimEnd();
  // slice(-0) is slice(0) and would return the entire string, so a zero-length
  // tail must be handled explicitly.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
}
|
||||
|
||||
/**
 * Produce a size-reduced copy of one model message.
 *
 * String content is compressed and capped at `maxMessageTextChars`; array
 * content is processed part by part. The message is returned untouched when
 * `preserveContent` is set or when its content is neither a string nor an array.
 */
export function truncateModelMessageForPayload(
  message: ModelMessage,
  {
    maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
    maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
    omitLargeAttachments = false,
    preserveContent = false,
  }: {
    maxToolResultChars?: number;
    maxMessageTextChars?: number;
    omitLargeAttachments?: boolean;
    preserveContent?: boolean;
  } = {},
): ModelMessage {
  if (preserveContent) return message;

  if (typeof message.content === "string") {
    const shortened = truncateTextWithHeadAndTail(
      compressVerboseText(message.content),
      maxMessageTextChars,
    );
    return { ...message, content: shortened };
  }

  if (!Array.isArray(message.content)) return message;

  const limits = { maxToolResultChars, maxMessageTextChars, omitLargeAttachments };
  return {
    ...message,
    content: message.content.map((part) => truncateContentPartForPayload(part, limits)),
  };
}
|
||||
|
||||
/**
 * Reduce the size of a single content part of a message.
 *
 * - `text` parts: text is compressed and capped at `maxMessageTextChars`.
 * - `tool-result` parts whose output is `{ type: "text", value: string }`:
 *   the value is compressed and capped at `maxToolResultChars`.
 * - `image` / `file` parts with string payloads: replaced by a short text
 *   placeholder, but only when `omitLargeAttachments` is set.
 *
 * Every other part is returned as-is.
 */
function truncateContentPartForPayload(
  part: unknown,
  limits: {
    maxToolResultChars: number;
    maxMessageTextChars: number;
    omitLargeAttachments: boolean;
  },
): unknown {
  if (typeof part !== "object" || part === null) return part;
  const record = part as Record<string, unknown>;

  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") break;
      const shortened = truncateTextWithHeadAndTail(
        compressVerboseText(record.text),
        limits.maxMessageTextChars,
      );
      return { ...record, text: shortened };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") break;
      const outputRecord = output as Record<string, unknown>;
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") break;
      const shortened = truncateTextWithHeadAndTail(
        compressVerboseText(outputRecord.value),
        limits.maxToolResultChars,
      );
      return { ...record, output: { ...outputRecord, value: shortened } };
    }
    case "image": {
      if (limits.omitLargeAttachments && typeof record.image === "string") {
        return omittedAttachmentTextPart("image", record.image, record);
      }
      break;
    }
    case "file": {
      if (limits.omitLargeAttachments && typeof record.data === "string") {
        return omittedAttachmentTextPart("file", record.data, record);
      }
      break;
    }
    default:
      break;
  }

  return part;
}
|
||||
|
||||
/**
 * Build the text part that stands in for an attachment dropped from a request.
 *
 * The placeholder lists the attachment's `filename` and `mediaType` when those
 * are strings on `record`, followed by the payload length in characters.
 */
function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") details.push(`filename=${record.filename}`);
  if (typeof record.mediaType === "string") details.push(`mediaType=${record.mediaType}`);
  details.push(`${payload.length} chars`);

  const text = `[${label} attachment omitted to keep the AI request small: ${details.join(", ")}]`;
  return { type: "text", text };
}
|
||||
|
||||
/**
 * Shrink a message list until its estimated serialized size fits the budget
 * `maxPayloadBytes - reservedBytes`.
 *
 * Stages, each returning as soon as the estimate fits:
 *  1. Truncate with the caller's caps.
 *  2. Re-truncate with a ladder of progressively smaller caps.
 *  3. Drop leading messages at split points chosen by
 *     `findSafeCompactionSplitIndex` while more than `protectRecentMessages`
 *     remain.
 *  4. Apply emergency caps and replace attachments with placeholders, then
 *     keep dropping leading messages with a shrinking protected window.
 *  5. Shrink caps further (down to 32 chars), no longer preserving any message.
 *
 * While `preserveLatestMessage` is set, a final message with role "user" is
 * left untouched in stages 1-4. The result of stage 5 is returned even if it
 * still exceeds the budget.
 */
export function fitMessagesToRequestPayloadBudget({
  messages,
  maxPayloadBytes = DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
  reservedBytes = 0,
  maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
  maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
  protectRecentMessages = DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES,
  preserveLatestMessage = true,
}: FitMessagesToRequestPayloadBudgetInput): FitMessagesToRequestPayloadBudgetResult {
  const budget = Math.max(0, maxPayloadBytes - Math.max(0, reservedBytes));
  if (budget === 0) {
    return { messages: [], didAdjust: messages.length > 0, estimatedBytes: 0 };
  }

  const originalBytes = estimateUtf8Bytes(messages);
  if (originalBytes <= budget) {
    return { messages, didAdjust: false, estimatedBytes: originalBytes };
  }

  const isProtectedLatestUserTurn = (message: ModelMessage, index: number, list: ModelMessage[]) => (
    preserveLatestMessage && index === list.length - 1 && message.role === "user"
  );

  // Truncate every message in `list` with the given caps, leaving a protected
  // latest user turn untouched.
  const recap = (
    list: ModelMessage[],
    toolCap: number,
    textCap: number,
    omitLargeAttachments = false,
  ): ModelMessage[] => list.map((message, index) => truncateModelMessageForPayload(message, {
    maxToolResultChars: toolCap,
    maxMessageTextChars: textCap,
    omitLargeAttachments,
    preserveContent: isProtectedLatestUserTurn(message, index, list),
  }));

  // Stage 1: the caller's own caps.
  let adjusted = recap(messages, maxToolResultChars, maxMessageTextChars);
  let estimatedBytes = estimateUtf8Bytes(adjusted);
  let didAdjust = estimatedBytes !== originalBytes;
  if (estimatedBytes <= budget) {
    return { messages: adjusted, didAdjust, estimatedBytes };
  }

  // Stage 2: progressively tighter [tool-result cap, message-text cap] pairs.
  const capLadder: Array<[number, number]> = [
    [Math.floor(maxToolResultChars * 0.6), Math.floor(maxMessageTextChars * 0.6)],
    [Math.floor(maxToolResultChars * 0.35), Math.floor(maxMessageTextChars * 0.35)],
    [4_000, 8_000],
    [2_000, 4_000],
    [1_000, 2_000],
  ];
  for (const [toolCap, textCap] of capLadder) {
    adjusted = recap(adjusted, toolCap, textCap);
    estimatedBytes = estimateUtf8Bytes(adjusted);
    didAdjust = true;
    if (estimatedBytes <= budget) {
      return { messages: adjusted, didAdjust, estimatedBytes };
    }
  }

  // Stage 3: drop leading messages at safe split points.
  let working = [...adjusted];
  while (working.length > protectRecentMessages) {
    const splitAt = findSafeCompactionSplitIndex(working, protectRecentMessages);
    if (splitAt <= 0) break;
    working = working.slice(splitAt);
    estimatedBytes = estimateUtf8Bytes(working);
    didAdjust = true;
    if (estimatedBytes <= budget) {
      return { messages: working, didAdjust, estimatedBytes };
    }
  }

  // Stage 4: emergency caps, attachments replaced by placeholders.
  const emergencyToolCap = 600;
  const emergencyTextCap = 1_200;
  working = recap(working, emergencyToolCap, emergencyTextCap, true);
  estimatedBytes = estimateUtf8Bytes(working);
  didAdjust = true;

  let emergencyProtect = Math.min(protectRecentMessages, working.length);
  while (estimatedBytes > budget && working.length > 1) {
    emergencyProtect = Math.max(1, emergencyProtect - 1);
    const splitAt = findSafeCompactionSplitIndex(working, emergencyProtect);
    // With no usable split point, fall back to keeping only the last message.
    working = splitAt <= 0 ? working.slice(-1) : working.slice(splitAt);
    working = recap(working, emergencyToolCap, emergencyTextCap, true);
    estimatedBytes = estimateUtf8Bytes(working);
  }

  // Stage 5: last resort, nothing is preserved any more.
  let finalTextCap = emergencyTextCap;
  let finalToolCap = emergencyToolCap;
  while (estimatedBytes > budget && (finalTextCap > 32 || finalToolCap > 32)) {
    finalTextCap = Math.max(32, Math.floor(finalTextCap * 0.6));
    finalToolCap = Math.max(32, Math.floor(finalToolCap * 0.6));
    working = working.map((message) => truncateModelMessageForPayload(message, {
      maxToolResultChars: finalToolCap,
      maxMessageTextChars: finalTextCap,
      omitLargeAttachments: true,
      preserveContent: false,
    }));
    estimatedBytes = estimateUtf8Bytes(working);
  }

  return { messages: working, didAdjust, estimatedBytes };
}
|
||||
74
infrastructure/ai/requestPayloadCompression.test.ts
Normal file
74
infrastructure/ai/requestPayloadCompression.test.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import type { ModelMessage } from "ai";
|
||||
|
||||
import {
|
||||
compressMessagesForRequestTooLargeRetry,
|
||||
compressVerboseText,
|
||||
truncateTextWithHeadAndTail,
|
||||
} from "./requestPayloadCompression.ts";
|
||||
|
||||
test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
|
||||
const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
|
||||
const output = compressVerboseText(input);
|
||||
assert.match(output, /line1\n\n\nline2/);
|
||||
assert.ok(output.split("\nsame\n").length <= 3);
|
||||
});
|
||||
|
||||
test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
|
||||
const value = `${"A".repeat(500)}${"B".repeat(20_000)}${"C".repeat(500)}`;
|
||||
const truncated = truncateTextWithHeadAndTail(value, 2_000);
|
||||
assert.ok(truncated.startsWith("AAA"));
|
||||
assert.ok(truncated.includes("[... output truncated for request size ...]"));
|
||||
assert.ok(truncated.endsWith("CCC"));
|
||||
assert.ok(truncated.length <= 2_000);
|
||||
});
|
||||
|
||||
test("compressMessagesForRequestTooLargeRetry compresses messages without enforcing a byte budget", () => {
|
||||
const messages: ModelMessage[] = [
|
||||
{ role: "user", content: "run build" },
|
||||
{
|
||||
role: "tool",
|
||||
content: [{
|
||||
type: "tool-result",
|
||||
toolCallId: "call-1",
|
||||
toolName: "terminal_execute",
|
||||
output: { type: "text", value: "X".repeat(200_000) },
|
||||
}],
|
||||
},
|
||||
{
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "text", text: "please inspect this image" },
|
||||
{ type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
|
||||
],
|
||||
},
|
||||
];
|
||||
|
||||
const result = compressMessagesForRequestTooLargeRetry(messages);
|
||||
|
||||
assert.equal(result.didAdjust, true);
|
||||
assert.deepEqual(Object.keys(result).sort(), ["didAdjust", "messages"]);
|
||||
assert.equal(result.messages.length, messages.length);
|
||||
|
||||
const toolContent = result.messages[1].content;
|
||||
assert.ok(Array.isArray(toolContent));
|
||||
const toolPart = toolContent[0] as { output?: { value?: string } };
|
||||
assert.ok((toolPart.output?.value?.length ?? 0) < 5_000);
|
||||
|
||||
const userContent = result.messages[2].content;
|
||||
assert.ok(Array.isArray(userContent));
|
||||
assert.deepEqual(userContent[1], {
|
||||
type: "text",
|
||||
text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
|
||||
});
|
||||
});
|
||||
|
||||
test("compressMessagesForRequestTooLargeRetry reports no adjustment for compact messages", () => {
|
||||
const messages: ModelMessage[] = [{ role: "user", content: "hello" }];
|
||||
|
||||
const result = compressMessagesForRequestTooLargeRetry(messages);
|
||||
|
||||
assert.equal(result.didAdjust, false);
|
||||
assert.deepEqual(result.messages, messages);
|
||||
});
|
||||
167
infrastructure/ai/requestPayloadCompression.ts
Normal file
167
infrastructure/ai/requestPayloadCompression.ts
Normal file
@@ -0,0 +1,167 @@
|
||||
import type { ModelMessage } from "ai";
|
||||
|
||||
const RETRY_MAX_TOOL_RESULT_CHARS = 4_000;
|
||||
const RETRY_MAX_MESSAGE_TEXT_CHARS = 8_000;
|
||||
const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
|
||||
const HEAD_CHARS = 800;
|
||||
const TAIL_CHARS = 4_000;
|
||||
|
||||
export interface CompressMessagesForRequestTooLargeRetryResult {
|
||||
messages: ModelMessage[];
|
||||
didAdjust: boolean;
|
||||
}
|
||||
|
||||
/**
 * Shrink noisy terminal/build output.
 *
 * CRLF line endings become LF, runs of four or more newlines collapse to
 * three, and any run of identical consecutive lines keeps only its first
 * three occurrences.
 */
export function compressVerboseText(value: string): string {
  if (!value) return value;

  const normalized = value
    .replace(/\r\n/g, "\n")
    .replace(/\n{4,}/g, "\n\n\n");

  const kept: string[] = [];
  let runLength = 0;
  normalized.split("\n").forEach((line, index, allLines) => {
    // Length of the current run of identical consecutive lines, 1-based.
    runLength = index > 0 && allLines[index - 1] === line ? runLength + 1 : 1;
    if (runLength <= 3) kept.push(line);
  });

  return kept.join("\n");
}
|
||||
|
||||
/**
 * Shorten `value` to at most `maxChars` characters, keeping its beginning and
 * its end with a marker in between.
 *
 * @param value - Text to shorten; returned unchanged when it already fits.
 * @param maxChars - Upper bound on the length of the result.
 * @param options.headChars - Preferred number of leading characters to keep.
 * @param options.tailChars - Preferred number of trailing characters to keep.
 * @param options.marker - Text inserted between the kept head and tail.
 * @returns The shortened text. When `maxChars` leaves no useful room for the
 *   marker, a plain prefix of `value` is returned instead.
 */
export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
): string {
  if (value.length <= maxChars) return value;
  // Clamp so a negative limit yields "" rather than slicing from the end.
  const hardPrefix = (): string => value.slice(0, Math.max(0, maxChars));
  if (maxChars <= marker.length + 16) return hardPrefix();

  const budget = maxChars - marker.length;
  let head = Math.max(0, Math.min(headChars, budget));
  let tail = Math.max(0, Math.min(tailChars, budget - head));
  if (tail === 0 && tailChars > 0 && head > 0) {
    // The head alone would consume the whole budget. Share the budget in the
    // requested head:tail proportion so the end of the text is still kept.
    head = Math.floor((budget * headChars) / (headChars + tailChars));
    tail = Math.min(tailChars, budget - head);
  }

  if (head + tail <= 0 || head + tail >= value.length) return hardPrefix();

  const headText = value.slice(0, head).trimEnd();
  // slice(-0) is slice(0) and would return the entire string, so a zero-length
  // tail must be handled explicitly.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
}
|
||||
|
||||
/**
 * Compress every message for a retry after a "request too large" failure.
 *
 * No byte budget is enforced; each message is compressed independently.
 *
 * @returns `didAdjust` is true when at least one message was replaced. When
 *   nothing changed, the input array itself is returned as `messages`.
 */
export function compressMessagesForRequestTooLargeRetry(
  messages: ModelMessage[],
): CompressMessagesForRequestTooLargeRetryResult {
  const compressedMessages = messages.map((message) => compressModelMessageForRequestRetry(message));
  // The per-message helper returns the same object when nothing changed, so
  // identity comparison detects adjustments.
  const didAdjust = compressedMessages.some((candidate, index) => candidate !== messages[index]);

  if (!didAdjust) return { messages, didAdjust };
  return { messages: compressedMessages, didAdjust };
}
|
||||
|
||||
/**
 * Compress one message for the retry path.
 *
 * String content is compressed and capped at the retry text limit; array
 * content is compressed part by part. The original message object is returned
 * whenever nothing changed, so callers can detect changes by identity.
 */
function compressModelMessageForRequestRetry(message: ModelMessage): ModelMessage {
  if (typeof message.content === "string") {
    const shortened = compressAndTruncateText(message.content, RETRY_MAX_MESSAGE_TEXT_CHARS);
    if (shortened === message.content) return message;
    return { ...message, content: shortened };
  }

  if (!Array.isArray(message.content)) return message;

  const originalParts = message.content;
  const content = originalParts.map((part) => compressContentPartForRequestRetry(part));
  const changed = content.some((part, index) => part !== originalParts[index]);

  return changed ? { ...message, content } : message;
}
|
||||
|
||||
/**
 * Compress a single content part for the retry path.
 *
 * - `text` parts: text is compressed and capped at the retry text limit.
 * - `tool-result` parts whose output is `{ type: "text", value: string }`:
 *   the value is compressed and capped at the retry tool-result limit.
 * - `image` / `file` parts with string payloads: always replaced by a short
 *   text placeholder.
 *
 * The original part object is returned when nothing changed.
 */
function compressContentPartForRequestRetry(part: unknown): unknown {
  if (typeof part !== "object" || part === null) return part;
  const record = part as Record<string, unknown>;

  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") break;
      const text = compressAndTruncateText(record.text, RETRY_MAX_MESSAGE_TEXT_CHARS);
      return text === record.text ? part : { ...record, text };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") break;
      const outputRecord = output as Record<string, unknown>;
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") break;
      const value = compressAndTruncateText(outputRecord.value, RETRY_MAX_TOOL_RESULT_CHARS);
      if (value === outputRecord.value) return part;
      return { ...record, output: { ...outputRecord, value } };
    }
    case "image": {
      if (typeof record.image === "string") {
        return omittedAttachmentTextPart("image", record.image, record);
      }
      break;
    }
    case "file": {
      if (typeof record.data === "string") {
        return omittedAttachmentTextPart("file", record.data, record);
      }
      break;
    }
    default:
      break;
  }

  return part;
}
|
||||
|
||||
/** Compress verbose text first, then cap the result at `maxChars` characters. */
function compressAndTruncateText(value: string, maxChars: number): string {
  const compressed = compressVerboseText(value);
  return truncateTextWithHeadAndTail(compressed, maxChars);
}
|
||||
|
||||
/**
 * Build the text part that stands in for an attachment dropped from a request.
 *
 * The placeholder lists the attachment's `filename` and `mediaType` when those
 * are strings on `record`, followed by the payload length in characters.
 */
function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") details.push(`filename=${record.filename}`);
  if (typeof record.mediaType === "string") details.push(`mediaType=${record.mediaType}`);
  details.push(`${payload.length} chars`);

  const text = `[${label} attachment omitted to keep the AI request small: ${details.join(", ")}]`;
  return { type: "text", text };
}
|
||||
@@ -15,7 +15,7 @@ import {
|
||||
} from '../shared/toolExecutors';
|
||||
import { requestApproval } from '../shared/approvalGate';
|
||||
import { reserveSessionSlot } from '../shared/sessionExecutionQueue';
|
||||
import { truncateTextWithHeadAndTail } from '../requestPayloadBudget';
|
||||
import { truncateTextWithHeadAndTail } from '../requestPayloadCompression';
|
||||
|
||||
const MAX_LIVE_TERMINAL_STDOUT_CHARS = 24_000;
|
||||
const MAX_LIVE_TERMINAL_STDERR_CHARS = 12_000;
|
||||
|
||||
Reference in New Issue
Block a user