fix(ai): compress Catty requests only after 413 (#1327)
Some checks failed
build-packages / dedupe push run (push) Has been cancelled
build-packages / dedupe result (push) Has been cancelled
build-packages / resolve bundled mosh-client (push) Has been cancelled
build-packages / resolve bundled et-client (push) Has been cancelled
build-packages / build-macos (push) Has been cancelled
build-packages / build-windows (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-x64' || 'build-linux-x64' }} (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-arm64' || 'build-linux-arm64' }} (push) Has been cancelled
build-packages / release (push) Has been cancelled
build-packages / bump homebrew tap (push) Has been cancelled
Some checks failed
build-packages / dedupe push run (push) Has been cancelled
build-packages / dedupe result (push) Has been cancelled
build-packages / resolve bundled mosh-client (push) Has been cancelled
build-packages / resolve bundled et-client (push) Has been cancelled
build-packages / build-macos (push) Has been cancelled
build-packages / build-windows (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-x64' || 'build-linux-x64' }} (push) Has been cancelled
build-packages / ${{ needs.dedupe.outputs.skip_heavy_ci == 'true' && 'deduped build-linux-arm64' || 'build-linux-arm64' }} (push) Has been cancelled
build-packages / release (push) Has been cancelled
build-packages / bump homebrew tap (push) Has been cancelled
* fix(ai): compress Catty requests only after 413
* fix(ai): retry 413 after tool progress safely
* fix(ai): mark thrown 413 retries after tool progress
* fix(ai): preserve tool results in 413 retry
This commit is contained in:
@@ -91,6 +91,23 @@ test("buildHistoricalToolResultReplayText keeps non-terminal tool results intact
|
|||||||
assert.equal(buildHistoricalToolResultReplayText(result, toolCall), "search result summary");
|
assert.equal(buildHistoricalToolResultReplayText(result, toolCall), "search result summary");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("buildHistoricalToolResultReplayText can preserve terminal output for 413 retries", () => {
|
||||||
|
const toolCall: ToolCall = {
|
||||||
|
id: "call-1",
|
||||||
|
name: "terminal_execute",
|
||||||
|
arguments: { command: "npm test" },
|
||||||
|
};
|
||||||
|
const result: ToolResult = {
|
||||||
|
toolCallId: "call-1",
|
||||||
|
content: "real terminal output",
|
||||||
|
};
|
||||||
|
|
||||||
|
assert.equal(
|
||||||
|
buildHistoricalToolResultReplayText(result, toolCall, { preserveTerminalOutput: true }),
|
||||||
|
"real terminal output",
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
test("buildHistoricalToolReplayMaps pairs reused tool ids with the nearest preceding call", () => {
|
test("buildHistoricalToolReplayMaps pairs reused tool ids with the nearest preceding call", () => {
|
||||||
const messages: ChatMessage[] = [
|
const messages: ChatMessage[] = [
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -112,9 +112,14 @@ function findLastIndex<T>(items: T[], predicate: (item: T) => boolean): number {
|
|||||||
export function buildHistoricalToolResultReplayText(
|
export function buildHistoricalToolResultReplayText(
|
||||||
result: ToolResult,
|
result: ToolResult,
|
||||||
toolCall?: ToolCall,
|
toolCall?: ToolCall,
|
||||||
|
{
|
||||||
|
preserveTerminalOutput = false,
|
||||||
|
}: {
|
||||||
|
preserveTerminalOutput?: boolean;
|
||||||
|
} = {},
|
||||||
): string {
|
): string {
|
||||||
const toolName = toolCall?.name ?? "unknown";
|
const toolName = toolCall?.name ?? "unknown";
|
||||||
if (!isTerminalToolName(toolName)) {
|
if (!isTerminalToolName(toolName) || preserveTerminalOutput) {
|
||||||
return result.content;
|
return result.content;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ import type {
|
|||||||
ExternalAgentConfig,
|
ExternalAgentConfig,
|
||||||
ProviderAdvancedParams,
|
ProviderAdvancedParams,
|
||||||
ProviderConfig,
|
ProviderConfig,
|
||||||
|
ToolResult,
|
||||||
WebSearchConfig,
|
WebSearchConfig,
|
||||||
} from '../../../infrastructure/ai/types';
|
} from '../../../infrastructure/ai/types';
|
||||||
import { isWebSearchReady } from '../../../infrastructure/ai/types';
|
import { isWebSearchReady } from '../../../infrastructure/ai/types';
|
||||||
@@ -36,9 +37,12 @@ import {
|
|||||||
resolveContextWindow,
|
resolveContextWindow,
|
||||||
} from '../../../infrastructure/ai/contextCompaction';
|
} from '../../../infrastructure/ai/contextCompaction';
|
||||||
import {
|
import {
|
||||||
estimateUtf8Bytes,
|
compressMessagesForRequestTooLargeRetry,
|
||||||
fitMessagesToRequestPayloadBudget,
|
} from '../../../infrastructure/ai/requestPayloadCompression';
|
||||||
} from '../../../infrastructure/ai/requestPayloadBudget';
|
import {
|
||||||
|
createCattyRequestTooLargeRetryError,
|
||||||
|
hadToolProgressBeforeRequestTooLarge,
|
||||||
|
} from '../../../infrastructure/ai/cattyRequestTooLargeRetry';
|
||||||
import { createModelFromConfig } from '../../../infrastructure/ai/sdk/providers';
|
import { createModelFromConfig } from '../../../infrastructure/ai/sdk/providers';
|
||||||
import { createCattyTools } from '../../../infrastructure/ai/sdk/tools';
|
import { createCattyTools } from '../../../infrastructure/ai/sdk/tools';
|
||||||
import type { ExecutorContext } from '../../../infrastructure/ai/cattyAgent/executor';
|
import type { ExecutorContext } from '../../../infrastructure/ai/cattyAgent/executor';
|
||||||
@@ -50,6 +54,7 @@ import {
|
|||||||
buildPromptWithTerminalSelectionAttachments,
|
buildPromptWithTerminalSelectionAttachments,
|
||||||
isTerminalSelectionAttachment,
|
isTerminalSelectionAttachment,
|
||||||
} from '../../../application/state/terminalSelectionAttachment';
|
} from '../../../application/state/terminalSelectionAttachment';
|
||||||
|
import { latestAISessionsSnapshot } from '../../../application/state/aiStateSnapshots';
|
||||||
import {
|
import {
|
||||||
buildHistoricalToolReplayMaps,
|
buildHistoricalToolReplayMaps,
|
||||||
buildHistoricalToolResultReplayText,
|
buildHistoricalToolResultReplayText,
|
||||||
@@ -343,7 +348,7 @@ export function useAIChatStreaming({
|
|||||||
// Track the current assistant message ID so updates target the correct message
|
// Track the current assistant message ID so updates target the correct message
|
||||||
let activeMsgId = currentAssistantMsgId;
|
let activeMsgId = currentAssistantMsgId;
|
||||||
let lastAddedRole: 'assistant' | 'tool' = 'assistant';
|
let lastAddedRole: 'assistant' | 'tool' = 'assistant';
|
||||||
let hasRetryUnsafeToolProgress = false;
|
let hadToolProgress = false;
|
||||||
const reader = result.fullStream.getReader();
|
const reader = result.fullStream.getReader();
|
||||||
|
|
||||||
// -- Text-delta batching: accumulate deltas and flush periodically --
|
// -- Text-delta batching: accumulate deltas and flush periodically --
|
||||||
@@ -419,7 +424,16 @@ export function useAIChatStreaming({
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
const { done, value } = await reader.read();
|
let readResult: ReadableStreamReadResult<unknown>;
|
||||||
|
try {
|
||||||
|
readResult = await reader.read();
|
||||||
|
} catch (readErr) {
|
||||||
|
if (isRequestTooLargeError(readErr)) {
|
||||||
|
throw createCattyRequestTooLargeRetryError(readErr, hadToolProgress);
|
||||||
|
}
|
||||||
|
throw readErr;
|
||||||
|
}
|
||||||
|
const { done, value } = readResult;
|
||||||
if (done) break;
|
if (done) break;
|
||||||
// Use the StreamChunk union for type narrowing instead of unsafe casts
|
// Use the StreamChunk union for type narrowing instead of unsafe casts
|
||||||
const chunk = value as StreamChunk;
|
const chunk = value as StreamChunk;
|
||||||
@@ -486,7 +500,7 @@ export function useAIChatStreaming({
|
|||||||
cancelPendingFlush();
|
cancelPendingFlush();
|
||||||
flushText();
|
flushText();
|
||||||
const typedChunk = chunk as ToolCallChunk;
|
const typedChunk = chunk as ToolCallChunk;
|
||||||
hasRetryUnsafeToolProgress = true;
|
hadToolProgress = true;
|
||||||
const messageId = ensureAssistantMessage();
|
const messageId = ensureAssistantMessage();
|
||||||
const providerOptions = normalizeProviderContinuationOptions(typedChunk.providerMetadata);
|
const providerOptions = normalizeProviderContinuationOptions(typedChunk.providerMetadata);
|
||||||
updateMessageById(streamSessionId, messageId, msg => ({
|
updateMessageById(streamSessionId, messageId, msg => ({
|
||||||
@@ -512,7 +526,7 @@ export function useAIChatStreaming({
|
|||||||
cancelPendingFlush();
|
cancelPendingFlush();
|
||||||
flushText();
|
flushText();
|
||||||
const typedChunk = chunk as ToolResultChunk;
|
const typedChunk = chunk as ToolResultChunk;
|
||||||
hasRetryUnsafeToolProgress = true;
|
hadToolProgress = true;
|
||||||
// Mark the assistant message's tool execution as completed
|
// Mark the assistant message's tool execution as completed
|
||||||
updateMessageById(streamSessionId, activeMsgId, msg =>
|
updateMessageById(streamSessionId, activeMsgId, msg =>
|
||||||
msg.role === 'assistant' && msg.executionStatus === 'running'
|
msg.role === 'assistant' && msg.executionStatus === 'running'
|
||||||
@@ -559,10 +573,13 @@ export function useAIChatStreaming({
|
|||||||
console.warn('[Catty] suppressed SDK stream state error:', typedChunk.error);
|
console.warn('[Catty] suppressed SDK stream state error:', typedChunk.error);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (isRequestTooLargeError(typedChunk.error) && !hasRetryUnsafeToolProgress) {
|
if (isRequestTooLargeError(typedChunk.error)) {
|
||||||
cancelPendingFlush();
|
cancelPendingFlush();
|
||||||
flushText();
|
flushText();
|
||||||
throw typedChunk.error;
|
throw createCattyRequestTooLargeRetryError(
|
||||||
|
typedChunk.error,
|
||||||
|
hadToolProgress,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
cancelPendingFlush();
|
cancelPendingFlush();
|
||||||
flushText();
|
flushText();
|
||||||
@@ -796,44 +813,86 @@ export function useAIChatStreaming({
|
|||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Issue #5: Build SDK messages including tool-call and tool-result messages
|
let openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
|
||||||
// so the LLM maintains full conversation context
|
const buildSdkMessages = (
|
||||||
const allMessages = currentSession?.messages ?? [];
|
allMessages: ChatMessage[],
|
||||||
|
includeCurrentUserMessage: boolean,
|
||||||
|
{
|
||||||
|
preserveTerminalToolResults = new Set<ToolResult>(),
|
||||||
|
}: {
|
||||||
|
preserveTerminalToolResults?: ReadonlySet<ToolResult>;
|
||||||
|
} = {},
|
||||||
|
): Array<ModelMessage> => {
|
||||||
|
const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
|
||||||
|
const nextFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
|
||||||
|
const sdkMessages: Array<ModelMessage> = [];
|
||||||
|
let previousHistoryMessageWasToolResult = false;
|
||||||
|
|
||||||
const { resolvedToolCallsByAssistant, toolCallByToolResult } = buildHistoricalToolReplayMaps(allMessages);
|
for (const m of allMessages) {
|
||||||
|
const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
|
||||||
const sdkMessages: Array<ModelMessage> = [];
|
if (m.role === 'user') {
|
||||||
const openAIChatAssistantFieldsByMessage = new Map<ModelMessage, OpenAIChatAssistantFields | undefined>();
|
// Historical attachments are replayed as placeholders so screenshots,
|
||||||
let previousHistoryMessageWasToolResult = false;
|
// files, and terminal selections do not balloon every follow-up request.
|
||||||
for (const m of allMessages) {
|
const messageAttachments = m.attachments ?? m.images;
|
||||||
const currentMessageFollowsToolResult = previousHistoryMessageWasToolResult;
|
sdkMessages.push({
|
||||||
if (m.role === 'user') {
|
role: 'user',
|
||||||
// Historical attachments are replayed as placeholders so screenshots,
|
content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
|
||||||
// files, and terminal selections do not balloon every follow-up request.
|
});
|
||||||
const messageAttachments = m.attachments ?? m.images;
|
} else if (m.role === 'assistant') {
|
||||||
sdkMessages.push({
|
const activeContinuation = isProviderContinuationForSource(
|
||||||
role: 'user',
|
m.providerContinuation,
|
||||||
content: buildHistoricalUserReplayContent(m.content, messageAttachments ?? []),
|
continuationContext.source,
|
||||||
});
|
)
|
||||||
} else if (m.role === 'assistant') {
|
? m.providerContinuation
|
||||||
const activeContinuation = isProviderContinuationForSource(
|
: undefined;
|
||||||
m.providerContinuation,
|
const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
|
||||||
continuationContext.source,
|
m,
|
||||||
)
|
continuationContext.source,
|
||||||
? m.providerContinuation
|
);
|
||||||
: undefined;
|
if (m.toolCalls?.length) {
|
||||||
const openAIChatAssistantFields = getOpenAIChatAssistantFieldsForHistoryMessage(
|
// Only include tool calls that have matching results
|
||||||
m,
|
const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
|
||||||
continuationContext.source,
|
const resolvedCalls = resolvedToolCalls
|
||||||
);
|
? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
|
||||||
if (m.toolCalls?.length) {
|
: [];
|
||||||
// Only include tool calls that have matching results
|
const contentParts: AssistantContentPart[] = [];
|
||||||
const resolvedToolCalls = resolvedToolCallsByAssistant.get(m);
|
if (resolvedCalls.length > 0) {
|
||||||
const resolvedCalls = resolvedToolCalls
|
for (const part of activeContinuation?.reasoningParts ?? []) {
|
||||||
? m.toolCalls.filter(tc => resolvedToolCalls.has(tc))
|
if (!part.text && !part.providerOptions) continue;
|
||||||
: [];
|
contentParts.push({
|
||||||
const contentParts: AssistantContentPart[] = [];
|
type: 'reasoning' as const,
|
||||||
if (resolvedCalls.length > 0) {
|
text: part.text,
|
||||||
|
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (m.content) {
|
||||||
|
contentParts.push({
|
||||||
|
type: 'text' as const,
|
||||||
|
text: m.content,
|
||||||
|
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
for (const tc of resolvedCalls) {
|
||||||
|
const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
|
||||||
|
contentParts.push({
|
||||||
|
type: 'tool-call' as const,
|
||||||
|
toolCallId: tc.id,
|
||||||
|
toolName: tc.name,
|
||||||
|
input: tc.arguments ?? {},
|
||||||
|
...(providerOptions ? { providerOptions } : {}),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
// If all tool calls were orphaned, just include the text content
|
||||||
|
if (contentParts.length > 0) {
|
||||||
|
const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
|
||||||
|
sdkMessages.push(message);
|
||||||
|
if (resolvedCalls.length > 0) {
|
||||||
|
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (m.content) {
|
||||||
|
const contentParts: AssistantContentPart[] = [];
|
||||||
for (const part of activeContinuation?.reasoningParts ?? []) {
|
for (const part of activeContinuation?.reasoningParts ?? []) {
|
||||||
if (!part.text && !part.providerOptions) continue;
|
if (!part.text && !part.providerOptions) continue;
|
||||||
contentParts.push({
|
contentParts.push({
|
||||||
@@ -842,95 +901,91 @@ export function useAIChatStreaming({
|
|||||||
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if (m.content) {
|
|
||||||
contentParts.push({
|
contentParts.push({
|
||||||
type: 'text' as const,
|
type: 'text' as const,
|
||||||
text: m.content,
|
text: m.content,
|
||||||
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
||||||
});
|
});
|
||||||
}
|
const message: ModelMessage = {
|
||||||
for (const tc of resolvedCalls) {
|
role: 'assistant',
|
||||||
const providerOptions = activeContinuation?.toolCallProviderOptionsById?.[tc.id];
|
content: toAssistantModelContent(contentParts),
|
||||||
contentParts.push({
|
};
|
||||||
type: 'tool-call' as const,
|
|
||||||
toolCallId: tc.id,
|
|
||||||
toolName: tc.name,
|
|
||||||
input: tc.arguments ?? {},
|
|
||||||
...(providerOptions ? { providerOptions } : {}),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
// If all tool calls were orphaned, just include the text content
|
|
||||||
if (contentParts.length > 0) {
|
|
||||||
const message: ModelMessage = { role: 'assistant', content: toAssistantModelContent(contentParts) };
|
|
||||||
sdkMessages.push(message);
|
sdkMessages.push(message);
|
||||||
if (resolvedCalls.length > 0) {
|
if (currentMessageFollowsToolResult) {
|
||||||
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
|
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, nextFieldsByMessage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (m.content) {
|
} else if (m.role === 'tool' && m.toolResults?.length) {
|
||||||
const contentParts: AssistantContentPart[] = [];
|
sdkMessages.push({
|
||||||
for (const part of activeContinuation?.reasoningParts ?? []) {
|
role: 'tool',
|
||||||
if (!part.text && !part.providerOptions) continue;
|
content: m.toolResults.map(tr => {
|
||||||
contentParts.push({
|
const toolCall = toolCallByToolResult.get(tr);
|
||||||
type: 'reasoning' as const,
|
return {
|
||||||
text: part.text,
|
type: 'tool-result' as const,
|
||||||
...(part.providerOptions ? { providerOptions: part.providerOptions } : {}),
|
toolCallId: tr.toolCallId,
|
||||||
});
|
toolName: toolCall?.name ?? 'unknown',
|
||||||
}
|
output: {
|
||||||
contentParts.push({
|
type: 'text' as const,
|
||||||
type: 'text' as const,
|
value: buildHistoricalToolResultReplayText(tr, toolCall, {
|
||||||
text: m.content,
|
preserveTerminalOutput: preserveTerminalToolResults.has(tr),
|
||||||
...(activeContinuation?.textProviderOptions ? { providerOptions: activeContinuation.textProviderOptions } : {}),
|
}),
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}),
|
||||||
});
|
});
|
||||||
const message: ModelMessage = {
|
}
|
||||||
role: 'assistant',
|
previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
|
||||||
content: toAssistantModelContent(contentParts),
|
}
|
||||||
};
|
|
||||||
sdkMessages.push(message);
|
if (includeCurrentUserMessage) {
|
||||||
if (currentMessageFollowsToolResult) {
|
// Build the current user message — include attachments as multimodal content
|
||||||
rememberOpenAIChatAssistantFields(message, openAIChatAssistantFields, openAIChatAssistantFieldsByMessage);
|
if (attachments?.length) {
|
||||||
|
const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
|
||||||
|
const modelAttachments = attachments.filter(
|
||||||
|
(attachment) => !isTerminalSelectionAttachment(attachment),
|
||||||
|
);
|
||||||
|
if (!modelAttachments.length) {
|
||||||
|
sdkMessages.push({ role: 'user', content: modelText });
|
||||||
|
} else {
|
||||||
|
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
|
||||||
|
parts.push({ type: 'text', text: modelText });
|
||||||
|
for (const att of modelAttachments) {
|
||||||
|
if (att.mediaType.startsWith('image/')) {
|
||||||
|
parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
|
||||||
|
} else {
|
||||||
|
parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sdkMessages.push({ role: 'user', content: parts });
|
||||||
}
|
}
|
||||||
}
|
|
||||||
} else if (m.role === 'tool' && m.toolResults?.length) {
|
|
||||||
sdkMessages.push({
|
|
||||||
role: 'tool',
|
|
||||||
content: m.toolResults.map(tr => {
|
|
||||||
const toolCall = toolCallByToolResult.get(tr);
|
|
||||||
return {
|
|
||||||
type: 'tool-result' as const,
|
|
||||||
toolCallId: tr.toolCallId,
|
|
||||||
toolName: toolCall?.name ?? 'unknown',
|
|
||||||
output: { type: 'text' as const, value: buildHistoricalToolResultReplayText(tr, toolCall) },
|
|
||||||
};
|
|
||||||
}),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
previousHistoryMessageWasToolResult = m.role === 'tool' && !!m.toolResults?.length;
|
|
||||||
}
|
|
||||||
// Build the current user message — include attachments as multimodal content
|
|
||||||
if (attachments?.length) {
|
|
||||||
const modelText = buildPromptWithTerminalSelectionAttachments(trimmed, attachments);
|
|
||||||
const modelAttachments = attachments.filter(
|
|
||||||
(attachment) => !isTerminalSelectionAttachment(attachment),
|
|
||||||
);
|
|
||||||
if (!modelAttachments.length) {
|
|
||||||
sdkMessages.push({ role: 'user', content: modelText });
|
|
||||||
} else {
|
|
||||||
const parts: Array<{ type: 'text'; text: string } | { type: 'image'; image: string; mediaType?: string } | { type: 'file'; data: string; mediaType: string; filename?: string }> = [];
|
|
||||||
parts.push({ type: 'text', text: modelText });
|
|
||||||
for (const att of modelAttachments) {
|
|
||||||
if (att.mediaType.startsWith('image/')) {
|
|
||||||
parts.push({ type: 'image', image: att.base64Data, mediaType: att.mediaType });
|
|
||||||
} else {
|
} else {
|
||||||
parts.push({ type: 'file', data: att.base64Data, mediaType: att.mediaType, filename: att.filename });
|
sdkMessages.push({ role: 'user', content: trimmed });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
sdkMessages.push({ role: 'user', content: parts });
|
|
||||||
|
openAIChatAssistantFieldsByMessage = nextFieldsByMessage;
|
||||||
|
return sdkMessages;
|
||||||
|
};
|
||||||
|
|
||||||
|
const sdkMessages = buildSdkMessages(currentSession?.messages ?? [], true);
|
||||||
|
const collectToolResultsAfterMessage = (
|
||||||
|
messages: ChatMessage[],
|
||||||
|
messageId: string,
|
||||||
|
): Set<ToolResult> => {
|
||||||
|
const results = new Set<ToolResult>();
|
||||||
|
let afterMessage = false;
|
||||||
|
for (const message of messages) {
|
||||||
|
if (message.id === messageId) {
|
||||||
|
afterMessage = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!afterMessage || message.role !== 'tool' || !message.toolResults?.length) continue;
|
||||||
|
for (const result of message.toolResults) {
|
||||||
|
results.add(result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
return results;
|
||||||
sdkMessages.push({ role: 'user', content: trimmed });
|
};
|
||||||
}
|
|
||||||
|
|
||||||
// Create model with placeholder API key — the main process injects the real
|
// Create model with placeholder API key — the main process injects the real
|
||||||
// decrypted key when the HTTP request is proxied through IPC, so plaintext
|
// decrypted key when the HTTP request is proxied through IPC, so plaintext
|
||||||
@@ -958,20 +1013,12 @@ export function useAIChatStreaming({
|
|||||||
defaultContextWindow: DEFAULT_CONTEXT_WINDOW_TOKENS,
|
defaultContextWindow: DEFAULT_CONTEXT_WINDOW_TOKENS,
|
||||||
});
|
});
|
||||||
const outputReserveTokens = Math.min(4096, Math.ceil(contextWindow * 0.05));
|
const outputReserveTokens = Math.min(4096, Math.ceil(contextWindow * 0.05));
|
||||||
const requestReserveTokens = outputReserveTokens + estimateUnknownTokens({
|
const getRequestReserveTokens = () => outputReserveTokens + estimateUnknownTokens({
|
||||||
systemPrompt,
|
systemPrompt,
|
||||||
toolNames: Object.keys(tools),
|
toolNames: Object.keys(tools),
|
||||||
openAIChatAssistantFields: Array.from(openAIChatAssistantFieldsByMessage.values()),
|
openAIChatAssistantFields: Array.from(openAIChatAssistantFieldsByMessage.values()),
|
||||||
});
|
});
|
||||||
|
|
||||||
const payloadReservedBytes = estimateUtf8Bytes({
|
|
||||||
system: systemPrompt,
|
|
||||||
tools: Object.keys(tools),
|
|
||||||
});
|
|
||||||
const applyRequestPayloadBudget = (messages: ModelMessage[]) => fitMessagesToRequestPayloadBudget({
|
|
||||||
messages,
|
|
||||||
reservedBytes: payloadReservedBytes,
|
|
||||||
});
|
|
||||||
const summarizeForCompaction = async (messagesToSummarize: ModelMessage[]) => {
|
const summarizeForCompaction = async (messagesToSummarize: ModelMessage[]) => {
|
||||||
updateLastMessage(sessionId, msg => ({ ...msg, statusText: 'Compacting earlier context...' }));
|
updateLastMessage(sessionId, msg => ({ ...msg, statusText: 'Compacting earlier context...' }));
|
||||||
const result = await generateText({
|
const result = await generateText({
|
||||||
@@ -999,64 +1046,64 @@ export function useAIChatStreaming({
|
|||||||
);
|
);
|
||||||
return pruned;
|
return pruned;
|
||||||
};
|
};
|
||||||
const compactAndBudgetMessages = async (
|
const compactMessages = async (
|
||||||
messages: ModelMessage[],
|
messages: ModelMessage[],
|
||||||
{
|
{
|
||||||
force = false,
|
force = false,
|
||||||
statusText,
|
statusText,
|
||||||
trimLog,
|
|
||||||
fallbackLog,
|
fallbackLog,
|
||||||
|
compressForRequestTooLargeRetry = false,
|
||||||
|
compressionLog,
|
||||||
}: {
|
}: {
|
||||||
force?: boolean;
|
force?: boolean;
|
||||||
statusText?: string;
|
statusText?: string;
|
||||||
trimLog: string;
|
|
||||||
fallbackLog: string;
|
fallbackLog: string;
|
||||||
|
compressForRequestTooLargeRetry?: boolean;
|
||||||
|
compressionLog?: string;
|
||||||
},
|
},
|
||||||
): Promise<ModelMessage[]> => {
|
): Promise<ModelMessage[]> => {
|
||||||
|
const compressRetryMessages = (candidateMessages: ModelMessage[], log?: string): ModelMessage[] => {
|
||||||
|
if (!compressForRequestTooLargeRetry) return candidateMessages;
|
||||||
|
const compressed = compressMessagesForRequestTooLargeRetry(candidateMessages);
|
||||||
|
if (compressed.didAdjust && log) {
|
||||||
|
console.warn(log);
|
||||||
|
}
|
||||||
|
return compressed.messages;
|
||||||
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (statusText) {
|
if (statusText) {
|
||||||
updateLastMessage(sessionId, msg => ({ ...msg, statusText }));
|
updateLastMessage(sessionId, msg => ({ ...msg, statusText }));
|
||||||
}
|
}
|
||||||
|
const inputMessages = compressRetryMessages(messages, compressionLog);
|
||||||
const compacted = await prepareContextCompaction({
|
const compacted = await prepareContextCompaction({
|
||||||
messages,
|
messages: inputMessages,
|
||||||
contextWindow,
|
contextWindow,
|
||||||
reservedTokens: requestReserveTokens,
|
reservedTokens: getRequestReserveTokens(),
|
||||||
thresholdRatio: force ? 0 : undefined,
|
thresholdRatio: force ? 0 : undefined,
|
||||||
protectRecentMessages: DEFAULT_PROTECT_RECENT_MESSAGES,
|
protectRecentMessages: DEFAULT_PROTECT_RECENT_MESSAGES,
|
||||||
summarize: summarizeForCompaction,
|
summarize: summarizeForCompaction,
|
||||||
});
|
});
|
||||||
let nextMessages = force && !compacted.didCompact
|
let nextMessages = force && !compacted.didCompact
|
||||||
? keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES)
|
? keepRecentContextMessages(inputMessages, DEFAULT_PROTECT_RECENT_MESSAGES)
|
||||||
: compacted.messages;
|
: compacted.messages;
|
||||||
const budgetResult = applyRequestPayloadBudget(nextMessages);
|
return compressRetryMessages(nextMessages);
|
||||||
if (budgetResult.didAdjust) {
|
|
||||||
console.warn(`${trimLog} ${budgetResult.estimatedBytes} bytes.`);
|
|
||||||
nextMessages = budgetResult.messages;
|
|
||||||
}
|
|
||||||
return nextMessages;
|
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
if (abortController.signal.aborted) throw err;
|
if (abortController.signal.aborted) throw err;
|
||||||
console.warn(fallbackLog, err);
|
console.warn(fallbackLog, err);
|
||||||
const fallbackBudget = applyRequestPayloadBudget(
|
const fallbackMessages = keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES);
|
||||||
keepRecentContextMessages(messages, DEFAULT_PROTECT_RECENT_MESSAGES),
|
if (!compressForRequestTooLargeRetry) {
|
||||||
);
|
return fallbackMessages;
|
||||||
if (fallbackBudget.didAdjust) {
|
|
||||||
console.warn(
|
|
||||||
`[Catty] Request payload trimmed to ${fallbackBudget.estimatedBytes} bytes after compaction fallback.`,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
return fallbackBudget.messages;
|
const compressed = compressMessagesForRequestTooLargeRetry(fallbackMessages);
|
||||||
|
if (compressed.didAdjust) {
|
||||||
|
console.warn('[Catty] Request content compressed after compaction fallback.');
|
||||||
|
}
|
||||||
|
return compressed.messages;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
const payloadBudgetResult = applyRequestPayloadBudget(sdkMessages);
|
let messagesForStream = sdkMessages;
|
||||||
let messagesForStream = payloadBudgetResult.messages;
|
messagesForStream = await compactMessages(messagesForStream, {
|
||||||
if (payloadBudgetResult.didAdjust) {
|
|
||||||
console.warn(
|
|
||||||
`[Catty] Request payload trimmed to ${payloadBudgetResult.estimatedBytes} bytes to avoid HTTP 413.`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
messagesForStream = await compactAndBudgetMessages(messagesForStream, {
|
|
||||||
trimLog: '[Catty] Request payload re-trimmed after context compaction to',
|
|
||||||
fallbackLog: '[Catty] Context compaction failed; falling back to recent messages only:',
|
fallbackLog: '[Catty] Context compaction failed; falling back to recent messages only:',
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1080,23 +1127,50 @@ export function useAIChatStreaming({
|
|||||||
}
|
}
|
||||||
|
|
||||||
console.warn('[Catty] Request hit HTTP 413; forcing context compaction and retrying once.', streamErr);
|
console.warn('[Catty] Request hit HTTP 413; forcing context compaction and retrying once.', streamErr);
|
||||||
updateMessageById(sessionId, assistantMsgId, msg => ({
|
const statusText = 'Request was too large. Compacting context and retrying...';
|
||||||
...msg,
|
const hadToolProgress = hadToolProgressBeforeRequestTooLarge(streamErr);
|
||||||
content: '',
|
let retryBaseMessages = messagesForStream;
|
||||||
thinking: undefined,
|
let retryAssistantMsgId = assistantMsgId;
|
||||||
thinkingDurationMs: undefined,
|
if (hadToolProgress) {
|
||||||
providerContinuation: undefined,
|
const latestSession = latestAISessionsSnapshot?.find(session => session.id === sessionId);
|
||||||
toolCalls: undefined,
|
if (latestSession) {
|
||||||
errorInfo: undefined,
|
retryBaseMessages = buildSdkMessages(latestSession.messages, false, {
|
||||||
executionStatus: undefined,
|
preserveTerminalToolResults: collectToolResultsAfterMessage(
|
||||||
pendingApproval: undefined,
|
latestSession.messages,
|
||||||
statusText: 'Request was too large. Compacting context and retrying...',
|
assistantMsgId,
|
||||||
}));
|
),
|
||||||
const retryMessages = prepareMessagesForStream(await compactAndBudgetMessages(messagesForStream, {
|
});
|
||||||
|
}
|
||||||
|
retryAssistantMsgId = generateId();
|
||||||
|
addMessageToSession(sessionId, {
|
||||||
|
id: retryAssistantMsgId,
|
||||||
|
role: 'assistant',
|
||||||
|
content: '',
|
||||||
|
timestamp: Date.now(),
|
||||||
|
model: activeModelId || context.activeProvider?.defaultModel || '',
|
||||||
|
providerId: context.activeProvider?.providerId,
|
||||||
|
statusText,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
updateMessageById(sessionId, assistantMsgId, msg => ({
|
||||||
|
...msg,
|
||||||
|
content: '',
|
||||||
|
thinking: undefined,
|
||||||
|
thinkingDurationMs: undefined,
|
||||||
|
providerContinuation: undefined,
|
||||||
|
toolCalls: undefined,
|
||||||
|
errorInfo: undefined,
|
||||||
|
executionStatus: undefined,
|
||||||
|
pendingApproval: undefined,
|
||||||
|
statusText,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
const retryMessages = prepareMessagesForStream(await compactMessages(retryBaseMessages, {
|
||||||
force: true,
|
force: true,
|
||||||
statusText: 'Request was too large. Compacting context and retrying...',
|
statusText,
|
||||||
trimLog: '[Catty] Request payload trimmed after forced context compaction to',
|
|
||||||
fallbackLog: '[Catty] Forced context compaction after 413 failed; falling back to recent messages only:',
|
fallbackLog: '[Catty] Forced context compaction after 413 failed; falling back to recent messages only:',
|
||||||
|
compressForRequestTooLargeRetry: true,
|
||||||
|
compressionLog: '[Catty] Request content compressed after forced context compaction.',
|
||||||
}));
|
}));
|
||||||
|
|
||||||
await processCattyStream(
|
await processCattyStream(
|
||||||
@@ -1106,7 +1180,7 @@ export function useAIChatStreaming({
|
|||||||
tools,
|
tools,
|
||||||
retryMessages,
|
retryMessages,
|
||||||
abortController.signal,
|
abortController.signal,
|
||||||
assistantMsgId,
|
retryAssistantMsgId,
|
||||||
context.activeProvider?.advancedParams,
|
context.activeProvider?.advancedParams,
|
||||||
continuationContext,
|
continuationContext,
|
||||||
);
|
);
|
||||||
@@ -1123,7 +1197,7 @@ export function useAIChatStreaming({
|
|||||||
}
|
}
|
||||||
}, [
|
}, [
|
||||||
processCattyStream, reportStreamError, setStreamingForScope,
|
processCattyStream, reportStreamError, setStreamingForScope,
|
||||||
updateLastMessage, updateMessageById,
|
addMessageToSession, updateLastMessage, updateMessageById,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
|
|||||||
29
infrastructure/ai/cattyRequestTooLargeRetry.test.ts
Normal file
29
infrastructure/ai/cattyRequestTooLargeRetry.test.ts
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
import test from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
|
||||||
|
import {
|
||||||
|
createCattyRequestTooLargeRetryError,
|
||||||
|
hadToolProgressBeforeRequestTooLarge,
|
||||||
|
} from "./cattyRequestTooLargeRetry.ts";
|
||||||
|
|
||||||
|
test("createCattyRequestTooLargeRetryError marks 413 retry errors after tool progress", () => {
|
||||||
|
const source = Object.assign(new Error("HTTP 413 Request Entity Too Large"), {
|
||||||
|
status: 413,
|
||||||
|
responseBody: "<html>too large</html>",
|
||||||
|
});
|
||||||
|
|
||||||
|
const retryError = createCattyRequestTooLargeRetryError(source, true);
|
||||||
|
|
||||||
|
assert.equal(retryError.statusCode, 413);
|
||||||
|
assert.equal(retryError.status, 413);
|
||||||
|
assert.equal(retryError.responseBody, "<html>too large</html>");
|
||||||
|
assert.equal(retryError.cause, source);
|
||||||
|
assert.equal(hadToolProgressBeforeRequestTooLarge(retryError), true);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("hadToolProgressBeforeRequestTooLarge is false when no tool progress was recorded", () => {
|
||||||
|
const retryError = createCattyRequestTooLargeRetryError("HTTP 413", false);
|
||||||
|
|
||||||
|
assert.equal(hadToolProgressBeforeRequestTooLarge(retryError), false);
|
||||||
|
assert.equal(hadToolProgressBeforeRequestTooLarge(new Error("HTTP 413")), false);
|
||||||
|
});
|
||||||
34
infrastructure/ai/cattyRequestTooLargeRetry.ts
Normal file
34
infrastructure/ai/cattyRequestTooLargeRetry.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
/**
 * Error used to signal that a request was rejected as too large (HTTP 413)
 * and carries the metadata the retry path inspects.
 */
export type CattyRequestTooLargeRetryError = Error & {
  /** True when tool progress had been recorded before the 413 was raised. */
  cattyHadToolProgress?: boolean;
  /** Always set to 413 by `createCattyRequestTooLargeRetryError`. */
  statusCode?: number;
  /** Numeric `status` copied from the source error, when it had one. */
  status?: number;
  /** String `responseBody` copied from the source error, when it had one. */
  responseBody?: string;
  /** The original thrown value; declared here so pre-ES2022 `lib` targets type-check. */
  cause?: unknown;
};

/**
 * Wraps an arbitrary thrown value in a `CattyRequestTooLargeRetryError`.
 *
 * @param error - The value that was thrown; kept verbatim as `cause`.
 * @param hadToolProgress - Whether tool progress happened before the failure.
 * @returns A new error with `statusCode` 413, the progress flag, and the
 *   source's numeric `status` / string `responseBody` when present.
 */
export function createCattyRequestTooLargeRetryError(
  error: unknown,
  hadToolProgress: boolean,
): CattyRequestTooLargeRetryError {
  const fallbackMessage = 'Request too large';
  const source = error !== null && typeof error === 'object'
    ? (error as Record<string, unknown>)
    : undefined;

  let message: string;
  if (error instanceof Error) {
    message = error.message;
  } else if (source) {
    // Plain objects would stringify to "[object Object]"; prefer their own
    // message field and otherwise fall back to the generic text.
    message = typeof source.message === 'string' && source.message
      ? source.message
      : fallbackMessage;
  } else {
    message = String(error ?? fallbackMessage);
  }

  const retryError: CattyRequestTooLargeRetryError = Object.assign(new Error(message), {
    name: 'CattyRequestTooLargeRetryError',
    cause: error,
    cattyHadToolProgress: hadToolProgress,
    statusCode: 413,
  });

  if (source) {
    if (typeof source.status === 'number') retryError.status = source.status;
    if (typeof source.responseBody === 'string') retryError.responseBody = source.responseBody;
  }
  return retryError;
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a thrown value carries a truthy `cattyHadToolProgress` flag.
 *
 * @param error - Any thrown value.
 * @returns `false` for non-objects and for objects without the flag.
 */
export function hadToolProgressBeforeRequestTooLarge(error: unknown): boolean {
  if (typeof error !== 'object' || error === null) {
    return false;
  }
  const { cattyHadToolProgress } = error as { cattyHadToolProgress?: boolean };
  return Boolean(cattyHadToolProgress);
}
|
||||||
@@ -1,233 +0,0 @@
|
|||||||
import test from "node:test";
|
|
||||||
import assert from "node:assert/strict";
|
|
||||||
import type { ModelMessage } from "ai";
|
|
||||||
|
|
||||||
import {
|
|
||||||
DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
|
|
||||||
compressVerboseText,
|
|
||||||
estimateUtf8Bytes,
|
|
||||||
fitMessagesToRequestPayloadBudget,
|
|
||||||
truncateTextWithHeadAndTail,
|
|
||||||
} from "./requestPayloadBudget.ts";
|
|
||||||
|
|
||||||
test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
|
|
||||||
const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
|
|
||||||
const output = compressVerboseText(input);
|
|
||||||
assert.match(output, /line1\n\n\nline2/);
|
|
||||||
assert.ok(output.split("\nsame\n").length <= 3);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
|
|
||||||
const value = `${"A".repeat(500)}${"B".repeat(20_000)}${"C".repeat(500)}`;
|
|
||||||
const truncated = truncateTextWithHeadAndTail(value, 2_000);
|
|
||||||
assert.ok(truncated.startsWith("AAA"));
|
|
||||||
assert.ok(truncated.includes("[... output truncated for request size ...]"));
|
|
||||||
assert.ok(truncated.endsWith("CCC"));
|
|
||||||
assert.ok(truncated.length <= 2_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget truncates verbose tool results before dropping recent turns", () => {
|
|
||||||
const messages: ModelMessage[] = [
|
|
||||||
{ role: "user", content: "run build" },
|
|
||||||
{
|
|
||||||
role: "assistant",
|
|
||||||
content: [{
|
|
||||||
type: "tool-call",
|
|
||||||
toolCallId: "call-1",
|
|
||||||
toolName: "terminal_execute",
|
|
||||||
input: { command: "npm run build" },
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: "tool",
|
|
||||||
content: [{
|
|
||||||
type: "tool-result",
|
|
||||||
toolCallId: "call-1",
|
|
||||||
toolName: "terminal_execute",
|
|
||||||
output: { type: "text", value: "X".repeat(200_000) },
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
{ role: "user", content: "what failed?" },
|
|
||||||
];
|
|
||||||
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages,
|
|
||||||
maxPayloadBytes: 20_000,
|
|
||||||
reservedBytes: 2_000,
|
|
||||||
maxToolResultChars: 4_000,
|
|
||||||
protectRecentMessages: 4,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.equal(result.messages.length, 4);
|
|
||||||
const toolMessage = result.messages[2];
|
|
||||||
assert.equal(toolMessage.role, "tool");
|
|
||||||
assert.ok(Array.isArray(toolMessage.content));
|
|
||||||
const toolPart = toolMessage.content[0] as { output?: { value?: string } };
|
|
||||||
assert.ok((toolPart.output?.value?.length ?? 0) < 5_000);
|
|
||||||
assert.ok(result.estimatedBytes <= 20_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget drops older turns when truncation alone is insufficient", () => {
|
|
||||||
const messages: ModelMessage[] = [];
|
|
||||||
for (let turn = 0; turn < 12; turn += 1) {
|
|
||||||
messages.push({ role: "user", content: `question ${turn}` });
|
|
||||||
messages.push({ role: "assistant", content: `answer ${turn} ${"Z".repeat(20_000)}` });
|
|
||||||
}
|
|
||||||
messages.push({ role: "user", content: "latest question" });
|
|
||||||
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages,
|
|
||||||
maxPayloadBytes: 8_000,
|
|
||||||
reservedBytes: 500,
|
|
||||||
protectRecentMessages: 4,
|
|
||||||
maxMessageTextChars: 2_000,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.ok(result.messages.length < messages.length);
|
|
||||||
assert.equal(result.messages.at(-1)?.role, "user");
|
|
||||||
assert.match(String(result.messages.at(-1)?.content ?? ""), /latest question/);
|
|
||||||
assert.ok(result.estimatedBytes <= 8_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("estimateUtf8Bytes measures JSON payload size in UTF-8 bytes", () => {
|
|
||||||
const bytes = estimateUtf8Bytes({ text: "caf\u00e9" });
|
|
||||||
assert.ok(bytes > 8);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("estimateUtf8Bytes works in renderer-like environments without Buffer", () => {
|
|
||||||
const originalBuffer = globalThis.Buffer;
|
|
||||||
try {
|
|
||||||
(globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = undefined;
|
|
||||||
assert.equal(estimateUtf8Bytes({ text: "caf\u00e9" }), new TextEncoder().encode(JSON.stringify({ text: "caf\u00e9" })).byteLength);
|
|
||||||
} finally {
|
|
||||||
(globalThis as typeof globalThis & { Buffer?: typeof Buffer }).Buffer = originalBuffer;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
test("default payload budget remains a general gateway guard", () => {
|
|
||||||
assert.equal(DEFAULT_MAX_REQUEST_PAYLOAD_BYTES, 1_500_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget preserves current long text when the request is under budget", () => {
|
|
||||||
const currentText = "CURRENT ".repeat(4_000);
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages: [{ role: "user", content: currentText }],
|
|
||||||
maxPayloadBytes: 100_000,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.equal(result.didAdjust, false);
|
|
||||||
assert.equal(result.messages[0].content, currentText);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget reports didAdjust when initial truncation succeeds", () => {
|
|
||||||
const messages: ModelMessage[] = [
|
|
||||||
{ role: "user", content: "run build" },
|
|
||||||
{
|
|
||||||
role: "tool",
|
|
||||||
content: [{
|
|
||||||
type: "tool-result",
|
|
||||||
toolCallId: "call-1",
|
|
||||||
toolName: "terminal_execute",
|
|
||||||
output: { type: "text", value: "X".repeat(200_000) },
|
|
||||||
}],
|
|
||||||
},
|
|
||||||
];
|
|
||||||
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages,
|
|
||||||
maxPayloadBytes: 20_000,
|
|
||||||
reservedBytes: 2_000,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.equal(result.didAdjust, true);
|
|
||||||
assert.ok(result.estimatedBytes <= 20_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget keeps dropping messages after emergency caps when still over budget", () => {
|
|
||||||
const messages: ModelMessage[] = [];
|
|
||||||
for (let turn = 0; turn < 8; turn += 1) {
|
|
||||||
messages.push({ role: "user", content: `question ${turn} ${"Q".repeat(5_000)}` });
|
|
||||||
messages.push({ role: "assistant", content: `answer ${turn} ${"A".repeat(5_000)}` });
|
|
||||||
}
|
|
||||||
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages,
|
|
||||||
maxPayloadBytes: 5_000,
|
|
||||||
protectRecentMessages: 8,
|
|
||||||
maxMessageTextChars: 2_000,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.ok(result.messages.length < messages.length);
|
|
||||||
assert.ok(result.estimatedBytes <= 5_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget shrinks a single oversized message for very small budgets", () => {
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages: [{ role: "assistant", content: "Z".repeat(1_000_000) }],
|
|
||||||
maxPayloadBytes: 1_000,
|
|
||||||
maxMessageTextChars: 500,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.equal(result.messages.length, 1);
|
|
||||||
assert.ok(result.estimatedBytes <= 1_000);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget returns empty messages when budget is fully reserved", () => {
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages: [{ role: "user", content: "hello" }],
|
|
||||||
maxPayloadBytes: 100,
|
|
||||||
reservedBytes: 200,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.deepEqual(result.messages, []);
|
|
||||||
assert.equal(result.didAdjust, true);
|
|
||||||
assert.equal(result.estimatedBytes, 0);
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget omits latest attachments only when they are still over budget at the last resort", () => {
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages: [{
|
|
||||||
role: "user",
|
|
||||||
content: [
|
|
||||||
{ type: "text", text: "please inspect this image" },
|
|
||||||
{ type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
|
|
||||||
],
|
|
||||||
}],
|
|
||||||
maxPayloadBytes: 20_000,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.ok(result.estimatedBytes <= 20_000);
|
|
||||||
assert.equal(result.messages.length, 1);
|
|
||||||
const content = result.messages[0].content;
|
|
||||||
assert.ok(Array.isArray(content));
|
|
||||||
assert.deepEqual(content[1], {
|
|
||||||
type: "text",
|
|
||||||
text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
test("fitMessagesToRequestPayloadBudget omits older oversized attachment payloads as a last resort", () => {
|
|
||||||
const result = fitMessagesToRequestPayloadBudget({
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: "user",
|
|
||||||
content: [
|
|
||||||
{ type: "text", text: "older image" },
|
|
||||||
{ type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
|
|
||||||
],
|
|
||||||
},
|
|
||||||
{ role: "user", content: "current question" },
|
|
||||||
],
|
|
||||||
maxPayloadBytes: 20_000,
|
|
||||||
protectRecentMessages: 2,
|
|
||||||
});
|
|
||||||
|
|
||||||
assert.ok(result.estimatedBytes <= 20_000);
|
|
||||||
assert.equal(result.messages.length, 2);
|
|
||||||
const content = result.messages[0].content;
|
|
||||||
assert.ok(Array.isArray(content));
|
|
||||||
assert.deepEqual(content[1], {
|
|
||||||
type: "text",
|
|
||||||
text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
|
|
||||||
});
|
|
||||||
});
|
|
||||||
@@ -1,335 +0,0 @@
|
|||||||
import type { ModelMessage } from "ai";
|
|
||||||
import { findSafeCompactionSplitIndex } from "./contextCompaction";
|
|
||||||
|
|
||||||
/** Stay below typical nginx `client_max_body_size` defaults (often 1-2 MB). */
|
|
||||||
export const DEFAULT_MAX_REQUEST_PAYLOAD_BYTES = 1_500_000;
|
|
||||||
/** Per tool-result text cap before the sliding window drops older turns. */
|
|
||||||
export const DEFAULT_MAX_TOOL_RESULT_CHARS = 12_000;
|
|
||||||
/** Per plain user/assistant text cap inside a single history message. */
|
|
||||||
export const DEFAULT_MAX_MESSAGE_TEXT_CHARS = 24_000;
|
|
||||||
/** Keep this many recent messages while trimming payload size. */
|
|
||||||
export const DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES = 8;
|
|
||||||
|
|
||||||
const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";
|
|
||||||
const HEAD_CHARS = 800;
|
|
||||||
const TAIL_CHARS = 4_000;
|
|
||||||
|
|
||||||
/** Options accepted by `fitMessagesToRequestPayloadBudget`. */
export interface FitMessagesToRequestPayloadBudgetInput {
  /** Conversation to fit into the payload budget. */
  messages: ModelMessage[];
  /** Upper bound for the payload in bytes; defaults to `DEFAULT_MAX_REQUEST_PAYLOAD_BYTES`. */
  maxPayloadBytes?: number;
  /** Bytes subtracted from `maxPayloadBytes` before fitting (negative values count as 0); defaults to 0. */
  reservedBytes?: number;
  /** Starting character cap for tool-result text; defaults to `DEFAULT_MAX_TOOL_RESULT_CHARS`. */
  maxToolResultChars?: number;
  /** Starting character cap for plain message text; defaults to `DEFAULT_MAX_MESSAGE_TEXT_CHARS`. */
  maxMessageTextChars?: number;
  /** Number of most recent messages kept while older turns are dropped; defaults to `DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES`. */
  protectRecentMessages?: number;
  /** When true (the default), a trailing user message is left untouched until the last-resort pass. */
  preserveLatestMessage?: boolean;
}
|
|
||||||
|
|
||||||
/** Outcome of `fitMessagesToRequestPayloadBudget`. */
export interface FitMessagesToRequestPayloadBudgetResult {
  /** The messages to send (the input array itself when nothing had to change). */
  messages: ModelMessage[];
  /** Whether the returned messages differ from the input. */
  didAdjust: boolean;
  /** Byte estimate of the returned messages (0 when the budget is fully reserved). */
  estimatedBytes: number;
}
|
|
||||||
|
|
||||||
/**
 * Estimates how many UTF-8 bytes `value` occupies once serialized for a request.
 *
 * @param value - Any value; it is serialized before being measured.
 * @returns The UTF-8 byte length of the serialized form.
 */
export function estimateUtf8Bytes(value: unknown): number {
  return utf8ByteLength(stringifyForByteEstimate(value));
}
|
|
||||||
|
|
||||||
/**
 * Serializes `value` for size estimation without ever throwing on
 * unserializable input.
 *
 * @param value - Any value.
 * @returns The JSON text; `""` when JSON has no representation for the value
 *   (`undefined`, functions, symbols); `String(value)` when serialization
 *   throws (for example circular structures or BigInt).
 */
function stringifyForByteEstimate(value: unknown): string {
  try {
    // JSON.stringify returns undefined, not a string, for undefined/functions/symbols.
    return JSON.stringify(value) ?? "";
  } catch {
    return String(value ?? "");
  }
}
|
|
||||||
|
|
||||||
/**
 * Measures the UTF-8 encoded size of a string.
 *
 * Uses `Buffer.byteLength` when a `Buffer` global is available and falls back
 * to `TextEncoder` otherwise.
 *
 * @param value - Text to measure; `undefined` is measured as the empty string.
 * @returns The number of bytes in the UTF-8 encoding.
 */
function utf8ByteLength(value: string | undefined): number {
  const text = value ?? "";
  const hasBuffer = typeof Buffer !== "undefined" && typeof Buffer.byteLength === "function";
  return hasBuffer
    ? Buffer.byteLength(text, "utf8")
    : new TextEncoder().encode(text).byteLength;
}
|
|
||||||
|
|
||||||
/**
 * Squeezes noisy terminal/build output before its size is measured.
 *
 * CRLF line endings become LF, runs of four or more newlines shrink to three,
 * and a run of identical consecutive lines keeps at most three copies.
 *
 * @param value - Text to squeeze; empty input is returned unchanged.
 * @returns The squeezed text.
 */
export function compressVerboseText(value: string): string {
  if (!value) return value;

  const normalized = value
    .replace(/\r\n/g, "\n")
    .replace(/\n{4,}/g, "\n\n\n");

  const kept: string[] = [];
  let extraCopies = 0;
  normalized.split("\n").forEach((line) => {
    const repeatsPrevious = kept.length > 0 && kept[kept.length - 1] === line;
    if (!repeatsPrevious) {
      extraCopies = 0;
      kept.push(line);
      return;
    }
    // Same line as the one just kept: allow two extra copies, drop the rest.
    extraCopies += 1;
    if (extraCopies <= 2) kept.push(line);
  });

  return kept.join("\n");
}
|
|
||||||
|
|
||||||
/**
 * Shortens `value` to at most `maxChars` characters by keeping its beginning
 * and end and inserting a truncation marker between them.
 *
 * The head is allotted first; the tail receives whatever budget is left. When
 * nothing is left for the tail, the result is the head followed by the marker.
 *
 * @param value - Text to shorten; returned unchanged when it already fits.
 * @param maxChars - Maximum length of the result.
 * @param options - Optional overrides: `headChars` and `tailChars` are the
 *   preferred excerpt sizes (negative values count as 0) and `marker` is the
 *   text placed between the excerpts.
 * @returns The shortened text; a plain prefix cut when `maxChars` is too small
 *   to hold the marker plus a useful excerpt.
 */
export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
): string {
  if (value.length <= maxChars) return value;
  if (maxChars <= marker.length + 16) {
    return value.slice(0, maxChars);
  }

  const budget = maxChars - marker.length;
  const head = Math.min(Math.max(0, headChars), budget);
  const tail = Math.min(Math.max(0, tailChars), budget - head);
  if (head + tail <= 0) {
    return value.slice(0, maxChars);
  }

  const prefix = value.slice(0, head).trimEnd();
  // slice(-0) is slice(0) and would return the whole string, so an empty tail
  // must be handled explicitly.
  const suffix = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${prefix}${marker}${suffix}`;
}
|
|
||||||
|
|
||||||
/**
 * Returns a copy of `message` whose text has been squeezed and capped.
 *
 * String content is compressed and truncated to `maxMessageTextChars`; array
 * content is processed part by part. Any other content shape, and every
 * message when `preserveContent` is set, is returned as-is.
 *
 * @param message - The message to shrink.
 * @param options - Character caps, whether large attachments may be replaced
 *   by a placeholder, and whether to skip the message entirely.
 * @returns The original message or a shallow copy with reduced content.
 */
export function truncateModelMessageForPayload(
  message: ModelMessage,
  {
    maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
    maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
    omitLargeAttachments = false,
    preserveContent = false,
  }: {
    maxToolResultChars?: number;
    maxMessageTextChars?: number;
    omitLargeAttachments?: boolean;
    preserveContent?: boolean;
  } = {},
): ModelMessage {
  if (preserveContent) return message;

  if (typeof message.content === "string") {
    const squeezed = compressVerboseText(message.content);
    return {
      ...message,
      content: truncateTextWithHeadAndTail(squeezed, maxMessageTextChars),
    };
  }

  if (Array.isArray(message.content)) {
    const limits = { maxToolResultChars, maxMessageTextChars, omitLargeAttachments };
    return {
      ...message,
      content: message.content.map((part) => truncateContentPartForPayload(part, limits)),
    };
  }

  return message;
}
|
|
||||||
|
|
||||||
/**
 * Shrinks one content part of a message.
 *
 * - `text` parts with string text are compressed and capped at `maxMessageTextChars`.
 * - `tool-result` parts whose output is `{ type: "text", value: string }` are
 *   compressed and capped at `maxToolResultChars`.
 * - `image` / `file` parts with a string payload become a text placeholder
 *   when `omitLargeAttachments` is set.
 * - Everything else is returned unchanged.
 *
 * @param part - A content part of unknown shape.
 * @param limits - Character caps and the attachment-omission switch.
 * @returns The original part or a reduced copy.
 */
function truncateContentPartForPayload(
  part: unknown,
  limits: {
    maxToolResultChars: number;
    maxMessageTextChars: number;
    omitLargeAttachments: boolean;
  },
): unknown {
  if (typeof part !== "object" || part === null) return part;
  const record = part as Record<string, unknown>;

  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") return part;
      const squeezed = compressVerboseText(record.text);
      return {
        ...record,
        text: truncateTextWithHeadAndTail(squeezed, limits.maxMessageTextChars),
      };
    }
    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") return part;
      const outputRecord = output as Record<string, unknown>;
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") return part;
      const squeezed = compressVerboseText(outputRecord.value);
      return {
        ...record,
        output: {
          ...outputRecord,
          value: truncateTextWithHeadAndTail(squeezed, limits.maxToolResultChars),
        },
      };
    }
    case "image":
      return limits.omitLargeAttachments && typeof record.image === "string"
        ? omittedAttachmentTextPart("image", record.image, record)
        : part;
    case "file":
      return limits.omitLargeAttachments && typeof record.data === "string"
        ? omittedAttachmentTextPart("file", record.data, record)
        : part;
    default:
      return part;
  }
}
|
|
||||||
|
|
||||||
/**
 * Builds the text part that stands in for an attachment removed from a request.
 *
 * @param label - Kind of attachment being replaced.
 * @param payload - The removed payload; only its length is reported.
 * @param record - The original part; string `filename` and `mediaType` fields
 *   are included in the placeholder when present.
 * @returns A `text` part describing the omitted attachment.
 */
function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") {
    details.push(`filename=${record.filename}`);
  }
  if (typeof record.mediaType === "string") {
    details.push(`mediaType=${record.mediaType}`);
  }
  details.push(`${payload.length} chars`);

  return {
    type: "text",
    text: `[${label} attachment omitted to keep the AI request small: ${details.join(", ")}]`,
  };
}
|
|
||||||
|
|
||||||
/**
 * Shrinks `messages` until their estimated serialized size fits the budget
 * (`maxPayloadBytes` minus `reservedBytes`).
 *
 * Escalation order, stopping as soon as the estimate fits:
 * 1. cap text at the configured limits;
 * 2. re-cap with progressively tighter limits;
 * 3. drop older turns at safe split points while keeping the recent ones;
 * 4. apply emergency caps, replace large attachments with placeholders, and
 *    keep dropping turns down to a single message;
 * 5. shrink the caps further, this time including the latest user message.
 *
 * The result of the last step is returned even if it is still over budget.
 *
 * @returns The fitted messages, whether they differ from the input, and their
 *   estimated byte size.
 */
export function fitMessagesToRequestPayloadBudget({
  messages,
  maxPayloadBytes = DEFAULT_MAX_REQUEST_PAYLOAD_BYTES,
  reservedBytes = 0,
  maxToolResultChars = DEFAULT_MAX_TOOL_RESULT_CHARS,
  maxMessageTextChars = DEFAULT_MAX_MESSAGE_TEXT_CHARS,
  protectRecentMessages = DEFAULT_PROTECT_RECENT_PAYLOAD_MESSAGES,
  preserveLatestMessage = true,
}: FitMessagesToRequestPayloadBudgetInput): FitMessagesToRequestPayloadBudgetResult {
  const budget = Math.max(0, maxPayloadBytes - Math.max(0, reservedBytes));
  if (budget === 0) {
    return { messages: [], didAdjust: messages.length > 0, estimatedBytes: 0 };
  }

  const originalBytes = estimateUtf8Bytes(messages);
  if (originalBytes <= budget) {
    return { messages, didAdjust: false, estimatedBytes: originalBytes };
  }

  // A trailing user message is the current request and is kept verbatim for
  // as long as possible.
  const isProtectedLatest = (message: ModelMessage, index: number, list: ModelMessage[]): boolean =>
    preserveLatestMessage && index === list.length - 1 && message.role === "user";

  const applyCaps = (
    list: ModelMessage[],
    toolCap: number,
    textCap: number,
    omitLargeAttachments: boolean,
    honourLatest: boolean,
  ): ModelMessage[] => list.map((message, index) => truncateModelMessageForPayload(message, {
    maxToolResultChars: toolCap,
    maxMessageTextChars: textCap,
    omitLargeAttachments,
    preserveContent: honourLatest && isProtectedLatest(message, index, list),
  }));

  // Step 1: configured caps.
  let adjusted = applyCaps(messages, maxToolResultChars, maxMessageTextChars, false, true);
  let estimatedBytes = estimateUtf8Bytes(adjusted);
  let didAdjust = estimatedBytes !== originalBytes;
  if (estimatedBytes <= budget) {
    return { messages: adjusted, didAdjust, estimatedBytes };
  }

  // Step 2: progressively tighter [tool-result cap, message-text cap] pairs.
  const tighterCaps: ReadonlyArray<readonly [number, number]> = [
    [Math.floor(maxToolResultChars * 0.6), Math.floor(maxMessageTextChars * 0.6)],
    [Math.floor(maxToolResultChars * 0.35), Math.floor(maxMessageTextChars * 0.35)],
    [4_000, 8_000],
    [2_000, 4_000],
    [1_000, 2_000],
  ];
  for (const [toolCap, textCap] of tighterCaps) {
    adjusted = applyCaps(adjusted, toolCap, textCap, false, true);
    estimatedBytes = estimateUtf8Bytes(adjusted);
    didAdjust = true;
    if (estimatedBytes <= budget) {
      return { messages: adjusted, didAdjust, estimatedBytes };
    }
  }

  // Step 3: drop older turns while keeping the protected recent window.
  let working = adjusted.slice();
  while (working.length > protectRecentMessages) {
    const splitAt = findSafeCompactionSplitIndex(working, protectRecentMessages);
    if (splitAt <= 0) break;
    working = working.slice(splitAt);
    estimatedBytes = estimateUtf8Bytes(working);
    didAdjust = true;
    if (estimatedBytes <= budget) {
      return { messages: working, didAdjust, estimatedBytes };
    }
  }

  // Step 4: emergency caps plus attachment placeholders, shrinking the
  // protected window one message at a time.
  const emergencyToolCap = 600;
  const emergencyTextCap = 1_200;
  working = applyCaps(working, emergencyToolCap, emergencyTextCap, true, true);
  estimatedBytes = estimateUtf8Bytes(working);
  didAdjust = true;

  let emergencyProtect = Math.min(protectRecentMessages, working.length);
  while (estimatedBytes > budget && working.length > 1) {
    emergencyProtect = Math.max(1, emergencyProtect - 1);
    const splitAt = findSafeCompactionSplitIndex(working, emergencyProtect);
    working = splitAt <= 0 ? working.slice(-1) : working.slice(splitAt);
    working = applyCaps(working, emergencyToolCap, emergencyTextCap, true, true);
    estimatedBytes = estimateUtf8Bytes(working);
  }

  // Step 5: last resort - the latest message is no longer exempt.
  let finalTextCap = emergencyTextCap;
  let finalToolCap = emergencyToolCap;
  while (estimatedBytes > budget && (finalTextCap > 32 || finalToolCap > 32)) {
    finalTextCap = Math.max(32, Math.floor(finalTextCap * 0.6));
    finalToolCap = Math.max(32, Math.floor(finalToolCap * 0.6));
    working = applyCaps(working, finalToolCap, finalTextCap, true, false);
    estimatedBytes = estimateUtf8Bytes(working);
  }

  return { messages: working, didAdjust, estimatedBytes };
}
|
|
||||||
74
infrastructure/ai/requestPayloadCompression.test.ts
Normal file
74
infrastructure/ai/requestPayloadCompression.test.ts
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import test from "node:test";
|
||||||
|
import assert from "node:assert/strict";
|
||||||
|
import type { ModelMessage } from "ai";
|
||||||
|
|
||||||
|
import {
|
||||||
|
compressMessagesForRequestTooLargeRetry,
|
||||||
|
compressVerboseText,
|
||||||
|
truncateTextWithHeadAndTail,
|
||||||
|
} from "./requestPayloadCompression.ts";
|
||||||
|
|
||||||
|
test("compressVerboseText collapses repeated blank lines and duplicate runs", () => {
|
||||||
|
const input = "line1\n\n\n\n\nline2\nsame\nsame\nsame\nsame\nline3";
|
||||||
|
const output = compressVerboseText(input);
|
||||||
|
assert.match(output, /line1\n\n\nline2/);
|
||||||
|
assert.ok(output.split("\nsame\n").length <= 3);
|
||||||
|
});
|
||||||
|
|
||||||
|
test("truncateTextWithHeadAndTail keeps both ends of long terminal output", () => {
|
||||||
|
const value = `${"A".repeat(500)}${"B".repeat(20_000)}${"C".repeat(500)}`;
|
||||||
|
const truncated = truncateTextWithHeadAndTail(value, 2_000);
|
||||||
|
assert.ok(truncated.startsWith("AAA"));
|
||||||
|
assert.ok(truncated.includes("[... output truncated for request size ...]"));
|
||||||
|
assert.ok(truncated.endsWith("CCC"));
|
||||||
|
assert.ok(truncated.length <= 2_000);
|
||||||
|
});
|
||||||
|
|
||||||
|
// A conversation with a huge tool result and an inline image must come back
// with the same number of messages, a shortened tool output, and the image
// replaced by a short text placeholder.
test("compressMessagesForRequestTooLargeRetry compresses messages without enforcing a byte budget", () => {
  const messages: ModelMessage[] = [
    { role: "user", content: "run build" },
    {
      role: "tool",
      content: [{
        type: "tool-result",
        toolCallId: "call-1",
        toolName: "terminal_execute",
        output: { type: "text", value: "X".repeat(200_000) },
      }],
    },
    {
      role: "user",
      content: [
        { type: "text", text: "please inspect this image" },
        { type: "image", image: "A".repeat(1_000_000), mediaType: "image/png" },
      ],
    },
  ];

  const result = compressMessagesForRequestTooLargeRetry(messages);

  assert.equal(result.didAdjust, true);
  // The result carries exactly these two fields (no byte-budget bookkeeping).
  assert.deepEqual(Object.keys(result).sort(), ["didAdjust", "messages"]);
  assert.equal(result.messages.length, messages.length);

  const toolContent = result.messages[1].content;
  assert.ok(Array.isArray(toolContent));
  const toolPart = toolContent[0] as { output?: { value?: string } };
  const toolValue = toolPart.output?.value;
  // Require a real string first: a missing output must fail the test instead
  // of satisfying the size check with a defaulted length of 0.
  assert.equal(typeof toolValue, "string");
  assert.ok(typeof toolValue === "string" && toolValue.length < 5_000);

  const userContent = result.messages[2].content;
  assert.ok(Array.isArray(userContent));
  assert.deepEqual(userContent[1], {
    type: "text",
    text: "[image attachment omitted to keep the AI request small: mediaType=image/png, 1000000 chars]",
  });
});
|
||||||
|
|
||||||
|
// Messages that are already small must be reported as untouched.
test("compressMessagesForRequestTooLargeRetry reports no adjustment for compact messages", () => {
  const compact: ModelMessage[] = [{ role: "user", content: "hello" }];

  const { didAdjust, messages: returned } = compressMessagesForRequestTooLargeRetry(compact);

  assert.equal(didAdjust, false);
  assert.deepEqual(returned, compact);
});
|
||||||
167
infrastructure/ai/requestPayloadCompression.ts
Normal file
167
infrastructure/ai/requestPayloadCompression.ts
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
import type { ModelMessage } from "ai";
|
||||||
|
|
||||||
|
/** Character cap applied to the text value of a tool-result part on retry. */
const RETRY_MAX_TOOL_RESULT_CHARS = 4_000;

/** Character cap applied to plain string content and to text parts on retry. */
const RETRY_MAX_MESSAGE_TEXT_CHARS = 8_000;

/** Default separator inserted between the kept head and the kept tail. */
const TRUNCATION_MARKER = "\n\n[... output truncated for request size ...]\n\n";

/** Default number of leading characters kept by head/tail truncation. */
const HEAD_CHARS = 800;

/** Default number of trailing characters kept by head/tail truncation. */
const TAIL_CHARS = 4_000;
|
||||||
|
|
||||||
|
/**
 * Result of `compressMessagesForRequestTooLargeRetry`.
 */
export interface CompressMessagesForRequestTooLargeRetryResult {
  /** True when at least one message was replaced by a compressed copy. */
  didAdjust: boolean;
  /**
   * The messages to send on retry: the caller's own array when nothing was
   * adjusted, otherwise a new array of the same length.
   */
  messages: ModelMessage[];
}
|
||||||
|
|
||||||
|
/**
 * Shrink noisy terminal/build output without cutting it.
 *
 * - CRLF line endings become LF.
 * - Runs of four or more newlines are reduced to three.
 * - At most three consecutive identical lines are kept; further repeats of
 *   the same line are dropped.
 *
 * @param value Text to compress; an empty value is returned as-is.
 * @returns The compressed text.
 */
export function compressVerboseText(value: string): string {
  if (!value) return value;

  const normalized = value
    .replace(/\r\n/g, "\n")
    .replace(/\n{4,}/g, "\n\n\n");

  const kept: string[] = [];
  // Number of consecutive copies of the current line seen so far (including it).
  let runLength = 0;
  for (const line of normalized.split("\n")) {
    const repeatsPrevious = kept.length > 0 && kept[kept.length - 1] === line;
    runLength = repeatsPrevious ? runLength + 1 : 1;
    if (runLength <= 3) kept.push(line);
  }

  return kept.join("\n");
}
|
||||||
|
|
||||||
|
/**
 * Shorten `value` to at most `maxChars` characters by keeping its beginning
 * and its end and joining them with a truncation marker.
 *
 * The head is allocated first (up to `headChars`), the tail gets what is left
 * of the budget (up to `tailChars`). If the head alone would consume the whole
 * budget while a tail was requested, the budget is split between both so the
 * end of the text is still represented. The result can be shorter than
 * `maxChars` when the requested head and tail are smaller than the budget, and
 * whitespace touching the cut is trimmed.
 *
 * @param value Text to shorten; returned unchanged when it already fits.
 * @param maxChars Maximum length of the returned string.
 * @param options.headChars Maximum characters kept from the start.
 * @param options.tailChars Maximum characters kept from the end.
 * @param options.marker Text placed between head and tail.
 * @returns The original text, a head/marker/tail composition, or — when
 *   `maxChars` leaves no sensible room for the marker — a plain prefix.
 */
export function truncateTextWithHeadAndTail(
  value: string,
  maxChars: number,
  {
    headChars = HEAD_CHARS,
    tailChars = TAIL_CHARS,
    marker = TRUNCATION_MARKER,
  }: {
    headChars?: number;
    tailChars?: number;
    marker?: string;
  } = {},
): string {
  if (value.length <= maxChars) return value;

  // Not enough room for the marker plus some context on either side.
  if (maxChars <= marker.length + 16) {
    return value.slice(0, maxChars);
  }

  // Work with whole, non-negative character counts so that a fractional or
  // negative allowance can never turn into `slice(-0)` further down.
  const budget = Math.floor(maxChars - marker.length);
  const wantedHead = Math.max(0, Math.floor(headChars));
  const wantedTail = Math.max(0, Math.floor(tailChars));

  let head = Math.min(wantedHead, budget);
  let tail = Math.min(wantedTail, budget - head);
  if (tail === 0 && wantedTail > 0) {
    // The head used up the entire budget: give half of it to the tail instead
    // of silently dropping the end of the text.
    tail = Math.min(wantedTail, Math.floor(budget / 2));
    head = budget - tail;
  }

  if (head + tail <= 0) {
    return value.slice(0, maxChars);
  }

  const headText = value.slice(0, head).trimEnd();
  // `slice(-0)` equals `slice(0)` and would return the whole string, so an
  // empty tail has to be produced explicitly.
  const tailText = tail > 0 ? value.slice(-tail).trimStart() : "";
  return `${headText}${marker}${tailText}`;
}
|
||||||
|
|
||||||
|
/**
 * Compress every message of a conversation for a retry after the request was
 * rejected as too large.
 *
 * @param messages Conversation to compress; the array itself is not modified.
 * @returns `didAdjust` tells whether any message was replaced. When nothing
 *   changed, `messages` is the very array that was passed in.
 */
export function compressMessagesForRequestTooLargeRetry(
  messages: ModelMessage[],
): CompressMessagesForRequestTooLargeRetryResult {
  const rewritten = messages.map((original) => compressModelMessageForRequestRetry(original));
  // The per-message helper returns the same object when it changed nothing,
  // so an identity comparison is enough to detect an adjustment.
  const didAdjust = rewritten.some((candidate, index) => candidate !== messages[index]);

  if (!didAdjust) {
    return { messages, didAdjust };
  }
  return { messages: rewritten, didAdjust };
}
|
||||||
|
|
||||||
|
/**
 * Compress one message. String content is compressed and truncated as a
 * whole; array content is compressed part by part; any other content is left
 * alone. The original object is returned whenever nothing changed, so callers
 * can detect adjustments by identity.
 */
function compressModelMessageForRequestRetry(message: ModelMessage): ModelMessage {
  if (typeof message.content === "string") {
    const shortened = compressAndTruncateText(message.content, RETRY_MAX_MESSAGE_TEXT_CHARS);
    if (shortened === message.content) return message;
    return { ...message, content: shortened };
  }

  if (!Array.isArray(message.content)) return message;

  const originalParts = message.content;
  const content = originalParts.map((part) => compressContentPartForRequestRetry(part));
  const anyPartChanged = content.some((part, index) => part !== originalParts[index]);

  return anyPartChanged ? { ...message, content } : message;
}
|
||||||
|
|
||||||
|
/**
 * Compress a single content part according to its `type`:
 *
 * - `text` with a string `text`: compressed and truncated.
 * - `tool-result` whose `output` is `{ type: "text", value: string }`: the
 *   value is compressed and truncated, all other fields are preserved.
 * - `image` / `file` with a string payload: replaced by a short text
 *   placeholder.
 *
 * Anything else, and any part that did not actually change, is returned as
 * the same object.
 */
function compressContentPartForRequestRetry(part: unknown): unknown {
  if (typeof part !== "object" || part === null) return part;
  const record = part as Record<string, unknown>;

  switch (record.type) {
    case "text": {
      if (typeof record.text !== "string") return part;
      const text = compressAndTruncateText(record.text, RETRY_MAX_MESSAGE_TEXT_CHARS);
      return text === record.text ? part : { ...record, text };
    }

    case "tool-result": {
      const output = record.output;
      if (!output || typeof output !== "object") return part;

      const outputRecord = output as Record<string, unknown>;
      if (outputRecord.type !== "text" || typeof outputRecord.value !== "string") return part;

      const value = compressAndTruncateText(outputRecord.value, RETRY_MAX_TOOL_RESULT_CHARS);
      if (value === outputRecord.value) return part;
      return { ...record, output: { ...outputRecord, value } };
    }

    case "image":
      return typeof record.image === "string"
        ? omittedAttachmentTextPart("image", record.image, record)
        : part;

    case "file":
      return typeof record.data === "string"
        ? omittedAttachmentTextPart("file", record.data, record)
        : part;

    default:
      return part;
  }
}
|
||||||
|
|
||||||
|
/** Collapse verbose output first, then cut the result down to `maxChars`. */
function compressAndTruncateText(value: string, maxChars: number): string {
  const collapsed = compressVerboseText(value);
  return truncateTextWithHeadAndTail(collapsed, maxChars);
}
|
||||||
|
|
||||||
|
/**
 * Build the text part that stands in for a dropped attachment.
 *
 * @param label Kind of attachment named in the placeholder.
 * @param payload The dropped string payload; only its length is reported.
 * @param record The original part; string `filename` and `mediaType` fields
 *   are included in the placeholder when present.
 * @returns A `text` part describing what was omitted.
 */
function omittedAttachmentTextPart(
  label: "image" | "file",
  payload: string,
  record: Record<string, unknown>,
): { type: "text"; text: string } {
  const details: string[] = [];
  if (typeof record.filename === "string") {
    details.push(`filename=${record.filename}`);
  }
  if (typeof record.mediaType === "string") {
    details.push(`mediaType=${record.mediaType}`);
  }
  details.push(`${payload.length} chars`);

  const summary = details.join(", ");
  return {
    type: "text",
    text: `[${label} attachment omitted to keep the AI request small: ${summary}]`,
  };
}
|
||||||
@@ -15,7 +15,7 @@ import {
|
|||||||
} from '../shared/toolExecutors';
|
} from '../shared/toolExecutors';
|
||||||
import { requestApproval } from '../shared/approvalGate';
|
import { requestApproval } from '../shared/approvalGate';
|
||||||
import { reserveSessionSlot } from '../shared/sessionExecutionQueue';
|
import { reserveSessionSlot } from '../shared/sessionExecutionQueue';
|
||||||
import { truncateTextWithHeadAndTail } from '../requestPayloadBudget';
|
import { truncateTextWithHeadAndTail } from '../requestPayloadCompression';
|
||||||
|
|
||||||
const MAX_LIVE_TERMINAL_STDOUT_CHARS = 24_000;
|
const MAX_LIVE_TERMINAL_STDOUT_CHARS = 24_000;
|
||||||
const MAX_LIVE_TERMINAL_STDERR_CHARS = 12_000;
|
const MAX_LIVE_TERMINAL_STDERR_CHARS = 12_000;
|
||||||
|
|||||||
Reference in New Issue
Block a user