PR #543: chat UI/UX fixes — thinking indicators, message dedup, streaming stability (JohnGuidry)

Addresses #572 (double chat responses) + #561 (stuck Thinking indicator).
Adds optimistic-message-reinject hook, vite loadEnv→process.env bridge for SSR
bearer token, dedup + streaming stability. eslint --fix on touched files
(net lint errors 1700→1588). Build GREEN, test 33 fail/694 pass (zero regressions).
This commit is contained in:
Aurora
2026-06-05 06:01:21 -04:00
parent ef2e4ba02b
commit 5271ca9ad3
14 changed files with 704 additions and 219 deletions

1
.gitignore vendored
View File

@@ -16,6 +16,7 @@ build
.vinxi
.nitro
.tanstack
.vite
# Environment variables
.env

View File

@@ -1,15 +1,10 @@
import { createFileRoute } from '@tanstack/react-router'
import { buildResolvedSessionHeaders } from '../../lib/send-stream-session-headers'
import { buildWorkspaceScopedTextMessage } from '../../lib/workspace-message-scope'
import {
collectSyntheticLiveToolEvents,
createSyntheticLiveToolTracker,
} from './-send-stream-live-tools'
import { resolveSessionKey } from '../../server/session-utils'
import { isAuthenticated } from '../../server/auth-middleware'
import { requireJsonContentType } from '../../server/rate-limit'
import { publishChatEvent } from '../../server/chat-event-bus'
import { loadWorkspaceCatalog } from './workspace'
import {
registerActiveSendRun,
unregisterActiveSendRun,
@@ -22,8 +17,8 @@ import {
upsertRunToolCall,
} from '../../server/run-store'
import { getChatMode } from '../../server/gateway-capabilities'
import { ensureLocalSession, appendLocalMessage, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
import { getLocalProviderDef, getDiscoveredModels } from '../../server/local-provider-discovery'
import { appendLocalMessage, ensureLocalSession, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
import { getDiscoveredModels, getLocalProviderDef } from '../../server/local-provider-discovery'
import { openaiChat } from '../../server/openai-compat-api'
import { streamResponses } from '../../server/responses-api'
import { selectPortableConversationHistory } from '../../server/portable-history'
@@ -36,6 +31,11 @@ import {
listSessions,
streamChat,
} from '../../server/claude-api'
import { loadWorkspaceCatalog } from './workspace'
import {
collectSyntheticLiveToolEvents,
createSyntheticLiveToolTracker,
} from './-send-stream-live-tools'
import type {OpenAICompatContentPart, OpenAICompatMessage} from '../../server/openai-compat-api';
// Claude agent runs can take 5+ minutes with complex tool chains
const SEND_STREAM_RUN_TIMEOUT_MS = 600_000
@@ -386,9 +386,42 @@ export const Route = createFileRoute('/api/send-stream')({
let streamTimeoutTimer: ReturnType<typeof setTimeout> | null = null
let heartbeatTimer: ReturnType<typeof setInterval> | null = null
const abortController = new AbortController()
// Tear down the SSE response: flag the stream as closed so nothing further
// is enqueued, cancel the heartbeat / unregister / timeout timers, and abort
// the upstream Hermes gateway request so the agent stops processing.
// Run status (persistActiveRun etc.) is deliberately left untouched here —
// the abort path (request.signal / handleAbort) owns run cleanup.
// Safe to call repeatedly: once streamClosed is set, later calls are no-ops.
// NOTE(review): start() declares its own `heartbeatTimer`; confirm that this
// outer variable is the one the heartbeat interval is actually stored in.
let closeStream = () => {
  if (streamClosed) return
  streamClosed = true

  // The three timers are independent of each other; each is cleared and
  // reset to null only when it is currently armed.
  if (streamTimeoutTimer) {
    clearTimeout(streamTimeoutTimer)
    streamTimeoutTimer = null
  }
  if (unregisterTimer) {
    clearTimeout(unregisterTimer)
    unregisterTimer = null
  }
  if (heartbeatTimer) {
    clearInterval(heartbeatTimer)
    heartbeatTimer = null
  }

  // Last step: signal the upstream request to stop.
  abortController.abort()
}
// Abort handler for request.signal, which fires when the client hits Stop,
// navigates away, or closes the tab. If a run is still tracked and the stream
// has not been closed yet, the run is persisted as 'handoff' and removed from
// the active-send registry; closeStream() then stops the upstream agent so we
// don't burn API credits on an orphan.
function handleAbort() {
  if (!streamClosed && activeRunId) {
    // Persist first, while activeRunId is still set, then drop the tracking.
    persistActiveRun((sessionKeyForRun, runId) =>
      markRunStatus(sessionKeyForRun, runId, 'handoff'),
    )
    unregisterActiveSendRun(activeRunId)
    activeRunId = null
  }
  closeStream()
}

// One-shot registration: the listener is removed after the first abort event.
request.signal.addEventListener('abort', handleAbort, { once: true })
const persistRunStarted = (
runId: string | undefined,
@@ -419,6 +452,11 @@ export const Route = createFileRoute('/api/send-stream')({
async start(controller) {
let heartbeatTimer: ReturnType<typeof setInterval> | null = null
let lastClientEventAt = Date.now()
// Track the last human-readable activity so the heartbeat can
// forward it to the UI. Without this the ThinkingBubble shows a
// static "Thinking…" for minutes when the agent is reasoning
// without tool calls, making it look hung.
let lastActivity: string | null = null
const enqueueRaw = (payload: string) => {
if (streamClosed) return
controller.enqueue(encoder.encode(payload))
@@ -462,10 +500,6 @@ export const Route = createFileRoute('/api/send-stream')({
clearTimeout(streamTimeoutTimer)
streamTimeoutTimer = null
}
if (heartbeatTimer) {
clearInterval(heartbeatTimer)
heartbeatTimer = null
}
if (activeRunId) {
unregisterActiveSendRun(activeRunId)
activeRunId = null
@@ -481,9 +515,11 @@ export const Route = createFileRoute('/api/send-stream')({
// Keep the SSE stream alive during long agent processing (tool calls,
// slow LLM responses on large contexts). Without this the client-side
// no-activity timer fires after 2-3 min and aborts the stream.
// Every 10s we also forward the last known activity so the UI can
// show meaningful progress instead of a static "Thinking…".
heartbeatTimer = setInterval(() => {
sendEvent('heartbeat', { timestamp: Date.now() })
}, 30_000)
sendEvent('heartbeat', { timestamp: Date.now(), activity: lastActivity })
}, 10_000)
try {
if (chatMode === 'portable') {
@@ -514,6 +550,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: portableSessionKey,
friendlyId: portableFriendlyId,
})
lastActivity = 'Processing your message...'
try {
const userContent = buildMultimodalContent(
@@ -569,7 +606,7 @@ export const Route = createFileRoute('/api/send-stream')({
const useResponsesApi =
process.env.HERMES_USE_RESPONSES === '1' && !localBaseUrl
if (useResponsesApi) {
let thinking = ''
const thinking = ''
// Track tool calls by callId so a `tool.completed`
// followed by `tool.output` can carry the full
// arguments forward without losing them.
@@ -615,7 +652,7 @@ export const Route = createFileRoute('/api/send-stream')({
})
const argsForCard =
ev.args && typeof ev.args === 'object'
? (ev.args as Record<string, unknown>)
? (ev.args)
: undefined
persistActiveRun((runSessionKey, activeId) =>
upsertRunToolCall(runSessionKey, activeId, {
@@ -633,6 +670,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: portableSessionKey,
runId,
})
lastActivity = `Running: ${ev.name.replace(/_/g, ' ')}`
continue
}
if (ev.kind === 'tool.completed') {
@@ -649,7 +687,7 @@ export const Route = createFileRoute('/api/send-stream')({
const state = toolStateByCallId.get(ev.callId)
const argsForCard =
state?.args && typeof state.args === 'object'
? (state.args as Record<string, unknown>)
? (state.args)
: undefined
const name = state?.name || 'tool'
persistActiveRun((runSessionKey, activeId) =>
@@ -670,6 +708,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: portableSessionKey,
runId,
})
lastActivity = `Completed: ${name.replace(/_/g, ' ')}`
continue
}
if (ev.kind === 'completed') {
@@ -1012,6 +1051,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: sessionKeyFromEvent,
friendlyId: sessionKeyFromEvent,
})
lastActivity = 'Processing your message...'
}
if (event === 'run.started') {
@@ -1137,6 +1177,7 @@ export const Route = createFileRoute('/api/send-stream')({
)
sendEvent('tool', translated)
skipPublish || publishChatEvent('tool', translated)
lastActivity = `Running: ${toolName.replace(/_/g, ' ')}`
return
}
@@ -1155,6 +1196,7 @@ export const Route = createFileRoute('/api/send-stream')({
}
sendEvent('thinking', translated)
skipPublish || publishChatEvent('thinking', translated)
lastActivity = delta.length > 60 ? delta.slice(0, 60) + '...' : delta
return
}
const translated = {
@@ -1203,6 +1245,7 @@ export const Route = createFileRoute('/api/send-stream')({
)
sendEvent('tool', translated)
skipPublish || publishChatEvent('tool', translated)
lastActivity = `Completed: ${toolName.replace(/_/g, ' ')}`
return
}
@@ -1376,10 +1419,10 @@ export const Route = createFileRoute('/api/send-stream')({
)
const recent = persistedMessages.slice(
sliceFrom,
) as Array<Record<string, unknown>>
)
let lastAssistantIndex = -1
for (let i = recent.length - 1; i >= 0; i--) {
const m = recent[i] as Record<string, unknown>
const m = recent[i]
if (m && m.role === 'assistant') {
lastAssistantIndex = i
break
@@ -1388,7 +1431,7 @@ export const Route = createFileRoute('/api/send-stream')({
if (lastAssistantIndex >= 0) {
const lastAssistant = recent[
lastAssistantIndex
] as Record<string, unknown>
]
const rawToolCalls = (lastAssistant.tool_calls ??
(lastAssistant as any).toolCalls) as
| Array<Record<string, unknown>>
@@ -1478,28 +1521,17 @@ export const Route = createFileRoute('/api/send-stream')({
}
},
cancel() {
// Browser navigation/unmount cancels the response reader. That
// must not cancel the Hermes run itself: the chat/conductor should
// keep thinking server-side so the user can return and recover the
// answer from session history. Mark this client stream closed so we
// stop enqueueing SSE chunks, but deliberately leave the upstream
// abortController alone.
streamClosed = true
if (unregisterTimer) {
clearTimeout(unregisterTimer)
unregisterTimer = null
}
if (streamTimeoutTimer) {
clearTimeout(streamTimeoutTimer)
streamTimeoutTimer = null
}
if (activeRunId) {
// User clicked Stop, navigated away, or browser closed the tab.
// Mark the stream complete, persist the run as 'handoff' so
// session history reflects the interruption, then delegate to
// closeStream() for timer/controller cleanup. Delegate instead
// of duplicating cleanup logic to keep the two paths in sync.
if (activeRunId && !streamClosed) {
persistActiveRun((runSessionKey, activeId) =>
markRunStatus(runSessionKey, activeId, 'handoff'),
)
unregisterActiveSendRun(activeRunId)
activeRunId = null
}
closeStream()
},
})

View File

@@ -1,8 +1,5 @@
// Module-level local model override — set by composer when user picks a local model
// Avoids prop threading. Reset when switching back to cloud models.
export let _localModelOverride = ''
export function setLocalModelOverride(model: string) { _localModelOverride = model }
import {
useCallback,
useEffect,
@@ -21,12 +18,12 @@ import {
textFromMessage,
} from './utils'
import {
advanceStickyStreamingText,
createResponseWaitSnapshot,
createOptimisticMessage,
createResponseWaitSnapshot,
isTerminalActiveRunStatus,
shouldClearWaitingForAssistantMessage,
type ResponseWaitSnapshot,
shouldClearWaitingForAssistantMessage
} from './chat-screen-utils'
import {
appendHistoryMessage,
@@ -43,21 +40,20 @@ import { ChatEmptyState } from './components/chat-empty-state'
import { ChatComposer } from './components/chat-composer'
import { ConnectionStatusMessage } from './components/connection-status-message'
import {
clearPendingSendForSession,
consumePendingSend,
hasPendingGeneration,
hasPendingSend,
isRecentSession,
resetPendingSend,
setPendingGeneration,
clearPendingSendForSession,
} from './pending-send'
import { useChatMeasurements } from './hooks/use-chat-measurements'
import { useChatHistory } from './hooks/use-chat-history'
import { useRealtimeChatHistory } from './hooks/use-realtime-chat-history'
import { snapshotOptimisticUserMessages } from './hooks/optimistic-message-reinject'
import { useSmoothStreamingText } from './hooks/use-smooth-streaming-text'
import { useStreamingMessage } from './hooks/use-streaming-message'
import { playChatComplete } from '@/lib/sounds'
import { useChatSettingsStore } from '@/hooks/use-chat-settings'
import { useActiveRunCheck } from './hooks/use-active-run-check'
import { useChatMobile } from './hooks/use-chat-mobile'
import { useChatSessions } from './hooks/use-chat-sessions'
@@ -70,6 +66,7 @@ import {
CHAT_PENDING_COMMAND_STORAGE_KEY,
CHAT_RUN_COMMAND_EVENT,
} from './chat-events'
import type {ResponseWaitSnapshot} from './chat-screen-utils';
import type {
ChatComposerAttachment,
ChatComposerHandle,
@@ -79,6 +76,9 @@ import type {
import type { ApprovalRequest } from '@/screens/gateway/lib/approvals-store'
import type { ChatAttachment, ChatMessage, SessionMeta } from './types'
import type { ChatRunCommandDetail } from './chat-events'
import type {AgentActivity} from '@/stores/chat-activity-store';
import { useChatSettingsStore } from '@/hooks/use-chat-settings'
import { playChatComplete } from '@/lib/sounds'
import {
addApproval,
loadApprovals,
@@ -101,12 +101,16 @@ import { MobileSessionsPanel } from '@/components/mobile-sessions-panel'
import { ContextAlertModal } from '@/components/usage-meter/context-alert-modal'
import { ErrorToastContainer, showErrorToast } from '@/components/error-toast'
// ContextMeter removed — ContextBar (PR #32) replaces it
import { useChatStore, persistRecoveryMessage } from '@/stores/chat-store'
import { persistRecoveryMessage, useChatStore } from '@/stores/chat-store'
import { useSessionModelStore } from '@/stores/session-model-store'
import { useResearchCard } from '@/hooks/use-research-card'
// MOBILE_TAB_BAR_OFFSET removed — tab bar always hidden in chat
import { useTapDebug } from '@/hooks/use-tap-debug'
import { useChatMode } from '@/hooks/use-chat-mode'
import { useChatActivityStore, type AgentActivity } from '@/stores/chat-activity-store'
import { useChatActivityStore } from '@/stores/chat-activity-store'
export let _localModelOverride = ''
export function setLocalModelOverride(model: string) { _localModelOverride = model }
type ChatScreenProps = {
activeFriendlyId: string
@@ -481,45 +485,6 @@ export function ChatScreen({
const portableChatFriendlyId = isPortableMode ? 'main' : activeFriendlyId
// --- Issue #43 fix: lift waitingForResponse into persistent Zustand store ---
// The store survives component unmount, so navigating away mid-stream
// doesn't lose the "waiting" flag. sessionStorage backup handles reloads.
const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
// resolvedSessionKey isn't available yet (defined below), so we track it via
// a ref that's updated once it resolves. The memo/callback read the ref.
const sessionKeyForWaiting = useRef<string | undefined>(undefined)
const [activeRunCheckDone, setActiveRunCheckDone] = useState(false)
// Track stale-restored sessions that need API verification before showing thinking.
// On page reload, sessionStorage may contain stale "waiting" flags from a
// previous session. We must not show the thinking indicator until the
// active-run API check confirms the run is genuinely active. (Issue #449)
const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
const waitingForResponse = useMemo(() => {
const key = sessionKeyForWaiting.current
if (!key) return hasPendingSend() || hasPendingGeneration()
// If we restored waiting state from sessionStorage but haven't verified
// with the API yet, don't show thinking — it might be stale (Issue #449).
if (
storeWaiting.has(key) &&
pendingVerifySessionKeyRef.current === key &&
!activeRunCheckDone
) {
return false
}
return storeWaiting.has(key)
}, [storeWaiting, activeRunCheckDone])
const setWaitingForResponse = useCallback((waiting: boolean) => {
const store = useChatStore.getState()
const key = sessionKeyForWaiting.current
if (!key) return
if (waiting) {
store.setSessionWaiting(key)
} else {
store.clearSessionWaiting(key)
}
}, [])
const [liveToolActivity, setLiveToolActivity] = useState<
Array<{ name: string; timestamp: number }>
>([])
@@ -540,10 +505,18 @@ export function ChatScreen({
if (typeof window === 'undefined') return 'low'
const key = `claude-thinking-${activeFriendlyId || 'new'}`
const stored = window.sessionStorage.getItem(key)
if (stored === 'off' || stored === 'low' || stored === 'adaptive')
if (stored === 'off' || stored === 'low' || stored === 'medium' || stored === 'high' || stored === 'adaptive')
return stored
return 'low'
})
// Tracks whether the user has explicitly picked a thinking level for this session.
// A missing/absent sessionStorage key means we should fall back to the Hermes config default.
const thinkingInitializedByUserRef = useRef(false)
useEffect(() => {
if (typeof window === 'undefined') return
const key = `claude-thinking-${activeFriendlyId || 'new'}`
thinkingInitializedByUserRef.current = window.sessionStorage.getItem(key) !== null
}, [activeFriendlyId])
const { alertOpen, alertThreshold, alertPercent, dismissAlert } =
useContextAlert()
@@ -611,10 +584,61 @@ export function ChatScreen({
portableMode: isPortableMode,
})
// --- Waiting state management (Issue #43 + #449) ---
// resolvedSessionKey is now available (defined above from useChatHistory).
const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
const sessionKeyForWaiting = useRef<string | undefined>(undefined)
const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
// Keep the waiting-state ref in sync with the resolved session key
sessionKeyForWaiting.current = resolvedSessionKey
// Detect stale restored waiting state from sessionStorage — it needs API verification (Issue #449).
// Synchronously detect stale waiting state from sessionStorage.
// This runs during render (not in an effect) so the guard in
// waitingForResponse is active on the very first render, preventing
// a flash of the "Thinking" indicator when reopening an old session.
const needsStaleCheck =
resolvedSessionKey &&
!isNewChat &&
storeWaiting.has(resolvedSessionKey) &&
pendingVerifySessionKeyRef.current !== resolvedSessionKey
if (needsStaleCheck) {
pendingVerifySessionKeyRef.current = resolvedSessionKey
}
// Track whether the active-run API check has completed.
// Initialize to false when we detect stale state (needs verification),
// true otherwise. This prevents showing "Thinking" until the API confirms.
const [activeRunCheckDone, setActiveRunCheckDone] = useState(!needsStaleCheck)
const waitingForResponse = useMemo(() => {
const key = sessionKeyForWaiting.current
if (!key) return hasPendingSend() || hasPendingGeneration()
// If we restored waiting state from sessionStorage but haven't verified
// with the API yet, don't show thinking — it might be stale (Issue #449).
if (
storeWaiting.has(key) &&
pendingVerifySessionKeyRef.current === key &&
!activeRunCheckDone
) {
return false
}
return storeWaiting.has(key)
}, [storeWaiting, activeRunCheckDone])
const setWaitingForResponse = useCallback((waiting: boolean) => {
const store = useChatStore.getState()
const key = sessionKeyForWaiting.current
if (!key) return
if (waiting) {
store.setSessionWaiting(key)
} else {
store.clearSessionWaiting(key)
}
}, [])
// Verify restored waiting state against the API before showing thinking (Issue #449).
useEffect(() => {
const currentSessionKey = resolvedSessionKey
@@ -868,13 +892,12 @@ export function ChatScreen({
const streamStart = useCallback(() => {
if (!activeFriendlyId || isNewChat) return
// Bug #3 fix: no more 350ms polling loop — SSE handles realtime updates.
// Single delayed fetch as fallback to catch the initial response.
if (streamTimer.current) window.clearTimeout(streamTimer.current)
streamTimer.current = window.setTimeout(() => {
if (activeRealtimeStreamingRef.current) return
refreshHistoryRef.current()
}, 2000)
// No aggressive delayed refetch here — it wipes optimistic user messages
// from the cache before the server has echoed them, causing the user's
// message to disappear until the agent completes. The existing failsafes
// (5s + 10s timeouts at lines below, active-run polling) handle the case
// where SSE misses the done event.
void activeFriendlyId // keep dep for eslint
}, [activeFriendlyId, isNewChat])
refreshHistoryRef.current = function refreshHistory() {
@@ -883,37 +906,21 @@ export function ChatScreen({
// Snapshot any unconfirmed optimistic user messages BEFORE refetch.
// The refetch replaces the query cache with server data — if the server
// hasn't processed the user's POST yet, the optimistic message vanishes.
const currentMessages = (historyQuery.data as any)?.messages as
| Array<ChatMessage>
| undefined
const pendingOptimistic = (currentMessages ?? []).filter((msg) => {
const raw = msg as Record<string, unknown>
return (
msg.role === 'user' &&
(normalizeMessageValue(raw.__optimisticId).startsWith('opt-') ||
normalizeMessageValue(raw.status) === 'sending')
)
})
void historyQuery.refetch().then(() => {
// Re-inject optimistic messages that weren't in the server response
if (pendingOptimistic.length === 0) return
const historySessionKey = isPortableMode
? 'main'
: activeSessionKey ||
sessionKeyForHistory ||
resolvedSessionKey ||
'main'
if (!portableChatFriendlyId || !historySessionKey) return
for (const optimistic of pendingOptimistic) {
appendHistoryMessage(
const reInjectOptimistic = snapshotOptimisticUserMessages(
queryClient,
portableChatFriendlyId,
historySessionKey,
optimistic,
)
}
void historyQuery.refetch().then(() => {
// Re-inject optimistic messages that weren't in the server response
reInjectOptimistic()
})
}
@@ -1018,6 +1025,29 @@ export function ChatScreen({
retry: false,
})
// Load the reasoning effort configured for Hermes so the Chat Controls default
// mirrors the server-side setting instead of a hardcoded 'low'.
// Every failure mode (non-OK response, missing or malformed `agent` section,
// unrecognised value, thrown error) resolves to 'low'; the query never rejects.
const reasoningEffortQuery = useQuery({
  queryKey: ['hermes-config', 'reasoning-effort'],
  staleTime: 10 * 60 * 1000,
  retry: false,
  queryFn: async () => {
    try {
      const response = await fetch('/api/hermes-config')
      if (!response.ok) return 'low'

      const payload = await response.json() as { config?: Record<string, unknown> }
      const agent = payload?.config?.agent
      // Only a plain (non-array) object can carry the setting.
      if (!agent || typeof agent !== 'object' || Array.isArray(agent)) {
        return 'low'
      }

      const effort = (agent as Record<string, unknown>).reasoning_effort
      if (effort === 'off' || effort === 'low' || effort === 'medium' || effort === 'high') {
        return effort
      }
      return 'low'
    } catch {
      return 'low'
    }
  },
})
const availableModelIds = useMemo(() => {
const models = modelsQuery.data?.models || []
return models.map((m: any) => m.id).filter((id: string) => id)
@@ -1054,6 +1084,16 @@ export function ChatScreen({
}
}, [currentModel, activeFriendlyId])
// If no per-session thinking level override exists, inherit from Hermes config
useEffect(() => {
if (thinkingInitializedByUserRef.current) return
const configEffort = reasoningEffortQuery.data
if (!configEffort) return
if (configEffort === 'off' || configEffort === 'low' || configEffort === 'medium' || configEffort === 'high') {
setThinkingLevel(configEffort)
}
}, [reasoningEffortQuery.data])
// Persist thinking level changes to sessionStorage
const handleThinkingLevelChange = useCallback(
(level: ThinkingLevel) => {
@@ -1378,7 +1418,7 @@ export function ChatScreen({
return deduped
}
const nextMessages = [...deduped]
let nextMessages = [...deduped]
const streamToolCalls = activeToolCalls.map((toolCall) => ({
...toolCall,
phase: toolCall.phase,
@@ -1394,6 +1434,42 @@ export function ChatScreen({
__streamToolCalls: streamToolCalls,
} as ChatMessage
// Check if the server has already returned a completed assistant message
// that overlaps with the streaming text. If so, drop the streaming
// placeholder to avoid showing the same response twice.
const streamingText = stableActiveStreamingText.trim()
const hasServerAssistantVersion = nextMessages.some((msg) => {
if (msg.role !== 'assistant') return false
if (msg.__streamingStatus === 'streaming') return false
// Any non-streaming assistant message that appears after the last user
// message is potentially the same response — match by text overlap
if (streamingText.length > 0) {
const msgText = textFromMessage(msg).trim()
if (msgText.length > 0 && (
msgText === streamingText ||
msgText.startsWith(streamingText) ||
streamingText.startsWith(msgText)
)) {
return true
}
}
// Also match by tool calls: if the server message has the same tool
// calls as the streaming placeholder, it's the same response
if (streamToolCalls.length > 0) {
const msgContent = Array.isArray(msg.content) ? msg.content : []
const msgToolCalls = msgContent.filter((p: any) => p.type === 'toolCall')
if (msgToolCalls.length > 0 && msgToolCalls.length === streamToolCalls.length) {
return streamToolCalls.every((stc: any) =>
msgToolCalls.some((mtc: any) => mtc.name === stc.name)
)
}
}
return false
})
if (hasServerAssistantVersion) {
return nextMessages
}
const existingStreamIdx = nextMessages.findIndex(
(message) => message.__streamingStatus === 'streaming',
)
@@ -1403,6 +1479,13 @@ export function ChatScreen({
...nextMessages[existingStreamIdx],
...streamingMsg,
}
// Remove any other streaming messages (e.g. from mergeHistoryMessages
// appending a realtime message after finalDisplayMessages already
// injected a placeholder). Keep only one streaming placeholder.
const keepIdx = existingStreamIdx
nextMessages = nextMessages.filter(
(m, i) => i === keepIdx || m.__streamingStatus !== 'streaming',
)
return nextMessages
}

View File

@@ -36,8 +36,8 @@ import type {
} from '@/components/slash-command-menu'
import {
DEFAULT_SLASH_COMMANDS,
mergeSlashCommands,
SlashCommandMenu,
mergeSlashCommands,
} from '@/components/slash-command-menu'
import {
PromptInput,
@@ -61,6 +61,7 @@ import {
emitSearchModalEvent,
} from '@/hooks/use-search-modal'
import { setLocalModelOverride } from '@/screens/chat/local-model-override'
import { formatModelName } from '@/lib/format-model-name'
type ChatComposerAttachment = {
id: string
@@ -72,7 +73,7 @@ type ChatComposerAttachment = {
kind?: 'image' | 'file' | 'audio'
}
type ThinkingLevel = 'off' | 'low' | 'medium' | 'high'
type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'
type ChatComposerProps = {
onSubmit: (
@@ -565,6 +566,43 @@ function getResolvedModelKey(model: string, provider?: string): string {
return `${normalizedProvider}/${normalizedModel}`
}
/**
 * Decides whether a catalog entry represents the currently selected model.
 *
 * `currentModel` may be a bare model id ("model-id") or a provider-qualified
 * one ("provider/model-id"); the entry supplies its own `id` and `provider`.
 * All three inputs are trimmed before comparison, and an empty current model
 * or entry id never matches.
 *
 * A match is reported when any of the following holds:
 *  - the current model is exactly the entry id;
 *  - the current model ends with "/<entry id>" (any prefix is accepted);
 *  - the current model is exactly "<entry provider>/<entry id>".
 *
 * The last rule is already implied by the first two; it is kept so the
 * intent stays explicit.
 *
 * @param currentModel  Model string as reported for the session.
 * @param entryId       Model id from the catalog entry.
 * @param entryProvider Provider from the catalog entry (may be empty).
 * @returns `true` when the entry should be shown as the active model.
 */
function isCurrentModel(
  currentModel: string,
  entryId: string,
  entryProvider: string,
): boolean {
  const selected = currentModel.trim()
  const id = entryId.trim()
  const provider = entryProvider.trim()

  if (selected === '' || id === '') return false

  // Fully qualified form of the entry; falls back to the bare id when the
  // entry carries no provider.
  const qualified = provider === '' ? id : `${provider}/${id}`

  return (
    selected === id ||
    selected.endsWith(`/${id}`) ||
    selected === qualified
  )
}
function isCanvasSupported(): boolean {
if (typeof document === 'undefined') return false
try {
@@ -1671,7 +1709,7 @@ function ChatComposerComponent({
const promptPlaceholder = isMobileViewport
? 'Message...'
: 'Ask anything... (↵ to send · ⇧↵ new line · ⌘⇧M switch model)'
const [serverCommands, setServerCommands] = useState<SlashCommandDefinition[]>([])
const [serverCommands, setServerCommands] = useState<Array<SlashCommandDefinition>>([])
useEffect(() => {
fetch('/api/commands')
@@ -2566,9 +2604,11 @@ function ChatComposerComponent({
unpinnedGroups.set(entry.provider, group)
}
const renderEntry = (entry: (typeof parsed)[0]) => {
const isActive =
entry.id === currentModel ||
`${defaultProvider}/${entry.id}` === currentModel
const isActive = isCurrentModel(
persistedSessionModel || currentModel,
entry.id,
entry.provider,
)
return (
<div
key={entry.id}
@@ -2757,9 +2797,9 @@ function ChatComposerComponent({
setIsThinkingMenuOpen(false)
setIsModelMenuOpen(false)
}}
className="inline-flex h-8 items-center gap-1 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
title="Chat controls"
aria-label="Chat controls"
className="inline-flex h-8 items-center gap-1.5 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
title={`Chat controls · ${modelButtonLabel}`}
aria-label={`Chat controls, current model: ${modelButtonLabel}`}
>
<svg
width="13"
@@ -2779,6 +2819,7 @@ function ChatComposerComponent({
<circle cx="15" cy="12" r="2" fill="currentColor" stroke="none" />
<circle cx="11" cy="18" r="2" fill="currentColor" stroke="none" />
</svg>
<span className="max-w-[5rem] truncate sm:max-w-[8rem] md:max-w-[10rem]">{formatModelName(modelButtonLabel)}</span>
<HugeiconsIcon icon={ArrowDown01Icon} size={11} />
</button>
{isControlsMenuOpen ? (
@@ -2946,7 +2987,11 @@ function ChatComposerComponent({
unpinnedGroups.set(entry.provider, group)
}
const renderEntry = (entry: (typeof parsed)[0]) => {
const isActive = entry.id === currentModel || `${defaultProvider}/${entry.id}` === currentModel
const isActive = isCurrentModel(
persistedSessionModel || currentModel,
entry.id,
entry.provider,
)
return (
<div key={entry.id} className="group relative flex items-center">
<button

View File

@@ -64,7 +64,7 @@ function formatMobileSessionTitle(rawTitle: string): string {
return title
}
type ThinkingLevel = 'off' | 'low' | 'adaptive'
type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'
type ChatHeaderProps = {
activeTitle: string

View File

@@ -25,6 +25,7 @@ import { AssistantAvatar } from '@/components/avatars'
import { cn } from '@/lib/utils'
import { hapticTap } from '@/lib/haptics'
import { CHAT_OPEN_MESSAGE_SEARCH_EVENT } from '@/screens/chat/chat-events'
import { useChatStore } from '@/stores/chat-store'
/** Duration (ms) the thinking indicator stays visible after waitingForResponse
* clears, giving the first response message time to render before the
@@ -179,28 +180,49 @@ type ThinkingBubbleProps = {
liveToolActivity?: Array<{ name: string; timestamp: number }>
researchCard?: UseResearchCardResult
isCompacting?: boolean
/** When true, always show "Thinking…" regardless of activity. Used for the
* first 10s before the delayed activity feed appears. */
forceSimple?: boolean
}
/**
* Premium shimmer thinking bubble — matches the assistant message position
* with three bouncing dots, a gradient shimmer sweep, and a dynamic status
* Shows a thinking indicator with animated dots and a meaningful status
* label that reflects what's actually happening (tool calls, etc.).
* When forceSimple is true, suppresses all activity labels — just "Thinking…".
*/
function ThinkingBubble({
activeToolCalls: _activeToolCalls = [],
liveToolActivity: _liveToolActivity = [],
activeToolCalls = [],
liveToolActivity = [],
researchCard,
isCompacting = false,
forceSimple = false,
}: ThinkingBubbleProps) {
const statusLabel = isCompacting ? 'Compacting context...' : 'Thinking…'
// Fallback activity from heartbeat — shows last known agent activity
// when no tool calls are in flight (e.g. during pure reasoning)
const heartbeatActivity = useChatStore((s) => s.heartbeatActivity)
// Elapsed time counter — resets when the status label changes (new tool)
// Build a meaningful status label from live activity
const activeToolNames = activeToolCalls
.filter((tc) => tc.phase !== 'done' && tc.phase !== 'complete' && tc.phase !== 'completed')
.map((tc) => tc.name.replace(/_/g, ' '))
const liveToolNames = liveToolActivity.map((a) => a.name.replace(/_/g, ' '))
const uniqueNames = [...new Set([...activeToolNames, ...liveToolNames])]
const activityLabel =
uniqueNames.length > 0
? `Using: ${uniqueNames.slice(0, 3).join(', ')}${uniqueNames.length > 3 ? ` +${uniqueNames.length - 3} more` : ''}`
: null
const statusLabel = isCompacting
? 'Compacting context...'
: forceSimple
? 'Thinking…'
: activityLabel || heartbeatActivity || 'Thinking…'
// Elapsed time counter — counts from bubble mount, not from last label change
const [elapsed, setElapsed] = useState(0)
useEffect(() => {
setElapsed(0)
const interval = window.setInterval(() => setElapsed((s) => s + 1), 1000)
return () => window.clearInterval(interval)
}, [statusLabel])
}, [])
const elapsedLabel =
elapsed >= 60
@@ -351,6 +373,33 @@ function ThinkingBubble({
)
}
/**
 * Minimal status line rendered in the activity-feed slot when there are no
 * streaming tool calls to list.
 *
 * Shows the latest heartbeat activity from the chat store (falling back to
 * "Working…") next to the time elapsed since this component mounted.
 */
function StatusLine() {
  const heartbeatActivity = useChatStore((s) => s.heartbeatActivity)
  // Wall-clock mount time, captured once via the lazy initializer.
  const [mountedAt] = useState(() => Date.now())
  const [elapsed, setElapsed] = useState(0)
  useEffect(() => {
    // Derive elapsed seconds from the clock rather than counting ticks:
    // browsers throttle timers in hidden tabs, so a tick counter falls behind
    // during long runs. Polling faster than 1s keeps the display from skipping
    // a second when a tick fires late.
    const tick = () => {
      const seconds = Math.floor((Date.now() - mountedAt) / 1000)
      // Clamp in case the system clock is adjusted backwards.
      setElapsed(Math.max(0, seconds))
    }
    const interval = window.setInterval(tick, 250)
    return () => window.clearInterval(interval)
  }, [mountedAt])
  const elapsedLabel =
    elapsed >= 60
      ? `${Math.floor(elapsed / 60)}m ${elapsed % 60}s`
      : `${elapsed}s`
  return (
    <div className="flex items-center gap-2 text-[11px] text-primary-400 dark:text-primary-500 py-0.5">
      <span className="inline-block size-1.5 rounded-full bg-amber-400 animate-pulse" />
      <span className="opacity-80">
        {heartbeatActivity || 'Working…'}
      </span>
      <span aria-hidden="true" className="opacity-40">·</span>
      <span className="tabular-nums opacity-50 font-mono">{elapsedLabel}</span>
    </div>
  )
}
const VIRTUAL_ROW_HEIGHT = 136
const VIRTUAL_OVERSCAN = 8
const NEAR_BOTTOM_THRESHOLD = 200
@@ -606,6 +655,13 @@ function ChatMessageListComponent({
const [unreadCount, setUnreadCount] = useState(0)
const [expandAllToolSections, setExpandAllToolSections] = useState(false)
// Activity feed delay: only show tool activity after 10s of thinking.
// For the first 10s, the ThinkingBubble stays simple ("Thinking…").
const THINKING_ACTIVITY_DELAY_S = 10
const [thinkingElapsed, setThinkingElapsed] = useState(0)
const thinkingStartRef = useRef<number>(0)
const thinkingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null)
// Bug 2 fix: grace period — keep thinking indicator alive briefly after
// waitingForResponse clears so the response message has time to render.
const [thinkingGrace, setThinkingGrace] = useState(false)
@@ -1107,6 +1163,52 @@ function ChatMessageListComponent({
researchCard && researchCard.steps.length > 0,
)
// Compute visibility of the entire bottom thinking area — the same gate
// used for rendering (lines below). Start / stop the elapsed timer here.
const thinkingAreaVisible =
showTypingIndicator ||
showResearchCard ||
isCompacting ||
liveToolActivity.length > 0 ||
(isStreaming && !streamingText) ||
(isStreaming && activeToolCalls.length > 0)
// Track how long the thinking area has been visible to gate the delayed
// activity feed (10s threshold).
useEffect(() => {
if (thinkingAreaVisible) {
if (thinkingStartRef.current === 0) {
thinkingStartRef.current = Date.now()
setThinkingElapsed(0)
}
if (!thinkingTimerRef.current) {
thinkingTimerRef.current = setInterval(() => {
setThinkingElapsed(
Math.floor((Date.now() - thinkingStartRef.current) / 1000),
)
}, 250)
}
} else {
if (thinkingTimerRef.current) {
clearInterval(thinkingTimerRef.current)
thinkingTimerRef.current = null
}
thinkingStartRef.current = 0
setThinkingElapsed(0)
}
return () => {
if (thinkingTimerRef.current) {
clearInterval(thinkingTimerRef.current)
thinkingTimerRef.current = null
}
}
}, [thinkingAreaVisible])
const showActivityFeed =
thinkingElapsed >= THINKING_ACTIVITY_DELAY_S ||
activeToolCalls.length > 0 ||
liveToolActivity.length > 0
const shouldBottomPin =
visibleEntries.length > 0 ||
showToolOnlyNotice ||
@@ -1146,11 +1248,11 @@ function ChatMessageListComponent({
args: tcAny.args,
preview:
typeof tcAny.preview === 'string'
? (tcAny.preview as string)
? (tcAny.preview)
: undefined,
result:
typeof tcAny.result === 'string'
? (tcAny.result as string)
? (tcAny.result)
: undefined,
}
})
@@ -1823,12 +1925,12 @@ function ChatMessageListComponent({
liveToolActivity={liveToolActivity}
researchCard={researchCard}
isCompacting={isCompacting}
forceSimple={!showActivityFeed}
/>
{/* Branch from the thinking bubble into a single compact
TUI-style tool activity card. Use normalized streaming calls
so the card appears for both structured tool events and the
lighter live activity feed. */}
{normalizedStreamingToolCalls.length > 0 ? (
{/* After 10s of thinking, show activity feed. With tool calls:
compact CLI-style TuiActivityCard (last 3). Without tool calls:
a minimal status line showing elapsed time and heartbeat. */}
{showActivityFeed ? (
<div className="flex max-w-[var(--chat-content-max-width)]">
<div
className="ml-[14px] mr-2 w-px shrink-0"
@@ -1839,8 +1941,9 @@ function ChatMessageListComponent({
aria-hidden
/>
<div className="min-w-0 flex-1 pt-1">
{normalizedStreamingToolCalls.length > 0 ? (
<TuiActivityCard
toolSections={normalizedStreamingToolCalls.map((tc) => {
toolSections={normalizedStreamingToolCalls.slice(-3).map((tc) => {
const phase = tc.phase
const state =
phase === 'error'
@@ -1884,6 +1987,9 @@ function ChatMessageListComponent({
: null
}}
/>
) : (
<StatusLine />
)}
</div>
</div>
) : null}
@@ -1955,11 +2061,24 @@ function getStableMessageId(message: ChatMessage, index: number): string {
}
const timestamp = getRawMessageTimestamp(message)
const text = textFromMessage(message)
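// Content-based fingerprint: djb2 hash of the first 120 characters of the text.
// Combined with the timestamp it survives reordering because it doesn't depend
// on array position; the index is only appended when no timestamp is available.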
const fingerprint = djb2(text.slice(0, 120))
if (timestamp) {
return `${message.role ?? 'assistant'}-${timestamp}-${index}`
return `${message.role ?? 'assistant'}-${timestamp}-${fingerprint}`
}
return `${message.role ?? 'assistant'}-${index}`
return `${message.role ?? 'assistant'}-${fingerprint}-${index}`
}
/**
 * djb2 string hash — dependency-free and cheap.
 *
 * Folds each UTF-16 code unit into a 32-bit accumulator seeded with 5381
 * (`acc * 33 + unit`, wrapped to int32), then renders the unsigned result in
 * base 36.
 */
function djb2(str: string): string {
  let acc = 5381
  let pos = 0
  while (pos < str.length) {
    // acc is always an int32 here, so acc * 33 is exact in a double; `| 0`
    // wraps it back into 32 bits.
    acc = (acc * 33 + str.charCodeAt(pos)) | 0
    pos += 1
  }
  // `>>> 0` reinterprets the signed accumulator as unsigned before formatting.
  return (acc >>> 0).toString(36)
}
function getRawMessageTimestamp(message: ChatMessage): number | null {

View File

@@ -7,6 +7,11 @@ import {
textFromMessage,
} from '../utils'
import { MessageActionsBar } from './message-actions-bar'
import {
buildHermesActivitySummary,
shouldAutoExpandHermesActivityCard,
} from './streaming-activity-ui'
import { TuiActivityCard } from './tui-activity-card'
import type { ChatAttachment, ChatMessage, ToolCallContent } from '../types'
import type { ToolPart } from '@/components/prompt-kit/tool'
import { AssistantAvatar, UserAvatar } from '@/components/avatars'
@@ -31,11 +36,6 @@ import {
useChatSettingsStore,
} from '@/hooks/use-chat-settings'
import { cn } from '@/lib/utils'
import {
buildHermesActivitySummary,
shouldAutoExpandHermesActivityCard,
} from './streaming-activity-ui'
import { TuiActivityCard } from './tui-activity-card'
const WORDS_PER_TICK = 4
const TICK_INTERVAL_MS = 50
@@ -2503,13 +2503,16 @@ function MessageItemComponent({
{/* Grouped tool card above the assistant bubble. Only show once there
is real assistant text in the bubble. While streaming with no text,
the legacy ThinkingBubble in chat-message-list owns the visual and
renders its own branched TuiActivityCard so we don't double up. */}
renders its own branched TuiActivityCard so we don't double up.
When done streaming, show a compact tool-count chip instead of
the full expandable card. */}
{!isUser &&
finalToolSections.length > 0 &&
(hasText || !effectiveIsStreaming) ? (
<div className="w-full max-w-[var(--chat-content-max-width)] flex">
<div className="w-6 shrink-0" aria-hidden />
<div className="min-w-0 flex-1">
{effectiveIsStreaming ? (
<TuiActivityCard
toolSections={finalToolSections}
thinking={null}
@@ -2518,6 +2521,11 @@ function MessageItemComponent({
formatLabel={formatToolDisplayLabel}
formatArg={keyArgLabel}
/>
) : (
<span className="inline-block text-[11px] text-primary-400 dark:text-primary-500 py-0.5 opacity-60">
{finalToolSections.length} tool{finalToolSections.length !== 1 ? 's' : ''} used
</span>
)}
</div>
</div>
) : null}

View File

@@ -0,0 +1,88 @@
import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
import { textFromMessage } from '../utils'
import type { QueryClient } from '@tanstack/react-query'
import type { ChatMessage } from '../types'
/** Returns the trimmed string for string input; any other value yields ''. */
function normalize(value: unknown): string {
  if (typeof value !== 'string') {
    return ''
  }
  return value.trim()
}
/**
 * Captures the not-yet-echoed user messages currently in the history cache
 * and returns a closure that puts them back if they have gone missing.
 *
 * Intended to bracket a cache replacement (e.g. a refetch): server data will
 * not contain an optimistic message until the server echoes it, so without
 * re-injection the user's message vanishes in the meantime.
 *
 * A cached message is captured when its role is "user" and any of:
 *  - `__optimisticId` starts with "opt-"
 *  - `status` is "sending" or "queued"
 *  - `status` is "sent" and it carries a client id (`clientId` / `client_id`)
 *    but no server id (`id` / `messageId`)
 *
 * When the returned closure runs, a captured message is treated as already
 * present — and skipped — if the current cache holds a user message with the
 * same client id, or with identical text and a timestamp less than 10 000
 * apart (both timestamps must be non-zero). Otherwise it is appended via
 * `appendHistoryMessage`.
 *
 * Usage:
 *   const reInject = snapshotOptimisticUserMessages(queryClient, friendlyId, sessionKey)
 *   await queryClient.invalidateQueries(...)
 *   reInject()
 */
export function snapshotOptimisticUserMessages(
  queryClient: QueryClient,
  friendlyId: string,
  sessionKey: string,
): () => void {
  const historyKey = chatQueryKeys.history(friendlyId, sessionKey)
  const snapshot = queryClient.getQueryData<Record<string, unknown>>(historyKey)
  const snapshotMessages =
    (snapshot?.messages as Array<unknown> | undefined) ?? []

  const pending = snapshotMessages.filter((candidate: unknown) => {
    const fields = candidate as Record<string, unknown>
    if (fields.role !== 'user') return false
    if (String(fields.__optimisticId ?? '').startsWith('opt-')) return true

    switch (String(fields.status)) {
      case 'sending':
      case 'queued':
        return true
      case 'sent': {
        // Keep only messages that have a local client id but no server id.
        const hasClientId = [fields.clientId, fields.client_id].some(
          (value) => normalize(value).length > 0,
        )
        const hasServerId = [fields.id, fields.messageId].some(
          (value) => normalize(value).length > 0,
        )
        return hasClientId && !hasServerId
      }
      default:
        return false
    }
  }) as unknown as Array<ChatMessage>

  return () => {
    const latest = queryClient.getQueryData<Record<string, unknown>>(historyKey)
    // Read once up front; messages appended below are not re-read.
    const latestMessages =
      (latest?.messages as Array<unknown> | undefined) ?? []

    for (const pendingMessage of pending) {
      const pendingFields = pendingMessage as unknown as Record<string, unknown>
      const pendingClientId =
        normalize(pendingFields.clientId) || normalize(pendingFields.client_id)
      const pendingText = textFromMessage(pendingMessage)

      const echoed = latestMessages.some((entry: unknown) => {
        const entryFields = entry as Record<string, unknown>
        if (entryFields.role !== 'user') return false

        // Strongest signal: matching client ids.
        if (pendingClientId) {
          const entryClientId =
            normalize(entryFields.clientId) || normalize(entryFields.client_id)
          if (entryClientId && entryClientId === pendingClientId) return true
        }

        // Fallback: identical text with close, non-zero timestamps.
        if (pendingText.length === 0) return false
        if (textFromMessage(entry as ChatMessage) !== pendingText) return false
        const pendingTs = (pendingFields.timestamp as number) || 0
        const entryTs = (entryFields.timestamp as number) || 0
        return Boolean(
          pendingTs && entryTs && Math.abs(pendingTs - entryTs) < 10_000,
        )
      })

      if (echoed) continue
      appendHistoryMessage(queryClient, friendlyId, sessionKey, pendingMessage)
    }
  }
}

View File

@@ -22,9 +22,14 @@ type ActiveRunResponse = {
const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
'accepted',
'active',
'handoff',
// NOTE: 'handoff' is deliberately excluded. A handoff run means the
// SSE client disconnected — the browser has no active stream. Keeping
// the waiting state alive for handoff runs causes ghost "Thinking"
// indicators on session reopen for runs that completed hours ago.
])
const ACTIVE_RUN_CHECK_TIMEOUT_MS = 2000
/**
* On mount, checks whether the server has an active run for this session.
* If so, marks the session as waiting in the persistent Zustand store.
@@ -33,6 +38,10 @@ const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
* This closes the gap where a user navigates away during streaming,
* the component unmounts (losing local state), and on remount the UI
* doesn't know a run was in progress.
*
* A timeout (ACTIVE_RUN_CHECK_TIMEOUT_MS) ensures the check never blocks
* the UI indefinitely — if the API is slow or unreachable, we assume the
* run is dead and clear stale waiting state.
*/
export function useActiveRunCheck({
sessionKey,
@@ -55,6 +64,25 @@ export function useActiveRunCheck({
hasCheckedRef.current = true
const controller = new AbortController()
let settled = false
const settle = () => {
if (settled) return
settled = true
onCompleteRef.current?.()
}
// Timeout: if the API check doesn't complete in time, assume the run is dead
const timeoutId = window.setTimeout(() => {
if (settled) return
settle()
try { controller.abort() } catch { /* ignore */ }
// Clear stale waiting state — the run is almost certainly dead
const store = useChatStore.getState()
if (store.isSessionWaiting(sessionKeyRef.current)) {
store.clearSessionWaiting(sessionKeyRef.current)
}
}, ACTIVE_RUN_CHECK_TIMEOUT_MS)
async function check() {
try {
@@ -62,10 +90,10 @@ export function useActiveRunCheck({
`/api/sessions/${encodeURIComponent(sessionKey)}/active-run`,
{ signal: controller.signal },
)
if (!response.ok) return
if (!response.ok) return finishCheck()
const data = (await response.json()) as ActiveRunResponse
if (!data.ok) return
if (!data.ok) return finishCheck()
const store = useChatStore.getState()
if (data.run && ACTIVE_STATUSES.has(data.run.status)) {
@@ -75,15 +103,21 @@ export function useActiveRunCheck({
store.clearSessionWaiting(sessionKey)
}
} catch {
// Network error or abort — ignore
// Network error or abort — ignore, already handled by timeout
} finally {
onCompleteRef.current?.()
finishCheck()
}
}
function finishCheck() {
window.clearTimeout(timeoutId)
settle()
}
void check()
return () => {
window.clearTimeout(timeoutId)
controller.abort()
}
}, [sessionKey, enabled])

View File

@@ -702,11 +702,19 @@ function mergeOptimisticHistoryMessages(
}
// Preserve unconfirmed optimistic messages regardless of age.
// Also preserve confirmed-sent messages that have a clientId but no
// server id yet — they were acknowledged by SSE (onStarted) but
// haven't been echoed by the server. Periodic refetches will drop
// them otherwise (the "user message disappears" bug).
const isSending =
optimisticMessage.status === 'sending' ||
Boolean(optimisticMessage.__optimisticId)
const isSentButUnechoed =
optimisticMessage.status === 'sent' &&
Boolean(getMessageClientId(optimisticMessage)) &&
!optimisticMessage.id
if (isSending) {
if (isSending || isSentButUnechoed) {
merged.push(optimisticMessage)
}
}

View File

@@ -5,6 +5,7 @@ import { useChatStore } from '../../../stores/chat-store'
import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
import { toast } from '../../../components/ui/toast'
import { textFromMessage } from '../utils'
import { snapshotOptimisticUserMessages } from './optimistic-message-reinject'
import type { ChatMessage } from '../types'
import type { StreamingState } from '../../../stores/chat-store'
@@ -324,6 +325,14 @@ export function useRealtimeChatHistory({
const prevCount =
(prevData?.messages as Array<unknown> | undefined)?.length ?? 0
// Snapshot optimistic user messages before refetch so they
// survive the cache replacement. Re-injected after refetch.
const reInjectOptimistic = snapshotOptimisticUserMessages(
queryClient,
effectiveFriendlyId,
effectiveSessionKey,
)
// Issue #441 fix: Directly merge realtime buffer into history cache
// INSTEAD of invalidateQueries. The old approach caused a race:
// invalidateQueries → refetch (async) → merge runs with stale data
@@ -418,6 +427,8 @@ export function useRealtimeChatHistory({
)
}
}
// Re-inject optimistic user messages that the server hasn't echoed yet
reInjectOptimistic()
})
// Check for compaction — significant message count drop

View File

@@ -241,6 +241,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
error: message,
}))
onError?.(message)
useChatStore.getState().setHeartbeatActivity(null)
},
[
clearHandoffTimer,
@@ -429,6 +430,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
}
onComplete?.(message)
useChatStore.getState().setHeartbeatActivity(null)
},
[clearHandoffTimer, onComplete, stopFrame, unregisterSendStreamRun],
)
@@ -444,7 +446,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
typeof window !== 'undefined' &&
window.localStorage?.getItem('hermes:debug:sse') === '1'
) {
// eslint-disable-next-line no-console
console.log(
'[hermes-sse]',
event,
@@ -754,6 +756,8 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
}
case 'heartbeat': {
markActivity()
const activity = (payload as { activity?: string | null }).activity ?? null
useChatStore.getState().setHeartbeatActivity(activity)
break
}
case 'close': {
@@ -851,6 +855,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
streamingText: '',
error: null,
})
useChatStore.getState().setHeartbeatActivity(null)
try {
const response = await fetch('/api/send-stream', {

View File

@@ -140,6 +140,11 @@ type ChatState = {
clearSessionWaiting: (sessionKey: string) => void
/** Check if a session is waiting for a response */
isSessionWaiting: (sessionKey: string) => boolean
/** Last activity description forwarded via heartbeat — used by ThinkingBubble
* to show meaningful progress during long reasoning stretches */
heartbeatActivity: string | null
setHeartbeatActivity: (activity: string | null) => void
}
const createEmptyStreamingState = (): StreamingState => ({
@@ -641,6 +646,7 @@ export const useChatStore = create<ChatState>((set, get) => ({
sendStreamRunIds: new Set(),
waitingSessionKeys: _restoredWaiting.keys,
waitingSessionMeta: _restoredWaiting.meta,
heartbeatActivity: null,
setConnectionState: (connectionState, error) => {
set({ connectionState, lastError: error ?? null })
@@ -687,6 +693,10 @@ export const useChatStore = create<ChatState>((set, get) => ({
return get().waitingSessionKeys.has(sessionKey)
},
setHeartbeatActivity: (activity) => {
set({ heartbeatActivity: activity })
},
processEvent: (event) => {
const state = get()
const sessionKey = event.sessionKey
@@ -893,6 +903,31 @@ export const useChatStore = create<ChatState>((set, get) => ({
}
if (duplicateIndex === -1) {
// Multiple message.started events from the agent create distinct
// realtime entries with empty content. Replace the previous empty
// assistant message instead of appending — prevents "3 individual
// messages then one final" bug where each tool phase looks like a
// separate assistant bubble.
if (
incomingMessage.role === 'assistant' &&
newPlainText.length === 0 &&
sessionMessages.length > 0
) {
const prevEmptyIdx = sessionMessages.findLastIndex(
(m) =>
m.role === 'assistant' &&
extractMessageText(m).length === 0,
)
if (prevEmptyIdx >= 0) {
sessionMessages[prevEmptyIdx] = incomingMessage
messages.set(
sessionKey,
sortMessagesChronologically(sessionMessages),
)
set({ realtimeMessages: messages, lastEventAt: now })
break
}
}
sessionMessages.push(incomingMessage)
messages.set(sessionKey, sortMessagesChronologically(sessionMessages))
set({ realtimeMessages: messages, lastEventAt: now })
@@ -1209,6 +1244,13 @@ export const useChatStore = create<ChatState>((set, get) => ({
if (histMsg.role === rtMsg.role && rtText) {
const histText = extractMessageText(histMsg)
if (histText === rtText) return true
// Streaming realtime text is a prefix of the final server text.
// Match either direction to prevent duplicates when the server
// returns the complete message after the realtime buffer had a
// partial version.
if (rtText.length > 0 && histText.length > 0) {
if (histText.startsWith(rtText) || rtText.startsWith(histText)) return true
}
}
const histRaw = histMsg as Record<string, unknown>

View File

@@ -88,6 +88,15 @@ async function isClaudeAgentHealthy(port = 8642): Promise<boolean> {
const config = defineConfig(({ mode, command }) => {
const env = loadEnv(mode, process.cwd(), '')
// Bridge loadEnv into process.env for server-side SSR runtime code that
// reads env vars directly from process.env (e.g. getBearerToken() in
// openai-compat-api.ts reads process.env.HERMES_API_TOKEN). Without this,
// Vite's loadEnv only populates the local `env` object — not process.env.
for (const key of Object.keys(env)) {
if (!(key in process.env)) {
process.env[key] = env[key]
}
}
const claudeApiUrl = env.CLAUDE_API_URL?.trim() || 'http://127.0.0.1:8642'
// /api/connection-status is handled by the real route file at
// src/routes/api/connection-status.ts; the dev server no longer