PR #543: chat UI/UX fixes — thinking indicators, message dedup, streaming stability (JohnGuidry)

Addresses #572 (double chat responses) + #561 (stuck Thinking indicator).
Adds optimistic-message-reinject hook, vite loadEnv→process.env bridge for SSR
bearer token, dedup + streaming stability. eslint --fix on touched files
(net lint errors 1700→1588). Build GREEN, test 33 fail/694 pass (zero regressions).
This commit is contained in:
Aurora
2026-06-05 06:01:21 -04:00
parent ef2e4ba02b
commit 5271ca9ad3
14 changed files with 704 additions and 219 deletions

1
.gitignore vendored
View File

@@ -16,6 +16,7 @@ build
.vinxi .vinxi
.nitro .nitro
.tanstack .tanstack
.vite
# Environment variables # Environment variables
.env .env

View File

@@ -1,15 +1,10 @@
import { createFileRoute } from '@tanstack/react-router' import { createFileRoute } from '@tanstack/react-router'
import { buildResolvedSessionHeaders } from '../../lib/send-stream-session-headers' import { buildResolvedSessionHeaders } from '../../lib/send-stream-session-headers'
import { buildWorkspaceScopedTextMessage } from '../../lib/workspace-message-scope' import { buildWorkspaceScopedTextMessage } from '../../lib/workspace-message-scope'
import {
collectSyntheticLiveToolEvents,
createSyntheticLiveToolTracker,
} from './-send-stream-live-tools'
import { resolveSessionKey } from '../../server/session-utils' import { resolveSessionKey } from '../../server/session-utils'
import { isAuthenticated } from '../../server/auth-middleware' import { isAuthenticated } from '../../server/auth-middleware'
import { requireJsonContentType } from '../../server/rate-limit' import { requireJsonContentType } from '../../server/rate-limit'
import { publishChatEvent } from '../../server/chat-event-bus' import { publishChatEvent } from '../../server/chat-event-bus'
import { loadWorkspaceCatalog } from './workspace'
import { import {
registerActiveSendRun, registerActiveSendRun,
unregisterActiveSendRun, unregisterActiveSendRun,
@@ -22,8 +17,8 @@ import {
upsertRunToolCall, upsertRunToolCall,
} from '../../server/run-store' } from '../../server/run-store'
import { getChatMode } from '../../server/gateway-capabilities' import { getChatMode } from '../../server/gateway-capabilities'
import { ensureLocalSession, appendLocalMessage, getLocalMessages, touchLocalSession } from '../../server/local-session-store' import { appendLocalMessage, ensureLocalSession, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
import { getLocalProviderDef, getDiscoveredModels } from '../../server/local-provider-discovery' import { getDiscoveredModels, getLocalProviderDef } from '../../server/local-provider-discovery'
import { openaiChat } from '../../server/openai-compat-api' import { openaiChat } from '../../server/openai-compat-api'
import { streamResponses } from '../../server/responses-api' import { streamResponses } from '../../server/responses-api'
import { selectPortableConversationHistory } from '../../server/portable-history' import { selectPortableConversationHistory } from '../../server/portable-history'
@@ -36,6 +31,11 @@ import {
listSessions, listSessions,
streamChat, streamChat,
} from '../../server/claude-api' } from '../../server/claude-api'
import { loadWorkspaceCatalog } from './workspace'
import {
collectSyntheticLiveToolEvents,
createSyntheticLiveToolTracker,
} from './-send-stream-live-tools'
import type {OpenAICompatContentPart, OpenAICompatMessage} from '../../server/openai-compat-api'; import type {OpenAICompatContentPart, OpenAICompatMessage} from '../../server/openai-compat-api';
// Claude agent runs can take 5+ minutes with complex tool chains // Claude agent runs can take 5+ minutes with complex tool chains
const SEND_STREAM_RUN_TIMEOUT_MS = 600_000 const SEND_STREAM_RUN_TIMEOUT_MS = 600_000
@@ -386,10 +386,43 @@ export const Route = createFileRoute('/api/send-stream')({
let streamTimeoutTimer: ReturnType<typeof setTimeout> | null = null let streamTimeoutTimer: ReturnType<typeof setTimeout> | null = null
let heartbeatTimer: ReturnType<typeof setInterval> | null = null let heartbeatTimer: ReturnType<typeof setInterval> | null = null
const abortController = new AbortController() const abortController = new AbortController()
// Close out the SSE stream — stop enqueueing, clear timers, and
// abort the upstream Hermes gateway request so the agent stops
// processing. Does NOT touch run status (persistActiveRun etc.).
// The abort path (request.signal / handleAbort) owns run cleanup.
//
// Safe to call more than once: the streamClosed guard turns every call
// after the first into a no-op.
let closeStream = () => { let closeStream = () => {
if (streamClosed) return
streamClosed = true streamClosed = true
// NOTE(review): start(controller) further down appears to declare its own
// `heartbeatTimer`. If the heartbeat setInterval is assigned to that inner
// binding, this outer variable stays null and the interval is never
// cleared here — confirm which binding the interval handle is written to.
if (heartbeatTimer) {
clearInterval(heartbeatTimer)
heartbeatTimer = null
}
if (unregisterTimer) {
clearTimeout(unregisterTimer)
unregisterTimer = null
}
if (streamTimeoutTimer) {
clearTimeout(streamTimeoutTimer)
streamTimeoutTimer = null
}
// Signal the upstream request to stop; done last, after local timers are gone.
abortController.abort()
} }
// When the client hits Stop / navigates away / closes the tab, the
// request.signal fires abort. Stop the upstream agent (closeStream)
// and clean up run tracking so we don't burn API credits on an orphan.
//
// Run bookkeeping happens only while a run is still tracked and the stream
// has not been closed yet; closeStream() is called unconditionally afterwards.
function handleAbort() {
// Must be evaluated before closeStream(), which sets streamClosed to true.
if (activeRunId && !streamClosed) {
// Record the interruption as a 'handoff' on the persisted run.
persistActiveRun((runSessionKey, activeId) =>
markRunStatus(runSessionKey, activeId, 'handoff'),
)
unregisterActiveSendRun(activeRunId)
// Clear the id so later cleanup paths do not unregister the run again.
activeRunId = null
}
closeStream()
}
request.signal.addEventListener('abort', () => handleAbort(), { once: true })
const persistRunStarted = ( const persistRunStarted = (
runId: string | undefined, runId: string | undefined,
runSessionKey: string, runSessionKey: string,
@@ -419,6 +452,11 @@ export const Route = createFileRoute('/api/send-stream')({
async start(controller) { async start(controller) {
let heartbeatTimer: ReturnType<typeof setInterval> | null = null let heartbeatTimer: ReturnType<typeof setInterval> | null = null
let lastClientEventAt = Date.now() let lastClientEventAt = Date.now()
// Track the last human-readable activity so the heartbeat can
// forward it to the UI. Without this the ThinkingBubble shows a
// static "Thinking…" for minutes when the agent is reasoning
// without tool calls, making it look hung.
let lastActivity: string | null = null
const enqueueRaw = (payload: string) => { const enqueueRaw = (payload: string) => {
if (streamClosed) return if (streamClosed) return
controller.enqueue(encoder.encode(payload)) controller.enqueue(encoder.encode(payload))
@@ -462,10 +500,6 @@ export const Route = createFileRoute('/api/send-stream')({
clearTimeout(streamTimeoutTimer) clearTimeout(streamTimeoutTimer)
streamTimeoutTimer = null streamTimeoutTimer = null
} }
if (heartbeatTimer) {
clearInterval(heartbeatTimer)
heartbeatTimer = null
}
if (activeRunId) { if (activeRunId) {
unregisterActiveSendRun(activeRunId) unregisterActiveSendRun(activeRunId)
activeRunId = null activeRunId = null
@@ -481,9 +515,11 @@ export const Route = createFileRoute('/api/send-stream')({
// Keep the SSE stream alive during long agent processing (tool calls, // Keep the SSE stream alive during long agent processing (tool calls,
// slow LLM responses on large contexts). Without this the client-side // slow LLM responses on large contexts). Without this the client-side
// no-activity timer fires after 2-3 min and aborts the stream. // no-activity timer fires after 2-3 min and aborts the stream.
// Every 10s we also forward the last known activity so the UI can
// show meaningful progress instead of a static "Thinking…".
heartbeatTimer = setInterval(() => { heartbeatTimer = setInterval(() => {
sendEvent('heartbeat', { timestamp: Date.now() }) sendEvent('heartbeat', { timestamp: Date.now(), activity: lastActivity })
}, 30_000) }, 10_000)
try { try {
if (chatMode === 'portable') { if (chatMode === 'portable') {
@@ -514,6 +550,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: portableSessionKey, sessionKey: portableSessionKey,
friendlyId: portableFriendlyId, friendlyId: portableFriendlyId,
}) })
lastActivity = 'Processing your message...'
try { try {
const userContent = buildMultimodalContent( const userContent = buildMultimodalContent(
@@ -569,7 +606,7 @@ export const Route = createFileRoute('/api/send-stream')({
const useResponsesApi = const useResponsesApi =
process.env.HERMES_USE_RESPONSES === '1' && !localBaseUrl process.env.HERMES_USE_RESPONSES === '1' && !localBaseUrl
if (useResponsesApi) { if (useResponsesApi) {
let thinking = '' const thinking = ''
// Track tool calls by callId so a `tool.completed` // Track tool calls by callId so a `tool.completed`
// followed by `tool.output` can carry the full // followed by `tool.output` can carry the full
// arguments forward without losing them. // arguments forward without losing them.
@@ -615,7 +652,7 @@ export const Route = createFileRoute('/api/send-stream')({
}) })
const argsForCard = const argsForCard =
ev.args && typeof ev.args === 'object' ev.args && typeof ev.args === 'object'
? (ev.args as Record<string, unknown>) ? (ev.args)
: undefined : undefined
persistActiveRun((runSessionKey, activeId) => persistActiveRun((runSessionKey, activeId) =>
upsertRunToolCall(runSessionKey, activeId, { upsertRunToolCall(runSessionKey, activeId, {
@@ -633,6 +670,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: portableSessionKey, sessionKey: portableSessionKey,
runId, runId,
}) })
lastActivity = `Running: ${ev.name.replace(/_/g, ' ')}`
continue continue
} }
if (ev.kind === 'tool.completed') { if (ev.kind === 'tool.completed') {
@@ -649,7 +687,7 @@ export const Route = createFileRoute('/api/send-stream')({
const state = toolStateByCallId.get(ev.callId) const state = toolStateByCallId.get(ev.callId)
const argsForCard = const argsForCard =
state?.args && typeof state.args === 'object' state?.args && typeof state.args === 'object'
? (state.args as Record<string, unknown>) ? (state.args)
: undefined : undefined
const name = state?.name || 'tool' const name = state?.name || 'tool'
persistActiveRun((runSessionKey, activeId) => persistActiveRun((runSessionKey, activeId) =>
@@ -670,6 +708,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: portableSessionKey, sessionKey: portableSessionKey,
runId, runId,
}) })
lastActivity = `Completed: ${name.replace(/_/g, ' ')}`
continue continue
} }
if (ev.kind === 'completed') { if (ev.kind === 'completed') {
@@ -1012,6 +1051,7 @@ export const Route = createFileRoute('/api/send-stream')({
sessionKey: sessionKeyFromEvent, sessionKey: sessionKeyFromEvent,
friendlyId: sessionKeyFromEvent, friendlyId: sessionKeyFromEvent,
}) })
lastActivity = 'Processing your message...'
} }
if (event === 'run.started') { if (event === 'run.started') {
@@ -1137,6 +1177,7 @@ export const Route = createFileRoute('/api/send-stream')({
) )
sendEvent('tool', translated) sendEvent('tool', translated)
skipPublish || publishChatEvent('tool', translated) skipPublish || publishChatEvent('tool', translated)
lastActivity = `Running: ${toolName.replace(/_/g, ' ')}`
return return
} }
@@ -1155,6 +1196,7 @@ export const Route = createFileRoute('/api/send-stream')({
} }
sendEvent('thinking', translated) sendEvent('thinking', translated)
skipPublish || publishChatEvent('thinking', translated) skipPublish || publishChatEvent('thinking', translated)
lastActivity = delta.length > 60 ? delta.slice(0, 60) + '...' : delta
return return
} }
const translated = { const translated = {
@@ -1203,6 +1245,7 @@ export const Route = createFileRoute('/api/send-stream')({
) )
sendEvent('tool', translated) sendEvent('tool', translated)
skipPublish || publishChatEvent('tool', translated) skipPublish || publishChatEvent('tool', translated)
lastActivity = `Completed: ${toolName.replace(/_/g, ' ')}`
return return
} }
@@ -1376,10 +1419,10 @@ export const Route = createFileRoute('/api/send-stream')({
) )
const recent = persistedMessages.slice( const recent = persistedMessages.slice(
sliceFrom, sliceFrom,
) as Array<Record<string, unknown>> )
let lastAssistantIndex = -1 let lastAssistantIndex = -1
for (let i = recent.length - 1; i >= 0; i--) { for (let i = recent.length - 1; i >= 0; i--) {
const m = recent[i] as Record<string, unknown> const m = recent[i]
if (m && m.role === 'assistant') { if (m && m.role === 'assistant') {
lastAssistantIndex = i lastAssistantIndex = i
break break
@@ -1388,7 +1431,7 @@ export const Route = createFileRoute('/api/send-stream')({
if (lastAssistantIndex >= 0) { if (lastAssistantIndex >= 0) {
const lastAssistant = recent[ const lastAssistant = recent[
lastAssistantIndex lastAssistantIndex
] as Record<string, unknown> ]
const rawToolCalls = (lastAssistant.tool_calls ?? const rawToolCalls = (lastAssistant.tool_calls ??
(lastAssistant as any).toolCalls) as (lastAssistant as any).toolCalls) as
| Array<Record<string, unknown>> | Array<Record<string, unknown>>
@@ -1478,28 +1521,17 @@ export const Route = createFileRoute('/api/send-stream')({
} }
}, },
cancel() { cancel() {
// Browser navigation/unmount cancels the response reader. That // User clicked Stop, navigated away, or browser closed the tab.
// must not cancel the Hermes run itself: the chat/conductor should // Mark the stream complete, persist the run as 'handoff' so
// keep thinking server-side so the user can return and recover the // session history reflects the interruption, then delegate to
// answer from session history. Mark this client stream closed so we // closeStream() for timer/controller cleanup. Delegate instead
// stop enqueueing SSE chunks, but deliberately leave the upstream // of duplicating cleanup logic to keep the two paths in sync.
// abortController alone. if (activeRunId && !streamClosed) {
streamClosed = true
if (unregisterTimer) {
clearTimeout(unregisterTimer)
unregisterTimer = null
}
if (streamTimeoutTimer) {
clearTimeout(streamTimeoutTimer)
streamTimeoutTimer = null
}
if (activeRunId) {
persistActiveRun((runSessionKey, activeId) => persistActiveRun((runSessionKey, activeId) =>
markRunStatus(runSessionKey, activeId, 'handoff'), markRunStatus(runSessionKey, activeId, 'handoff'),
) )
unregisterActiveSendRun(activeRunId)
activeRunId = null
} }
closeStream()
}, },
}) })

View File

@@ -1,8 +1,5 @@
// Module-level local model override — set by composer when user picks a local model // Module-level local model override — set by composer when user picks a local model
// Avoids prop threading. Reset when switching back to cloud models. // Avoids prop threading. Reset when switching back to cloud models.
export let _localModelOverride = ''
export function setLocalModelOverride(model: string) { _localModelOverride = model }
import { import {
useCallback, useCallback,
useEffect, useEffect,
@@ -21,12 +18,12 @@ import {
textFromMessage, textFromMessage,
} from './utils' } from './utils'
import { import {
advanceStickyStreamingText, advanceStickyStreamingText,
createResponseWaitSnapshot,
createOptimisticMessage, createOptimisticMessage,
createResponseWaitSnapshot,
isTerminalActiveRunStatus, isTerminalActiveRunStatus,
shouldClearWaitingForAssistantMessage, shouldClearWaitingForAssistantMessage
type ResponseWaitSnapshot,
} from './chat-screen-utils' } from './chat-screen-utils'
import { import {
appendHistoryMessage, appendHistoryMessage,
@@ -43,21 +40,20 @@ import { ChatEmptyState } from './components/chat-empty-state'
import { ChatComposer } from './components/chat-composer' import { ChatComposer } from './components/chat-composer'
import { ConnectionStatusMessage } from './components/connection-status-message' import { ConnectionStatusMessage } from './components/connection-status-message'
import { import {
clearPendingSendForSession,
consumePendingSend, consumePendingSend,
hasPendingGeneration, hasPendingGeneration,
hasPendingSend, hasPendingSend,
isRecentSession, isRecentSession,
resetPendingSend, resetPendingSend,
setPendingGeneration, setPendingGeneration,
clearPendingSendForSession,
} from './pending-send' } from './pending-send'
import { useChatMeasurements } from './hooks/use-chat-measurements' import { useChatMeasurements } from './hooks/use-chat-measurements'
import { useChatHistory } from './hooks/use-chat-history' import { useChatHistory } from './hooks/use-chat-history'
import { useRealtimeChatHistory } from './hooks/use-realtime-chat-history' import { useRealtimeChatHistory } from './hooks/use-realtime-chat-history'
import { snapshotOptimisticUserMessages } from './hooks/optimistic-message-reinject'
import { useSmoothStreamingText } from './hooks/use-smooth-streaming-text' import { useSmoothStreamingText } from './hooks/use-smooth-streaming-text'
import { useStreamingMessage } from './hooks/use-streaming-message' import { useStreamingMessage } from './hooks/use-streaming-message'
import { playChatComplete } from '@/lib/sounds'
import { useChatSettingsStore } from '@/hooks/use-chat-settings'
import { useActiveRunCheck } from './hooks/use-active-run-check' import { useActiveRunCheck } from './hooks/use-active-run-check'
import { useChatMobile } from './hooks/use-chat-mobile' import { useChatMobile } from './hooks/use-chat-mobile'
import { useChatSessions } from './hooks/use-chat-sessions' import { useChatSessions } from './hooks/use-chat-sessions'
@@ -70,6 +66,7 @@ import {
CHAT_PENDING_COMMAND_STORAGE_KEY, CHAT_PENDING_COMMAND_STORAGE_KEY,
CHAT_RUN_COMMAND_EVENT, CHAT_RUN_COMMAND_EVENT,
} from './chat-events' } from './chat-events'
import type {ResponseWaitSnapshot} from './chat-screen-utils';
import type { import type {
ChatComposerAttachment, ChatComposerAttachment,
ChatComposerHandle, ChatComposerHandle,
@@ -79,6 +76,9 @@ import type {
import type { ApprovalRequest } from '@/screens/gateway/lib/approvals-store' import type { ApprovalRequest } from '@/screens/gateway/lib/approvals-store'
import type { ChatAttachment, ChatMessage, SessionMeta } from './types' import type { ChatAttachment, ChatMessage, SessionMeta } from './types'
import type { ChatRunCommandDetail } from './chat-events' import type { ChatRunCommandDetail } from './chat-events'
import type {AgentActivity} from '@/stores/chat-activity-store';
import { useChatSettingsStore } from '@/hooks/use-chat-settings'
import { playChatComplete } from '@/lib/sounds'
import { import {
addApproval, addApproval,
loadApprovals, loadApprovals,
@@ -101,12 +101,16 @@ import { MobileSessionsPanel } from '@/components/mobile-sessions-panel'
import { ContextAlertModal } from '@/components/usage-meter/context-alert-modal' import { ContextAlertModal } from '@/components/usage-meter/context-alert-modal'
import { ErrorToastContainer, showErrorToast } from '@/components/error-toast' import { ErrorToastContainer, showErrorToast } from '@/components/error-toast'
// ContextMeter removed — ContextBar (PR #32) replaces it // ContextMeter removed — ContextBar (PR #32) replaces it
import { useChatStore, persistRecoveryMessage } from '@/stores/chat-store' import { persistRecoveryMessage, useChatStore } from '@/stores/chat-store'
import { useSessionModelStore } from '@/stores/session-model-store'
import { useResearchCard } from '@/hooks/use-research-card' import { useResearchCard } from '@/hooks/use-research-card'
// MOBILE_TAB_BAR_OFFSET removed — tab bar always hidden in chat // MOBILE_TAB_BAR_OFFSET removed — tab bar always hidden in chat
import { useTapDebug } from '@/hooks/use-tap-debug' import { useTapDebug } from '@/hooks/use-tap-debug'
import { useChatMode } from '@/hooks/use-chat-mode' import { useChatMode } from '@/hooks/use-chat-mode'
import { useChatActivityStore, type AgentActivity } from '@/stores/chat-activity-store' import { useChatActivityStore } from '@/stores/chat-activity-store'
export let _localModelOverride = ''
export function setLocalModelOverride(model: string) { _localModelOverride = model }
type ChatScreenProps = { type ChatScreenProps = {
activeFriendlyId: string activeFriendlyId: string
@@ -481,45 +485,6 @@ export function ChatScreen({
const portableChatFriendlyId = isPortableMode ? 'main' : activeFriendlyId const portableChatFriendlyId = isPortableMode ? 'main' : activeFriendlyId
// --- Issue #43 fix: lift waitingForResponse into persistent Zustand store --- // --- Issue #43 fix: lift waitingForResponse into persistent Zustand store ---
// The store survives component unmount, so navigating away mid-stream // The store survives component unmount, so navigating away mid-stream
// doesn't lose the "waiting" flag. sessionStorage backup handles reloads.
const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
// resolvedSessionKey isn't available yet (defined below), so we track it via
// a ref that's updated once it resolves. The memo/callback read the ref.
const sessionKeyForWaiting = useRef<string | undefined>(undefined)
const [activeRunCheckDone, setActiveRunCheckDone] = useState(false)
// Track stale-restored sessions that need API verification before showing thinking.
// On page reload, sessionStorage may contain stale "waiting" flags from a
// previous session. We must not show the thinking indicator until the
// active-run API check confirms the run is genuinely active. (Issue #449)
const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
const waitingForResponse = useMemo(() => {
const key = sessionKeyForWaiting.current
if (!key) return hasPendingSend() || hasPendingGeneration()
// If we restored waiting state from sessionStorage but haven't verified
// with the API yet, don't show thinking — it might be stale (Issue #449).
if (
storeWaiting.has(key) &&
pendingVerifySessionKeyRef.current === key &&
!activeRunCheckDone
) {
return false
}
return storeWaiting.has(key)
}, [storeWaiting, activeRunCheckDone])
const setWaitingForResponse = useCallback((waiting: boolean) => {
const store = useChatStore.getState()
const key = sessionKeyForWaiting.current
if (!key) return
if (waiting) {
store.setSessionWaiting(key)
} else {
store.clearSessionWaiting(key)
}
}, [])
const [liveToolActivity, setLiveToolActivity] = useState< const [liveToolActivity, setLiveToolActivity] = useState<
Array<{ name: string; timestamp: number }> Array<{ name: string; timestamp: number }>
>([]) >([])
@@ -540,10 +505,18 @@ export function ChatScreen({
if (typeof window === 'undefined') return 'low' if (typeof window === 'undefined') return 'low'
const key = `claude-thinking-${activeFriendlyId || 'new'}` const key = `claude-thinking-${activeFriendlyId || 'new'}`
const stored = window.sessionStorage.getItem(key) const stored = window.sessionStorage.getItem(key)
if (stored === 'off' || stored === 'low' || stored === 'adaptive') if (stored === 'off' || stored === 'low' || stored === 'medium' || stored === 'high' || stored === 'adaptive')
return stored return stored
return 'low' return 'low'
}) })
// Tracks whether the user has explicitly picked a thinking level for this session.
// A missing/absent sessionStorage key means we should fall back to the Hermes config default.
//
// The flag is derived purely from storage: it is true when the
// `claude-thinking-<activeFriendlyId | 'new'>` key holds any value at all.
const thinkingInitializedByUserRef = useRef(false)
// Re-read the flag whenever the active session changes.
useEffect(() => {
// No sessionStorage outside the browser; leave the ref at its current value.
if (typeof window === 'undefined') return
const key = `claude-thinking-${activeFriendlyId || 'new'}`
thinkingInitializedByUserRef.current = window.sessionStorage.getItem(key) !== null
}, [activeFriendlyId])
const { alertOpen, alertThreshold, alertPercent, dismissAlert } = const { alertOpen, alertThreshold, alertPercent, dismissAlert } =
useContextAlert() useContextAlert()
@@ -611,10 +584,61 @@ export function ChatScreen({
portableMode: isPortableMode, portableMode: isPortableMode,
}) })
// --- Waiting state management (Issue #43 + #449) ---
// resolvedSessionKey is now available (defined above from useChatHistory).
const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
const sessionKeyForWaiting = useRef<string | undefined>(undefined)
const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
// Keep the waiting-state ref in sync with the resolved session key // Keep the waiting-state ref in sync with the resolved session key
sessionKeyForWaiting.current = resolvedSessionKey sessionKeyForWaiting.current = resolvedSessionKey
// Detect stale restored waiting state from sessionStorage — we need API // Synchronously detect stale waiting state from sessionStorage.
// This runs during render (not in an effect) so the guard in
// waitingForResponse is active on the very first render, preventing
// a flash of the "Thinking" indicator when reopening an old session.
// Truthy only when: a session key is resolved, this is not a new chat, the
// store currently flags that key as waiting, and this key has not already
// been marked for verification.
const needsStaleCheck =
resolvedSessionKey &&
!isNewChat &&
storeWaiting.has(resolvedSessionKey) &&
pendingVerifySessionKeyRef.current !== resolvedSessionKey
// Ref is written during render on purpose, so the marker is set before the
// waitingForResponse memo reads it in this same render pass.
if (needsStaleCheck) {
pendingVerifySessionKeyRef.current = resolvedSessionKey
}
// Track whether the active-run API check has completed.
// Initialize to false when we detect stale state (needs verification),
// true otherwise. This prevents showing "Thinking" until the API confirms.
// NOTE(review): useState consumes its initial argument only on mount, so on
// a later session switch this flag relies on other code resetting it —
// confirm the verification effect below does so.
const [activeRunCheckDone, setActiveRunCheckDone] = useState(!needsStaleCheck)
const waitingForResponse = useMemo(() => {
const key = sessionKeyForWaiting.current
if (!key) return hasPendingSend() || hasPendingGeneration()
// If we restored waiting state from sessionStorage but haven't verified
// with the API yet, don't show thinking — it might be stale (Issue #449).
if (
storeWaiting.has(key) &&
pendingVerifySessionKeyRef.current === key &&
!activeRunCheckDone
) {
return false
}
return storeWaiting.has(key)
}, [storeWaiting, activeRunCheckDone])
const setWaitingForResponse = useCallback((waiting: boolean) => {
const store = useChatStore.getState()
const key = sessionKeyForWaiting.current
if (!key) return
if (waiting) {
store.setSessionWaiting(key)
} else {
store.clearSessionWaiting(key)
}
}, [])
// verification before showing thinking (Issue #449). // verification before showing thinking (Issue #449).
useEffect(() => { useEffect(() => {
const currentSessionKey = resolvedSessionKey const currentSessionKey = resolvedSessionKey
@@ -868,13 +892,12 @@ export function ChatScreen({
const streamStart = useCallback(() => { const streamStart = useCallback(() => {
if (!activeFriendlyId || isNewChat) return if (!activeFriendlyId || isNewChat) return
// Bug #3 fix: no more 350ms polling loop — SSE handles realtime updates. // No aggressive delayed refetch here — it wipes optimistic user messages
// Single delayed fetch as fallback to catch the initial response. // from the cache before the server has echoed them, causing the user's
if (streamTimer.current) window.clearTimeout(streamTimer.current) // message to disappear until the agent completes. The existing failsafes
streamTimer.current = window.setTimeout(() => { // (5s + 10s timeouts at lines below, active-run polling) handle the case
if (activeRealtimeStreamingRef.current) return // where SSE misses the done event.
refreshHistoryRef.current() void activeFriendlyId // keep dep for eslint
}, 2000)
}, [activeFriendlyId, isNewChat]) }, [activeFriendlyId, isNewChat])
refreshHistoryRef.current = function refreshHistory() { refreshHistoryRef.current = function refreshHistory() {
@@ -883,37 +906,21 @@ export function ChatScreen({
// Snapshot any unconfirmed optimistic user messages BEFORE refetch. // Snapshot any unconfirmed optimistic user messages BEFORE refetch.
// The refetch replaces the query cache with server data — if the server // The refetch replaces the query cache with server data — if the server
// hasn't processed the user's POST yet, the optimistic message vanishes. // hasn't processed the user's POST yet, the optimistic message vanishes.
const currentMessages = (historyQuery.data as any)?.messages as const historySessionKey = isPortableMode
| Array<ChatMessage> ? 'main'
| undefined : activeSessionKey ||
const pendingOptimistic = (currentMessages ?? []).filter((msg) => { sessionKeyForHistory ||
const raw = msg as Record<string, unknown> resolvedSessionKey ||
return ( 'main'
msg.role === 'user' && const reInjectOptimistic = snapshotOptimisticUserMessages(
(normalizeMessageValue(raw.__optimisticId).startsWith('opt-') || queryClient,
normalizeMessageValue(raw.status) === 'sending') portableChatFriendlyId,
) historySessionKey,
}) )
void historyQuery.refetch().then(() => { void historyQuery.refetch().then(() => {
// Re-inject optimistic messages that weren't in the server response // Re-inject optimistic messages that weren't in the server response
if (pendingOptimistic.length === 0) return reInjectOptimistic()
const historySessionKey = isPortableMode
? 'main'
: activeSessionKey ||
sessionKeyForHistory ||
resolvedSessionKey ||
'main'
if (!portableChatFriendlyId || !historySessionKey) return
for (const optimistic of pendingOptimistic) {
appendHistoryMessage(
queryClient,
portableChatFriendlyId,
historySessionKey,
optimistic,
)
}
}) })
} }
@@ -1018,6 +1025,29 @@ export function ChatScreen({
retry: false, retry: false,
}) })
// Fetch the configured reasoning effort so the Chat Controls default matches
// what Hermes actually uses instead of hardcoding 'low'.
//
// Resolves to `config.agent.reasoning_effort` from GET /api/hermes-config when
// that value is one of 'off' | 'low' | 'medium' | 'high'. Every other outcome
// (non-OK response, missing or non-object agent section, unrecognised value,
// or a thrown error) resolves to 'low', so the query function never rejects.
const reasoningEffortQuery = useQuery({
queryKey: ['hermes-config', 'reasoning-effort'],
queryFn: async () => {
try {
const res = await fetch('/api/hermes-config')
if (!res.ok) return 'low'
const data = await res.json() as { config?: Record<string, unknown> }
const agentSection = data?.config?.agent
// Only a plain (non-array) object is accepted as the agent section.
if (agentSection && typeof agentSection === 'object' && !Array.isArray(agentSection)) {
const effort = (agentSection as Record<string, unknown>).reasoning_effort
// NOTE(review): 'adaptive' is accepted when restoring the level from
// sessionStorage but is not accepted here — confirm that is intended.
if (effort === 'off' || effort === 'low' || effort === 'medium' || effort === 'high') return effort
}
return 'low'
} catch {
// Best-effort lookup: network or JSON errors fall back to the default.
return 'low'
}
},
// Ten minutes before the cached value is considered stale.
staleTime: 10 * 60 * 1000,
retry: false,
})
const availableModelIds = useMemo(() => { const availableModelIds = useMemo(() => {
const models = modelsQuery.data?.models || [] const models = modelsQuery.data?.models || []
return models.map((m: any) => m.id).filter((id: string) => id) return models.map((m: any) => m.id).filter((id: string) => id)
@@ -1054,6 +1084,16 @@ export function ChatScreen({
} }
}, [currentModel, activeFriendlyId]) }, [currentModel, activeFriendlyId])
// If no per-session thinking level override exists, inherit from Hermes config
// Re-runs only when reasoningEffortQuery.data changes (its sole dependency).
useEffect(() => {
// The ref is true when a value was found in sessionStorage for this session;
// in that case the stored choice wins and the config value is ignored.
if (thinkingInitializedByUserRef.current) return
const configEffort = reasoningEffortQuery.data
// Query has not produced a value yet.
if (!configEffort) return
// Only these four levels are forwarded to setThinkingLevel.
if (configEffort === 'off' || configEffort === 'low' || configEffort === 'medium' || configEffort === 'high') {
setThinkingLevel(configEffort)
}
}, [reasoningEffortQuery.data])
// Persist thinking level changes to sessionStorage // Persist thinking level changes to sessionStorage
const handleThinkingLevelChange = useCallback( const handleThinkingLevelChange = useCallback(
(level: ThinkingLevel) => { (level: ThinkingLevel) => {
@@ -1378,7 +1418,7 @@ export function ChatScreen({
return deduped return deduped
} }
const nextMessages = [...deduped] let nextMessages = [...deduped]
const streamToolCalls = activeToolCalls.map((toolCall) => ({ const streamToolCalls = activeToolCalls.map((toolCall) => ({
...toolCall, ...toolCall,
phase: toolCall.phase, phase: toolCall.phase,
@@ -1394,6 +1434,42 @@ export function ChatScreen({
__streamToolCalls: streamToolCalls, __streamToolCalls: streamToolCalls,
} as ChatMessage } as ChatMessage
// Check if the server has already returned a completed assistant message
// for the in-flight turn that overlaps with the streaming text. If so, drop
// the streaming placeholder to avoid showing the same response twice.
const streamingText = stableActiveStreamingText.trim()
// Only assistant messages positioned after the most recent user message can
// be the server's copy of the response that is currently streaming. Without
// this bound, an assistant message from an earlier turn whose text merely
// starts with the first streamed characters (or that used the same tool
// names) would suppress the placeholder for the new turn.
// When the list contains no user message this is -1, so every message stays
// eligible.
const lastUserIdx = nextMessages.map((msg) => msg.role).lastIndexOf('user')
const hasServerAssistantVersion = nextMessages.some((msg, idx) => {
  if (idx <= lastUserIdx) return false
  if (msg.role !== 'assistant') return false
  // The streaming placeholder itself is never the "server version".
  if (msg.__streamingStatus === 'streaming') return false
  // Match by text overlap: equal, or one text is a prefix of the other
  // (the stream may be ahead of or behind the stored message).
  if (streamingText.length > 0) {
    const msgText = textFromMessage(msg).trim()
    if (
      msgText.length > 0 &&
      (msgText === streamingText ||
        msgText.startsWith(streamingText) ||
        streamingText.startsWith(msgText))
    ) {
      return true
    }
  }
  // Also match by tool calls: same number of tool calls, and every streamed
  // tool name appears among the message's tool calls.
  if (streamToolCalls.length > 0) {
    const msgContent = Array.isArray(msg.content) ? msg.content : []
    const msgToolCalls = msgContent.filter((p: any) => p.type === 'toolCall')
    if (
      msgToolCalls.length > 0 &&
      msgToolCalls.length === streamToolCalls.length
    ) {
      return streamToolCalls.every((stc: any) =>
        msgToolCalls.some((mtc: any) => mtc.name === stc.name),
      )
    }
  }
  return false
})
if (hasServerAssistantVersion) {
  // Return the list as-is, skipping the placeholder handling that follows.
  return nextMessages
}
const existingStreamIdx = nextMessages.findIndex( const existingStreamIdx = nextMessages.findIndex(
(message) => message.__streamingStatus === 'streaming', (message) => message.__streamingStatus === 'streaming',
) )
@@ -1403,6 +1479,13 @@ export function ChatScreen({
...nextMessages[existingStreamIdx], ...nextMessages[existingStreamIdx],
...streamingMsg, ...streamingMsg,
} }
// Remove any other streaming messages (e.g. from mergeHistoryMessages
// appending a realtime message after finalDisplayMessages already
// injected a placeholder). Keep only one streaming placeholder.
const keepIdx = existingStreamIdx
nextMessages = nextMessages.filter(
(m, i) => i === keepIdx || m.__streamingStatus !== 'streaming',
)
return nextMessages return nextMessages
} }

View File

@@ -36,8 +36,8 @@ import type {
} from '@/components/slash-command-menu' } from '@/components/slash-command-menu'
import { import {
DEFAULT_SLASH_COMMANDS, DEFAULT_SLASH_COMMANDS,
mergeSlashCommands,
SlashCommandMenu, SlashCommandMenu,
mergeSlashCommands,
} from '@/components/slash-command-menu' } from '@/components/slash-command-menu'
import { import {
PromptInput, PromptInput,
@@ -61,6 +61,7 @@ import {
emitSearchModalEvent, emitSearchModalEvent,
} from '@/hooks/use-search-modal' } from '@/hooks/use-search-modal'
import { setLocalModelOverride } from '@/screens/chat/local-model-override' import { setLocalModelOverride } from '@/screens/chat/local-model-override'
import { formatModelName } from '@/lib/format-model-name'
type ChatComposerAttachment = { type ChatComposerAttachment = {
id: string id: string
@@ -72,7 +73,7 @@ type ChatComposerAttachment = {
kind?: 'image' | 'file' | 'audio' kind?: 'image' | 'file' | 'audio'
} }
type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'
type ChatComposerProps = { type ChatComposerProps = {
onSubmit: ( onSubmit: (
@@ -565,6 +566,43 @@ function getResolvedModelKey(model: string, provider?: string): string {
return `${normalizedProvider}/${normalizedModel}` return `${normalizedProvider}/${normalizedModel}`
} }
/**
 * Decides whether a catalog entry represents the currently selected model.
 *
 * `currentModel` may be a bare model id ("model-id") or a provider-qualified
 * key ("provider/model-id"); the entry supplies its own `id` and `provider`.
 * All three inputs are trimmed before comparison.
 *
 * The entry is considered current when any of the following holds:
 *  - the current model is exactly the entry id;
 *  - the current model ends with "/<entry id>" (any provider prefix);
 *  - the current model is exactly "<entry provider>/<entry id>"
 *    (or just the entry id when the entry has no provider).
 *
 * Returns false when either the current model or the entry id is blank.
 */
function isCurrentModel(
  currentModel: string,
  entryId: string,
  entryProvider: string,
): boolean {
  const selected = currentModel.trim()
  const candidateId = entryId.trim()
  const candidateProvider = entryProvider.trim()

  // Nothing to compare against.
  if (selected.length === 0 || candidateId.length === 0) {
    return false
  }

  const qualifiedKey =
    candidateProvider.length > 0
      ? `${candidateProvider}/${candidateId}`
      : candidateId

  return (
    selected === candidateId ||
    selected.endsWith(`/${candidateId}`) ||
    selected === qualifiedKey
  )
}
function isCanvasSupported(): boolean { function isCanvasSupported(): boolean {
if (typeof document === 'undefined') return false if (typeof document === 'undefined') return false
try { try {
@@ -1671,7 +1709,7 @@ function ChatComposerComponent({
const promptPlaceholder = isMobileViewport const promptPlaceholder = isMobileViewport
? 'Message...' ? 'Message...'
: 'Ask anything... (↵ to send · ⇧↵ new line · ⌘⇧M switch model)' : 'Ask anything... (↵ to send · ⇧↵ new line · ⌘⇧M switch model)'
const [serverCommands, setServerCommands] = useState<SlashCommandDefinition[]>([]) const [serverCommands, setServerCommands] = useState<Array<SlashCommandDefinition>>([])
useEffect(() => { useEffect(() => {
fetch('/api/commands') fetch('/api/commands')
@@ -2566,9 +2604,11 @@ function ChatComposerComponent({
unpinnedGroups.set(entry.provider, group) unpinnedGroups.set(entry.provider, group)
} }
const renderEntry = (entry: (typeof parsed)[0]) => { const renderEntry = (entry: (typeof parsed)[0]) => {
const isActive = const isActive = isCurrentModel(
entry.id === currentModel || persistedSessionModel || currentModel,
`${defaultProvider}/${entry.id}` === currentModel entry.id,
entry.provider,
)
return ( return (
<div <div
key={entry.id} key={entry.id}
@@ -2757,9 +2797,9 @@ function ChatComposerComponent({
setIsThinkingMenuOpen(false) setIsThinkingMenuOpen(false)
setIsModelMenuOpen(false) setIsModelMenuOpen(false)
}} }}
className="inline-flex h-8 items-center gap-1 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60" className="inline-flex h-8 items-center gap-1.5 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
title="Chat controls" title={`Chat controls · ${modelButtonLabel}`}
aria-label="Chat controls" aria-label={`Chat controls, current model: ${modelButtonLabel}`}
> >
<svg <svg
width="13" width="13"
@@ -2779,6 +2819,7 @@ function ChatComposerComponent({
<circle cx="15" cy="12" r="2" fill="currentColor" stroke="none" /> <circle cx="15" cy="12" r="2" fill="currentColor" stroke="none" />
<circle cx="11" cy="18" r="2" fill="currentColor" stroke="none" /> <circle cx="11" cy="18" r="2" fill="currentColor" stroke="none" />
</svg> </svg>
<span className="max-w-[5rem] truncate sm:max-w-[8rem] md:max-w-[10rem]">{formatModelName(modelButtonLabel)}</span>
<HugeiconsIcon icon={ArrowDown01Icon} size={11} /> <HugeiconsIcon icon={ArrowDown01Icon} size={11} />
</button> </button>
{isControlsMenuOpen ? ( {isControlsMenuOpen ? (
@@ -2946,7 +2987,11 @@ function ChatComposerComponent({
unpinnedGroups.set(entry.provider, group) unpinnedGroups.set(entry.provider, group)
} }
const renderEntry = (entry: (typeof parsed)[0]) => { const renderEntry = (entry: (typeof parsed)[0]) => {
const isActive = entry.id === currentModel || `${defaultProvider}/${entry.id}` === currentModel const isActive = isCurrentModel(
persistedSessionModel || currentModel,
entry.id,
entry.provider,
)
return ( return (
<div key={entry.id} className="group relative flex items-center"> <div key={entry.id} className="group relative flex items-center">
<button <button

View File

@@ -64,7 +64,7 @@ function formatMobileSessionTitle(rawTitle: string): string {
return title return title
} }
type ThinkingLevel = 'off' | 'low' | 'adaptive' type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'
type ChatHeaderProps = { type ChatHeaderProps = {
activeTitle: string activeTitle: string

View File

@@ -25,6 +25,7 @@ import { AssistantAvatar } from '@/components/avatars'
import { cn } from '@/lib/utils' import { cn } from '@/lib/utils'
import { hapticTap } from '@/lib/haptics' import { hapticTap } from '@/lib/haptics'
import { CHAT_OPEN_MESSAGE_SEARCH_EVENT } from '@/screens/chat/chat-events' import { CHAT_OPEN_MESSAGE_SEARCH_EVENT } from '@/screens/chat/chat-events'
import { useChatStore } from '@/stores/chat-store'
/** Duration (ms) the thinking indicator stays visible after waitingForResponse /** Duration (ms) the thinking indicator stays visible after waitingForResponse
* clears, giving the first response message time to render before the * clears, giving the first response message time to render before the
@@ -179,28 +180,49 @@ type ThinkingBubbleProps = {
liveToolActivity?: Array<{ name: string; timestamp: number }> liveToolActivity?: Array<{ name: string; timestamp: number }>
researchCard?: UseResearchCardResult researchCard?: UseResearchCardResult
isCompacting?: boolean isCompacting?: boolean
/** When true, always show "Thinking…" regardless of activity. Used for the
* first 10s before the delayed activity feed appears. */
forceSimple?: boolean
} }
/** /**
* Premium shimmer thinking bubble — matches the assistant message position * Shows a thinking indicator with animated dots and a meaningful status
* with three bouncing dots, a gradient shimmer sweep, and a dynamic status
* label that reflects what's actually happening (tool calls, etc.). * label that reflects what's actually happening (tool calls, etc.).
* When forceSimple is true, suppresses all activity labels — just "Thinking…".
*/ */
function ThinkingBubble({ function ThinkingBubble({
activeToolCalls: _activeToolCalls = [], activeToolCalls = [],
liveToolActivity: _liveToolActivity = [], liveToolActivity = [],
researchCard, researchCard,
isCompacting = false, isCompacting = false,
forceSimple = false,
}: ThinkingBubbleProps) { }: ThinkingBubbleProps) {
const statusLabel = isCompacting ? 'Compacting context...' : 'Thinking…' // Fallback activity from heartbeat — shows last known agent activity
// when no tool calls are in flight (e.g. during pure reasoning)
const heartbeatActivity = useChatStore((s) => s.heartbeatActivity)
// Elapsed time counter — resets when the status label changes (new tool) // Build a meaningful status label from live activity
const activeToolNames = activeToolCalls
.filter((tc) => tc.phase !== 'done' && tc.phase !== 'complete' && tc.phase !== 'completed')
.map((tc) => tc.name.replace(/_/g, ' '))
const liveToolNames = liveToolActivity.map((a) => a.name.replace(/_/g, ' '))
const uniqueNames = [...new Set([...activeToolNames, ...liveToolNames])]
const activityLabel =
uniqueNames.length > 0
? `Using: ${uniqueNames.slice(0, 3).join(', ')}${uniqueNames.length > 3 ? ` +${uniqueNames.length - 3} more` : ''}`
: null
const statusLabel = isCompacting
? 'Compacting context...'
: forceSimple
? 'Thinking…'
: activityLabel || heartbeatActivity || 'Thinking…'
// Elapsed time counter — counts from bubble mount, not from last label change
const [elapsed, setElapsed] = useState(0) const [elapsed, setElapsed] = useState(0)
useEffect(() => { useEffect(() => {
setElapsed(0)
const interval = window.setInterval(() => setElapsed((s) => s + 1), 1000) const interval = window.setInterval(() => setElapsed((s) => s + 1), 1000)
return () => window.clearInterval(interval) return () => window.clearInterval(interval)
}, [statusLabel]) }, [])
const elapsedLabel = const elapsedLabel =
elapsed >= 60 elapsed >= 60
@@ -351,6 +373,33 @@ function ThinkingBubble({
) )
} }
/**
 * Compact one-line progress readout: a pulsing dot, the latest heartbeat
 * activity read from the chat store (falling back to "Working…" when none is
 * set), and the number of seconds elapsed since this component mounted.
 */
function StatusLine() {
  const heartbeatActivity = useChatStore((state) => state.heartbeatActivity)
  const [seconds, setSeconds] = useState(0)

  // Tick once per second for as long as the line stays mounted.
  useEffect(() => {
    const timerId = window.setInterval(() => {
      setSeconds((previous) => previous + 1)
    }, 1000)
    return () => {
      window.clearInterval(timerId)
    }
  }, [])

  // Under a minute: "42s". From a minute on: "1m 5s".
  let elapsedLabel = `${seconds}s`
  if (seconds >= 60) {
    elapsedLabel = `${Math.floor(seconds / 60)}m ${seconds % 60}s`
  }

  return (
    <div className="flex items-center gap-2 text-[11px] text-primary-400 dark:text-primary-500 py-0.5">
      <span className="inline-block size-1.5 rounded-full bg-amber-400 animate-pulse" />
      <span className="opacity-80">{heartbeatActivity || 'Working…'}</span>
      <span aria-hidden="true" className="opacity-40">
        ·
      </span>
      <span className="tabular-nums opacity-50 font-mono">{elapsedLabel}</span>
    </div>
  )
}
const VIRTUAL_ROW_HEIGHT = 136 const VIRTUAL_ROW_HEIGHT = 136
const VIRTUAL_OVERSCAN = 8 const VIRTUAL_OVERSCAN = 8
const NEAR_BOTTOM_THRESHOLD = 200 const NEAR_BOTTOM_THRESHOLD = 200
@@ -606,6 +655,13 @@ function ChatMessageListComponent({
const [unreadCount, setUnreadCount] = useState(0) const [unreadCount, setUnreadCount] = useState(0)
const [expandAllToolSections, setExpandAllToolSections] = useState(false) const [expandAllToolSections, setExpandAllToolSections] = useState(false)
// Activity feed delay: only show tool activity after 10s of thinking.
// For the first 10s, the ThinkingBubble stays simple ("Thinking…").
const THINKING_ACTIVITY_DELAY_S = 10
const [thinkingElapsed, setThinkingElapsed] = useState(0)
const thinkingStartRef = useRef<number>(0)
const thinkingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null)
// Bug 2 fix: grace period — keep thinking indicator alive briefly after // Bug 2 fix: grace period — keep thinking indicator alive briefly after
// waitingForResponse clears so the response message has time to render. // waitingForResponse clears so the response message has time to render.
const [thinkingGrace, setThinkingGrace] = useState(false) const [thinkingGrace, setThinkingGrace] = useState(false)
@@ -1107,6 +1163,52 @@ function ChatMessageListComponent({
researchCard && researchCard.steps.length > 0, researchCard && researchCard.steps.length > 0,
) )
// Compute visibility of the entire bottom thinking area — the same gate
// used for rendering (lines below). Start / stop the elapsed timer here.
const thinkingAreaVisible =
showTypingIndicator ||
showResearchCard ||
isCompacting ||
liveToolActivity.length > 0 ||
(isStreaming && !streamingText) ||
(isStreaming && activeToolCalls.length > 0)
// Track how long the thinking area has been visible to gate the delayed
// activity feed (10s threshold).
useEffect(() => {
if (thinkingAreaVisible) {
if (thinkingStartRef.current === 0) {
thinkingStartRef.current = Date.now()
setThinkingElapsed(0)
}
if (!thinkingTimerRef.current) {
thinkingTimerRef.current = setInterval(() => {
setThinkingElapsed(
Math.floor((Date.now() - thinkingStartRef.current) / 1000),
)
}, 250)
}
} else {
if (thinkingTimerRef.current) {
clearInterval(thinkingTimerRef.current)
thinkingTimerRef.current = null
}
thinkingStartRef.current = 0
setThinkingElapsed(0)
}
return () => {
if (thinkingTimerRef.current) {
clearInterval(thinkingTimerRef.current)
thinkingTimerRef.current = null
}
}
}, [thinkingAreaVisible])
const showActivityFeed =
thinkingElapsed >= THINKING_ACTIVITY_DELAY_S ||
activeToolCalls.length > 0 ||
liveToolActivity.length > 0
const shouldBottomPin = const shouldBottomPin =
visibleEntries.length > 0 || visibleEntries.length > 0 ||
showToolOnlyNotice || showToolOnlyNotice ||
@@ -1146,11 +1248,11 @@ function ChatMessageListComponent({
args: tcAny.args, args: tcAny.args,
preview: preview:
typeof tcAny.preview === 'string' typeof tcAny.preview === 'string'
? (tcAny.preview as string) ? (tcAny.preview)
: undefined, : undefined,
result: result:
typeof tcAny.result === 'string' typeof tcAny.result === 'string'
? (tcAny.result as string) ? (tcAny.result)
: undefined, : undefined,
} }
}) })
@@ -1823,12 +1925,12 @@ function ChatMessageListComponent({
liveToolActivity={liveToolActivity} liveToolActivity={liveToolActivity}
researchCard={researchCard} researchCard={researchCard}
isCompacting={isCompacting} isCompacting={isCompacting}
forceSimple={!showActivityFeed}
/> />
{/* Branch from the thinking bubble into a single compact {/* After 10s of thinking, show activity feed. With tool calls:
TUI-style tool activity card. Use normalized streaming calls compact CLI-style TuiActivityCard (last 3). Without tool calls:
so the card appears for both structured tool events and the a minimal status line showing elapsed time and heartbeat. */}
lighter live activity feed. */} {showActivityFeed ? (
{normalizedStreamingToolCalls.length > 0 ? (
<div className="flex max-w-[var(--chat-content-max-width)]"> <div className="flex max-w-[var(--chat-content-max-width)]">
<div <div
className="ml-[14px] mr-2 w-px shrink-0" className="ml-[14px] mr-2 w-px shrink-0"
@@ -1839,51 +1941,55 @@ function ChatMessageListComponent({
aria-hidden aria-hidden
/> />
<div className="min-w-0 flex-1 pt-1"> <div className="min-w-0 flex-1 pt-1">
<TuiActivityCard {normalizedStreamingToolCalls.length > 0 ? (
toolSections={normalizedStreamingToolCalls.map((tc) => { <TuiActivityCard
const phase = tc.phase toolSections={normalizedStreamingToolCalls.slice(-3).map((tc) => {
const state = const phase = tc.phase
phase === 'error' const state =
? ('output-error' as const) phase === 'error'
: phase === 'done' ? ('output-error' as const)
? ('output-available' as const) : phase === 'done'
: phase === 'running' ? ('output-available' as const)
? ('input-streaming' as const) : phase === 'running'
: ('input-available' as const) ? ('input-streaming' as const)
return { : ('input-available' as const)
key: tc.id, return {
type: tc.name, key: tc.id,
input: type: tc.name,
tc.args && input:
typeof tc.args === 'object' && tc.args &&
!Array.isArray(tc.args) typeof tc.args === 'object' &&
? (tc.args as Record<string, unknown>) !Array.isArray(tc.args)
: undefined, ? (tc.args as Record<string, unknown>)
preview: tc.preview, : undefined,
outputText: preview: tc.preview,
state === 'output-available' outputText:
? tc.result || '' state === 'output-available'
: '', ? tc.result || ''
errorText: : '',
state === 'output-error' errorText:
? tc.result || 'Tool failed' state === 'output-error'
: undefined, ? tc.result || 'Tool failed'
state, : undefined,
} state,
})} }
thinking={null} })}
isStreaming={true} thinking={null}
formatLabel={(name) => name.replace(/_/g, ' ')} isStreaming={true}
formatArg={(_name, args) => { formatLabel={(name) => name.replace(/_/g, ' ')}
if (!args) return null formatArg={(_name, args) => {
const first = Object.values(args).find( if (!args) return null
(v) => typeof v === 'string' && v.trim(), const first = Object.values(args).find(
) (v) => typeof v === 'string' && v.trim(),
return typeof first === 'string' )
? first.trim() return typeof first === 'string'
: null ? first.trim()
}} : null
/> }}
/>
) : (
<StatusLine />
)}
</div> </div>
</div> </div>
) : null} ) : null}
@@ -1955,11 +2061,24 @@ function getStableMessageId(message: ChatMessage, index: number): string {
} }
const timestamp = getRawMessageTimestamp(message) const timestamp = getRawMessageTimestamp(message)
const text = textFromMessage(message)
// Content-based fingerprint: hash of text content + timestamp.
// This survives reordering because it doesn't depend on array position.
const fingerprint = djb2(text.slice(0, 120))
if (timestamp) { if (timestamp) {
return `${message.role ?? 'assistant'}-${timestamp}-${index}` return `${message.role ?? 'assistant'}-${timestamp}-${fingerprint}`
} }
return `${message.role ?? 'assistant'}-${index}` return `${message.role ?? 'assistant'}-${fingerprint}-${index}`
}
/**
 * djb2 string hash — fast, decent distribution, no deps.
 *
 * Starts from 5381 and, for each UTF-16 code unit, computes
 * `hash * 33 + codeUnit` truncated to a signed 32-bit integer.
 *
 * @param str Text to hash; the empty string hashes to the seed value.
 * @returns The hash reinterpreted as an unsigned 32-bit integer, encoded in
 *   base 36.
 */
function djb2(str: string): string {
  let hash = 5381
  for (let i = 0; i < str.length; i++) {
    // (hash << 5) + hash === hash * 33; `| 0` keeps the value in int32 range.
    hash = ((hash << 5) + hash + str.charCodeAt(i)) | 0
  }
  // `>>> 0` maps negative int32 values to their unsigned form so the encoded
  // string never carries a leading minus sign.
  return (hash >>> 0).toString(36)
}
function getRawMessageTimestamp(message: ChatMessage): number | null { function getRawMessageTimestamp(message: ChatMessage): number | null {

View File

@@ -7,6 +7,11 @@ import {
textFromMessage, textFromMessage,
} from '../utils' } from '../utils'
import { MessageActionsBar } from './message-actions-bar' import { MessageActionsBar } from './message-actions-bar'
import {
buildHermesActivitySummary,
shouldAutoExpandHermesActivityCard,
} from './streaming-activity-ui'
import { TuiActivityCard } from './tui-activity-card'
import type { ChatAttachment, ChatMessage, ToolCallContent } from '../types' import type { ChatAttachment, ChatMessage, ToolCallContent } from '../types'
import type { ToolPart } from '@/components/prompt-kit/tool' import type { ToolPart } from '@/components/prompt-kit/tool'
import { AssistantAvatar, UserAvatar } from '@/components/avatars' import { AssistantAvatar, UserAvatar } from '@/components/avatars'
@@ -31,11 +36,6 @@ import {
useChatSettingsStore, useChatSettingsStore,
} from '@/hooks/use-chat-settings' } from '@/hooks/use-chat-settings'
import { cn } from '@/lib/utils' import { cn } from '@/lib/utils'
import {
buildHermesActivitySummary,
shouldAutoExpandHermesActivityCard,
} from './streaming-activity-ui'
import { TuiActivityCard } from './tui-activity-card'
const WORDS_PER_TICK = 4 const WORDS_PER_TICK = 4
const TICK_INTERVAL_MS = 50 const TICK_INTERVAL_MS = 50
@@ -2503,21 +2503,29 @@ function MessageItemComponent({
{/* Grouped tool card above the assistant bubble. Only show once there {/* Grouped tool card above the assistant bubble. Only show once there
is real assistant text in the bubble. While streaming with no text, is real assistant text in the bubble. While streaming with no text,
the legacy ThinkingBubble in chat-message-list owns the visual and the legacy ThinkingBubble in chat-message-list owns the visual and
renders its own branched TuiActivityCard so we don't double up. */} renders its own branched TuiActivityCard so we don't double up.
When done streaming, show a compact tool-count chip instead of
the full expandable card. */}
{!isUser && {!isUser &&
finalToolSections.length > 0 && finalToolSections.length > 0 &&
(hasText || !effectiveIsStreaming) ? ( (hasText || !effectiveIsStreaming) ? (
<div className="w-full max-w-[var(--chat-content-max-width)] flex"> <div className="w-full max-w-[var(--chat-content-max-width)] flex">
<div className="w-6 shrink-0" aria-hidden /> <div className="w-6 shrink-0" aria-hidden />
<div className="min-w-0 flex-1"> <div className="min-w-0 flex-1">
<TuiActivityCard {effectiveIsStreaming ? (
toolSections={finalToolSections} <TuiActivityCard
thinking={null} toolSections={finalToolSections}
isStreaming={effectiveIsStreaming} thinking={null}
expandAll={expandAllToolSections} isStreaming={effectiveIsStreaming}
formatLabel={formatToolDisplayLabel} expandAll={expandAllToolSections}
formatArg={keyArgLabel} formatLabel={formatToolDisplayLabel}
/> formatArg={keyArgLabel}
/>
) : (
<span className="inline-block text-[11px] text-primary-400 dark:text-primary-500 py-0.5 opacity-60">
{finalToolSections.length} tool{finalToolSections.length !== 1 ? 's' : ''} used
</span>
)}
</div> </div>
</div> </div>
) : null} ) : null}

View File

@@ -0,0 +1,88 @@
import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
import { textFromMessage } from '../utils'
import type { QueryClient } from '@tanstack/react-query'
import type { ChatMessage } from '../types'
/** Returns the trimmed string for string inputs and '' for anything else. */
function normalize(value: unknown): string {
  if (typeof value !== 'string') {
    return ''
  }
  return value.trim()
}
/**
 * Captures the not-yet-echoed user messages currently in the history cache
 * and returns a closure that puts them back once the cache has been replaced.
 *
 * A refetch overwrites the query cache with server data, which may not yet
 * contain a message the user just sent; without re-injection that message
 * would vanish from the list until the server echoes it.
 *
 * A cached user message is captured when any of these holds:
 *  - its `__optimisticId` starts with "opt-";
 *  - its status is "sending" or "queued";
 *  - its status is "sent" and it carries a client id (`clientId` /
 *    `client_id`) but no server id (`id` / `messageId`).
 *
 * The returned closure re-reads the cache and appends each captured message
 * unless a user message already there has the same client id, or has
 * identical text with both timestamps present and less than 10 seconds apart.
 *
 * Usage:
 *   const reInject = snapshotOptimisticUserMessages(queryClient, friendlyId, sessionKey)
 *   await queryClient.invalidateQueries(...)
 *   reInject()
 */
export function snapshotOptimisticUserMessages(
  queryClient: QueryClient,
  friendlyId: string,
  sessionKey: string,
): () => void {
  const key = chatQueryKeys.history(friendlyId, sessionKey)

  // Selects user messages the server may not have echoed yet.
  const isUnechoedUserMessage = (candidate: unknown): boolean => {
    const fields = candidate as Record<string, unknown>
    if (fields.role !== 'user') return false
    if (String(fields.__optimisticId ?? '').startsWith('opt-')) return true

    switch (String(fields.status)) {
      case 'sending':
      case 'queued':
        return true
      case 'sent': {
        // Re-inject only if the message has a clientId (local) but no server id
        const clientId =
          normalize(fields.clientId) || normalize(fields.client_id)
        const serverId = normalize(fields.id) || normalize(fields.messageId)
        return clientId.length > 0 && serverId.length === 0
      }
      default:
        return false
    }
  }

  const cachedBefore = queryClient.getQueryData<Record<string, unknown>>(key)
  const messagesBefore =
    (cachedBefore?.messages as Array<unknown> | undefined) ?? []
  const pending = messagesBefore.filter(
    isUnechoedUserMessage,
  ) as unknown as Array<ChatMessage>

  return () => {
    const cachedAfter = queryClient.getQueryData<Record<string, unknown>>(key)
    const messagesAfter =
      (cachedAfter?.messages as Array<unknown> | undefined) ?? []

    for (const pendingMessage of pending) {
      const pendingFields = pendingMessage as unknown as Record<string, unknown>
      const pendingClientId =
        normalize(pendingFields.clientId) || normalize(pendingFields.client_id)
      const pendingText = textFromMessage(pendingMessage)
      const pendingTs = (pendingFields.timestamp as number) || 0

      const echoedByServer = messagesAfter.some((existing: unknown) => {
        const existingFields = existing as Record<string, unknown>
        if (existingFields.role !== 'user') return false

        // Strongest signal: identical client id.
        if (pendingClientId) {
          const existingClientId =
            normalize(existingFields.clientId) ||
            normalize(existingFields.client_id)
          if (existingClientId && existingClientId === pendingClientId) {
            return true
          }
        }

        // Fallback: identical text with close, non-zero timestamps.
        if (pendingText.length > 0) {
          const existingText = textFromMessage(existing as ChatMessage)
          if (existingText === pendingText) {
            const existingTs = (existingFields.timestamp as number) || 0
            if (
              pendingTs &&
              existingTs &&
              Math.abs(pendingTs - existingTs) < 10_000
            ) {
              return true
            }
          }
        }
        return false
      })

      if (echoedByServer) continue
      appendHistoryMessage(queryClient, friendlyId, sessionKey, pendingMessage)
    }
  }
}

View File

@@ -22,9 +22,14 @@ type ActiveRunResponse = {
const ACTIVE_STATUSES: ReadonlySet<string> = new Set([ const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
'accepted', 'accepted',
'active', 'active',
'handoff', // NOTE: 'handoff' is deliberately excluded. A handoff run means the
// SSE client disconnected — the browser has no active stream. Keeping
// the waiting state alive for handoff runs causes ghost "Thinking"
// indicators on session reopen for runs that completed hours ago.
]) ])
const ACTIVE_RUN_CHECK_TIMEOUT_MS = 2000
/** /**
* On mount, checks whether the server has an active run for this session. * On mount, checks whether the server has an active run for this session.
* If so, marks the session as waiting in the persistent Zustand store. * If so, marks the session as waiting in the persistent Zustand store.
@@ -33,6 +38,10 @@ const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
* This closes the gap where a user navigates away during streaming, * This closes the gap where a user navigates away during streaming,
* the component unmounts (losing local state), and on remount the UI * the component unmounts (losing local state), and on remount the UI
* doesn't know a run was in progress. * doesn't know a run was in progress.
*
* A timeout (ACTIVE_RUN_CHECK_TIMEOUT_MS) ensures the check never blocks
* the UI indefinitely — if the API is slow or unreachable, we assume the
* run is dead and clear stale waiting state.
*/ */
export function useActiveRunCheck({ export function useActiveRunCheck({
sessionKey, sessionKey,
@@ -55,6 +64,25 @@ export function useActiveRunCheck({
hasCheckedRef.current = true hasCheckedRef.current = true
const controller = new AbortController() const controller = new AbortController()
let settled = false
const settle = () => {
if (settled) return
settled = true
onCompleteRef.current?.()
}
// Timeout: if the API check doesn't complete in time, assume the run is dead
const timeoutId = window.setTimeout(() => {
if (settled) return
settle()
try { controller.abort() } catch { /* ignore */ }
// Clear stale waiting state — the run is almost certainly dead
const store = useChatStore.getState()
if (store.isSessionWaiting(sessionKeyRef.current)) {
store.clearSessionWaiting(sessionKeyRef.current)
}
}, ACTIVE_RUN_CHECK_TIMEOUT_MS)
async function check() { async function check() {
try { try {
@@ -62,10 +90,10 @@ export function useActiveRunCheck({
`/api/sessions/${encodeURIComponent(sessionKey)}/active-run`, `/api/sessions/${encodeURIComponent(sessionKey)}/active-run`,
{ signal: controller.signal }, { signal: controller.signal },
) )
if (!response.ok) return if (!response.ok) return finishCheck()
const data = (await response.json()) as ActiveRunResponse const data = (await response.json()) as ActiveRunResponse
if (!data.ok) return if (!data.ok) return finishCheck()
const store = useChatStore.getState() const store = useChatStore.getState()
if (data.run && ACTIVE_STATUSES.has(data.run.status)) { if (data.run && ACTIVE_STATUSES.has(data.run.status)) {
@@ -75,15 +103,21 @@ export function useActiveRunCheck({
store.clearSessionWaiting(sessionKey) store.clearSessionWaiting(sessionKey)
} }
} catch { } catch {
// Network error or abort — ignore // Network error or abort — ignore, already handled by timeout
} finally { } finally {
onCompleteRef.current?.() finishCheck()
} }
} }
function finishCheck() {
window.clearTimeout(timeoutId)
settle()
}
void check() void check()
return () => { return () => {
window.clearTimeout(timeoutId)
controller.abort() controller.abort()
} }
}, [sessionKey, enabled]) }, [sessionKey, enabled])

View File

@@ -702,11 +702,19 @@ function mergeOptimisticHistoryMessages(
} }
// Preserve unconfirmed optimistic messages regardless of age. // Preserve unconfirmed optimistic messages regardless of age.
// Also preserve confirmed-sent messages that have a clientId but no
// server id yet — they were acknowledged by SSE (onStarted) but
// haven't been echoed by the server. Periodic refetches will drop
// them otherwise (the "user message disappears" bug).
const isSending = const isSending =
optimisticMessage.status === 'sending' || optimisticMessage.status === 'sending' ||
Boolean(optimisticMessage.__optimisticId) Boolean(optimisticMessage.__optimisticId)
const isSentButUnechoed =
optimisticMessage.status === 'sent' &&
Boolean(getMessageClientId(optimisticMessage)) &&
!optimisticMessage.id
if (isSending) { if (isSending || isSentButUnechoed) {
merged.push(optimisticMessage) merged.push(optimisticMessage)
} }
} }

View File

@@ -5,6 +5,7 @@ import { useChatStore } from '../../../stores/chat-store'
import { appendHistoryMessage, chatQueryKeys } from '../chat-queries' import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
import { toast } from '../../../components/ui/toast' import { toast } from '../../../components/ui/toast'
import { textFromMessage } from '../utils' import { textFromMessage } from '../utils'
import { snapshotOptimisticUserMessages } from './optimistic-message-reinject'
import type { ChatMessage } from '../types' import type { ChatMessage } from '../types'
import type { StreamingState } from '../../../stores/chat-store' import type { StreamingState } from '../../../stores/chat-store'
@@ -324,6 +325,14 @@ export function useRealtimeChatHistory({
const prevCount = const prevCount =
(prevData?.messages as Array<unknown> | undefined)?.length ?? 0 (prevData?.messages as Array<unknown> | undefined)?.length ?? 0
// Snapshot optimistic user messages before refetch so they
// survive the cache replacement. Re-injected after refetch.
const reInjectOptimistic = snapshotOptimisticUserMessages(
queryClient,
effectiveFriendlyId,
effectiveSessionKey,
)
// Issue #441 fix: Directly merge realtime buffer into history cache // Issue #441 fix: Directly merge realtime buffer into history cache
// INSTEAD of invalidateQueries. The old approach caused a race: // INSTEAD of invalidateQueries. The old approach caused a race:
// invalidateQueries → refetch (async) → merge runs with stale data // invalidateQueries → refetch (async) → merge runs with stale data
@@ -418,6 +427,8 @@ export function useRealtimeChatHistory({
) )
} }
} }
// Re-inject optimistic user messages that the server hasn't echoed yet
reInjectOptimistic()
}) })
// Check for compaction — significant message count drop // Check for compaction — significant message count drop

View File

@@ -241,6 +241,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
error: message, error: message,
})) }))
onError?.(message) onError?.(message)
useChatStore.getState().setHeartbeatActivity(null)
}, },
[ [
clearHandoffTimer, clearHandoffTimer,
@@ -429,6 +430,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
} }
onComplete?.(message) onComplete?.(message)
useChatStore.getState().setHeartbeatActivity(null)
}, },
[clearHandoffTimer, onComplete, stopFrame, unregisterSendStreamRun], [clearHandoffTimer, onComplete, stopFrame, unregisterSendStreamRun],
) )
@@ -444,7 +446,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
typeof window !== 'undefined' && typeof window !== 'undefined' &&
window.localStorage?.getItem('hermes:debug:sse') === '1' window.localStorage?.getItem('hermes:debug:sse') === '1'
) { ) {
// eslint-disable-next-line no-console
console.log( console.log(
'[hermes-sse]', '[hermes-sse]',
event, event,
@@ -754,6 +756,8 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
} }
case 'heartbeat': { case 'heartbeat': {
markActivity() markActivity()
const activity = (payload as { activity?: string | null }).activity ?? null
useChatStore.getState().setHeartbeatActivity(activity)
break break
} }
case 'close': { case 'close': {
@@ -851,6 +855,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
streamingText: '', streamingText: '',
error: null, error: null,
}) })
useChatStore.getState().setHeartbeatActivity(null)
try { try {
const response = await fetch('/api/send-stream', { const response = await fetch('/api/send-stream', {

View File

@@ -140,6 +140,11 @@ type ChatState = {
clearSessionWaiting: (sessionKey: string) => void clearSessionWaiting: (sessionKey: string) => void
/** Check if a session is waiting for a response */ /** Check if a session is waiting for a response */
isSessionWaiting: (sessionKey: string) => boolean isSessionWaiting: (sessionKey: string) => boolean
/** Last activity description forwarded via heartbeat — used by ThinkingBubble
* to show meaningful progress during long reasoning stretches */
heartbeatActivity: string | null
setHeartbeatActivity: (activity: string | null) => void
} }
const createEmptyStreamingState = (): StreamingState => ({ const createEmptyStreamingState = (): StreamingState => ({
@@ -641,6 +646,7 @@ export const useChatStore = create<ChatState>((set, get) => ({
sendStreamRunIds: new Set(), sendStreamRunIds: new Set(),
waitingSessionKeys: _restoredWaiting.keys, waitingSessionKeys: _restoredWaiting.keys,
waitingSessionMeta: _restoredWaiting.meta, waitingSessionMeta: _restoredWaiting.meta,
heartbeatActivity: null,
setConnectionState: (connectionState, error) => { setConnectionState: (connectionState, error) => {
set({ connectionState, lastError: error ?? null }) set({ connectionState, lastError: error ?? null })
@@ -687,6 +693,10 @@ export const useChatStore = create<ChatState>((set, get) => ({
return get().waitingSessionKeys.has(sessionKey) return get().waitingSessionKeys.has(sessionKey)
}, },
// Store the latest heartbeat activity label; passing null clears it.
setHeartbeatActivity: (heartbeatActivity) => {
  set({ heartbeatActivity })
},
processEvent: (event) => { processEvent: (event) => {
const state = get() const state = get()
const sessionKey = event.sessionKey const sessionKey = event.sessionKey
@@ -893,6 +903,31 @@ export const useChatStore = create<ChatState>((set, get) => ({
} }
if (duplicateIndex === -1) { if (duplicateIndex === -1) {
// Multiple message.started events from the agent create distinct
// realtime entries with empty content. Replace the previous empty
// assistant message instead of appending — prevents "3 individual
// messages then one final" bug where each tool phase looks like a
// separate assistant bubble.
if (
incomingMessage.role === 'assistant' &&
newPlainText.length === 0 &&
sessionMessages.length > 0
) {
const prevEmptyIdx = sessionMessages.findLastIndex(
(m) =>
m.role === 'assistant' &&
extractMessageText(m).length === 0,
)
if (prevEmptyIdx >= 0) {
sessionMessages[prevEmptyIdx] = incomingMessage
messages.set(
sessionKey,
sortMessagesChronologically(sessionMessages),
)
set({ realtimeMessages: messages, lastEventAt: now })
break
}
}
sessionMessages.push(incomingMessage) sessionMessages.push(incomingMessage)
messages.set(sessionKey, sortMessagesChronologically(sessionMessages)) messages.set(sessionKey, sortMessagesChronologically(sessionMessages))
set({ realtimeMessages: messages, lastEventAt: now }) set({ realtimeMessages: messages, lastEventAt: now })
@@ -1209,6 +1244,13 @@ export const useChatStore = create<ChatState>((set, get) => ({
if (histMsg.role === rtMsg.role && rtText) { if (histMsg.role === rtMsg.role && rtText) {
const histText = extractMessageText(histMsg) const histText = extractMessageText(histMsg)
if (histText === rtText) return true if (histText === rtText) return true
// Streaming realtime text is a prefix of the final server text.
// Match either direction to prevent duplicates when the server
// returns the complete message after the realtime buffer had a
// partial version.
if (rtText.length > 0 && histText.length > 0) {
if (histText.startsWith(rtText) || rtText.startsWith(histText)) return true
}
} }
const histRaw = histMsg as Record<string, unknown> const histRaw = histMsg as Record<string, unknown>

View File

@@ -88,6 +88,15 @@ async function isClaudeAgentHealthy(port = 8642): Promise<boolean> {
const config = defineConfig(({ mode, command }) => { const config = defineConfig(({ mode, command }) => {
const env = loadEnv(mode, process.cwd(), '') const env = loadEnv(mode, process.cwd(), '')
// Bridge loadEnv into process.env for server-side SSR runtime code that
// reads env vars directly from process.env (e.g. getBearerToken() in
// openai-compat-api.ts reads process.env.HERMES_API_TOKEN). Without this,
// Vite's loadEnv only populates the local `env` object — not process.env.
for (const key of Object.keys(env)) {
if (!(key in process.env)) {
process.env[key] = env[key]
}
}
const claudeApiUrl = env.CLAUDE_API_URL?.trim() || 'http://127.0.0.1:8642' const claudeApiUrl = env.CLAUDE_API_URL?.trim() || 'http://127.0.0.1:8642'
// /api/connection-status is handled by the real route file at // /api/connection-status is handled by the real route file at
// src/routes/api/connection-status.ts; the dev server no longer // src/routes/api/connection-status.ts; the dev server no longer