PR #543: chat UI/UX fixes — thinking indicators, message dedup, streaming stability (JohnGuidry)

Addresses #572 (double chat responses) and #561 (stuck Thinking indicator). Adds an optimistic-message-reinject hook, a Vite loadEnv→process.env bridge for the SSR bearer token, message dedup, and streaming-stability fixes. Ran eslint --fix on the touched files (net lint errors 1700→1588). Build is GREEN; tests: 33 fail / 694 pass (zero regressions).
2026-06-05 06:01:21 -04:00
parent ef2e4ba02b
commit 5271ca9ad3
14 changed files with 704 additions and 219 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ build
 .vinxi
 .nitro
 .tanstack
 .vite
 # Environment variables
 .env
--- a/src/routes/api/send-stream.ts
+++ b/src/routes/api/send-stream.ts
@@ -1,15 +1,10 @@
 import { createFileRoute } from '@tanstack/react-router'
 import { buildResolvedSessionHeaders } from '../../lib/send-stream-session-headers'
 import { buildWorkspaceScopedTextMessage } from '../../lib/workspace-message-scope'
 import {
  collectSyntheticLiveToolEvents,
  createSyntheticLiveToolTracker,
 } from './-send-stream-live-tools'
 import { resolveSessionKey } from '../../server/session-utils'
 import { isAuthenticated } from '../../server/auth-middleware'
 import { requireJsonContentType } from '../../server/rate-limit'
 import { publishChatEvent } from '../../server/chat-event-bus'
 import { loadWorkspaceCatalog } from './workspace'
 import {
  registerActiveSendRun,
  unregisterActiveSendRun,
@@ -22,8 +17,8 @@ import {
  upsertRunToolCall,
 } from '../../server/run-store'
 import { getChatMode } from '../../server/gateway-capabilities'
-import { ensureLocalSession, appendLocalMessage, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
+import { appendLocalMessage, ensureLocalSession, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
-import { getLocalProviderDef, getDiscoveredModels } from '../../server/local-provider-discovery'
+import { getDiscoveredModels, getLocalProviderDef } from '../../server/local-provider-discovery'
 import { openaiChat } from '../../server/openai-compat-api'
 import { streamResponses } from '../../server/responses-api'
 import { selectPortableConversationHistory } from '../../server/portable-history'
@@ -36,6 +31,11 @@ import {
  listSessions,
  streamChat,
 } from '../../server/claude-api'
 import { loadWorkspaceCatalog } from './workspace'
 import {
  collectSyntheticLiveToolEvents,
  createSyntheticLiveToolTracker,
 } from './-send-stream-live-tools'
 import type {OpenAICompatContentPart, OpenAICompatMessage} from '../../server/openai-compat-api';
 // Claude agent runs can take 5+ minutes with complex tool chains
 const SEND_STREAM_RUN_TIMEOUT_MS = 600_000
@@ -386,10 +386,43 @@ export const Route = createFileRoute('/api/send-stream')({
        let streamTimeoutTimer: ReturnType<typeof setTimeout> | null = null
        let heartbeatTimer: ReturnType<typeof setInterval> | null = null
        const abortController = new AbortController()
        // Close out the SSE stream — stop enqueueing, clear timers, and
        // abort the upstream Hermes gateway request so the agent stops
        // processing.  Does NOT touch run status (persistActiveRun etc.).
        // The abort path (request.signal / handleAbort) owns run cleanup.
        let closeStream = () => {
          if (streamClosed) return
          streamClosed = true
          if (heartbeatTimer) {
            clearInterval(heartbeatTimer)
            heartbeatTimer = null
          }
          if (unregisterTimer) {
            clearTimeout(unregisterTimer)
            unregisterTimer = null
          }
          if (streamTimeoutTimer) {
            clearTimeout(streamTimeoutTimer)
            streamTimeoutTimer = null
          }
          abortController.abort()
        }
        // When the client hits Stop / navigates away / closes the tab, the
        // request.signal fires abort.  Stop the upstream agent (closeStream)
        // and clean up run tracking so we don't burn API credits on an orphan.
        function handleAbort() {
          if (activeRunId && !streamClosed) {
            persistActiveRun((runSessionKey, activeId) =>
              markRunStatus(runSessionKey, activeId, 'handoff'),
            )
            unregisterActiveSendRun(activeRunId)
            activeRunId = null
          }
          closeStream()
        }
        request.signal.addEventListener('abort', () => handleAbort(), { once: true })
        const persistRunStarted = (
          runId: string | undefined,
          runSessionKey: string,
@@ -419,6 +452,11 @@ export const Route = createFileRoute('/api/send-stream')({
          async start(controller) {
            let heartbeatTimer: ReturnType<typeof setInterval> | null = null
            let lastClientEventAt = Date.now()
            // Track the last human-readable activity so the heartbeat can
            // forward it to the UI. Without this the ThinkingBubble shows a
            // static "Thinking…" for minutes when the agent is reasoning
            // without tool calls, making it look hung.
            let lastActivity: string | null = null
            const enqueueRaw = (payload: string) => {
              if (streamClosed) return
              controller.enqueue(encoder.encode(payload))
@@ -462,10 +500,6 @@ export const Route = createFileRoute('/api/send-stream')({
                clearTimeout(streamTimeoutTimer)
                streamTimeoutTimer = null
              }
              if (heartbeatTimer) {
                clearInterval(heartbeatTimer)
                heartbeatTimer = null
              }
              if (activeRunId) {
                unregisterActiveSendRun(activeRunId)
                activeRunId = null
@@ -481,9 +515,11 @@ export const Route = createFileRoute('/api/send-stream')({
            // Keep the SSE stream alive during long agent processing (tool calls,
            // slow LLM responses on large contexts). Without this the client-side
            // no-activity timer fires after 2-3 min and aborts the stream.
            // Every 10s we also forward the last known activity so the UI can
            // show meaningful progress instead of a static "Thinking…".
            heartbeatTimer = setInterval(() => {
-              sendEvent('heartbeat', { timestamp: Date.now() })
+              sendEvent('heartbeat', { timestamp: Date.now(), activity: lastActivity })
-            }, 30_000)
+            }, 10_000)
            try {
              if (chatMode === 'portable') {
@@ -514,6 +550,7 @@ export const Route = createFileRoute('/api/send-stream')({
                  sessionKey: portableSessionKey,
                  friendlyId: portableFriendlyId,
                })
                lastActivity = 'Processing your message...'
                try {
                  const userContent = buildMultimodalContent(
@@ -569,7 +606,7 @@ export const Route = createFileRoute('/api/send-stream')({
                  const useResponsesApi =
                    process.env.HERMES_USE_RESPONSES === '1' && !localBaseUrl
                  if (useResponsesApi) {
-                    let thinking = ''
+                    const thinking = ''
                    // Track tool calls by callId so a `tool.completed`
                    // followed by `tool.output` can carry the full
                    // arguments forward without losing them.
@@ -615,7 +652,7 @@ export const Route = createFileRoute('/api/send-stream')({
                          })
                          const argsForCard =
                            ev.args && typeof ev.args === 'object'
-                              ? (ev.args as Record<string, unknown>)
+                              ? (ev.args)
                              : undefined
                          persistActiveRun((runSessionKey, activeId) =>
                            upsertRunToolCall(runSessionKey, activeId, {
@@ -633,6 +670,7 @@ export const Route = createFileRoute('/api/send-stream')({
                            sessionKey: portableSessionKey,
                            runId,
                          })
                          lastActivity = `Running: ${ev.name.replace(/_/g, ' ')}`
                          continue
                        }
                        if (ev.kind === 'tool.completed') {
@@ -649,7 +687,7 @@ export const Route = createFileRoute('/api/send-stream')({
                          const state = toolStateByCallId.get(ev.callId)
                          const argsForCard =
                            state?.args && typeof state.args === 'object'
-                              ? (state.args as Record<string, unknown>)
+                              ? (state.args)
                              : undefined
                          const name = state?.name || 'tool'
                          persistActiveRun((runSessionKey, activeId) =>
@@ -670,6 +708,7 @@ export const Route = createFileRoute('/api/send-stream')({
                            sessionKey: portableSessionKey,
                            runId,
                          })
                          lastActivity = `Completed: ${name.replace(/_/g, ' ')}`
                          continue
                        }
                        if (ev.kind === 'completed') {
@@ -1012,6 +1051,7 @@ export const Route = createFileRoute('/api/send-stream')({
                        sessionKey: sessionKeyFromEvent,
                        friendlyId: sessionKeyFromEvent,
                      })
                      lastActivity = 'Processing your message...'
                    }
                    if (event === 'run.started') {
@@ -1137,6 +1177,7 @@ export const Route = createFileRoute('/api/send-stream')({
                      )
                      sendEvent('tool', translated)
                      skipPublish || publishChatEvent('tool', translated)
                      lastActivity = `Running: ${toolName.replace(/_/g, ' ')}`
                      return
                    }
@@ -1155,6 +1196,7 @@ export const Route = createFileRoute('/api/send-stream')({
                        }
                        sendEvent('thinking', translated)
                        skipPublish || publishChatEvent('thinking', translated)
                        lastActivity = delta.length > 60 ? delta.slice(0, 60) + '...' : delta
                        return
                      }
                      const translated = {
@@ -1203,6 +1245,7 @@ export const Route = createFileRoute('/api/send-stream')({
                      )
                      sendEvent('tool', translated)
                      skipPublish || publishChatEvent('tool', translated)
                      lastActivity = `Completed: ${toolName.replace(/_/g, ' ')}`
                      return
                    }
@@ -1376,10 +1419,10 @@ export const Route = createFileRoute('/api/send-stream')({
                          )
                          const recent = persistedMessages.slice(
                            sliceFrom,
-                          ) as Array<Record<string, unknown>>
+                          )
                          let lastAssistantIndex = -1
                          for (let i = recent.length - 1; i >= 0; i--) {
-                            const m = recent[i] as Record<string, unknown>
+                            const m = recent[i]
                            if (m && m.role === 'assistant') {
                              lastAssistantIndex = i
                              break
@@ -1388,7 +1431,7 @@ export const Route = createFileRoute('/api/send-stream')({
                          if (lastAssistantIndex >= 0) {
                            const lastAssistant = recent[
                              lastAssistantIndex
-                            ] as Record<string, unknown>
+                            ]
                            const rawToolCalls = (lastAssistant.tool_calls ??
                              (lastAssistant as any).toolCalls) as
                              | Array<Record<string, unknown>>
@@ -1478,28 +1521,17 @@ export const Route = createFileRoute('/api/send-stream')({
            }
          },
          cancel() {
-            // Browser navigation/unmount cancels the response reader. That
+            // User clicked Stop, navigated away, or browser closed the tab.
-            // must not cancel the Hermes run itself: the chat/conductor should
+            // Mark the stream complete, persist the run as 'handoff' so
-            // keep thinking server-side so the user can return and recover the
+            // session history reflects the interruption, then delegate to
-            // answer from session history. Mark this client stream closed so we
+            // closeStream() for timer/controller cleanup.  Delegate instead
-            // stop enqueueing SSE chunks, but deliberately leave the upstream
+            // of duplicating cleanup logic to keep the two paths in sync.
-            // abortController alone.
+            if (activeRunId && !streamClosed) {
            streamClosed = true
            if (unregisterTimer) {
              clearTimeout(unregisterTimer)
              unregisterTimer = null
            }
            if (streamTimeoutTimer) {
              clearTimeout(streamTimeoutTimer)
              streamTimeoutTimer = null
            }
            if (activeRunId) {
              persistActiveRun((runSessionKey, activeId) =>
                markRunStatus(runSessionKey, activeId, 'handoff'),
              )
              unregisterActiveSendRun(activeRunId)
              activeRunId = null
            }
            closeStream()
          },
        })
--- a/src/screens/chat/chat-screen.tsx
+++ b/src/screens/chat/chat-screen.tsx
@@ -1,8 +1,5 @@
 // Module-level local model override — set by composer when user picks a local model
 // Avoids prop threading. Reset when switching back to cloud models.
 export let _localModelOverride = ''
 export function setLocalModelOverride(model: string) { _localModelOverride = model }
 import {
  useCallback,
  useEffect,
@@ -21,12 +18,12 @@ import {
  textFromMessage,
 } from './utils'
 import {
  advanceStickyStreamingText,
  createResponseWaitSnapshot,
  createOptimisticMessage,
  createResponseWaitSnapshot,
  isTerminalActiveRunStatus,
-  shouldClearWaitingForAssistantMessage,
+  shouldClearWaitingForAssistantMessage
  type ResponseWaitSnapshot,
 } from './chat-screen-utils'
 import {
  appendHistoryMessage,
@@ -43,21 +40,20 @@ import { ChatEmptyState } from './components/chat-empty-state'
 import { ChatComposer } from './components/chat-composer'
 import { ConnectionStatusMessage } from './components/connection-status-message'
 import {
  clearPendingSendForSession,
  consumePendingSend,
  hasPendingGeneration,
  hasPendingSend,
  isRecentSession,
  resetPendingSend,
  setPendingGeneration,
  clearPendingSendForSession,
 } from './pending-send'
 import { useChatMeasurements } from './hooks/use-chat-measurements'
 import { useChatHistory } from './hooks/use-chat-history'
 import { useRealtimeChatHistory } from './hooks/use-realtime-chat-history'
 import { snapshotOptimisticUserMessages } from './hooks/optimistic-message-reinject'
 import { useSmoothStreamingText } from './hooks/use-smooth-streaming-text'
 import { useStreamingMessage } from './hooks/use-streaming-message'
 import { playChatComplete } from '@/lib/sounds'
 import { useChatSettingsStore } from '@/hooks/use-chat-settings'
 import { useActiveRunCheck } from './hooks/use-active-run-check'
 import { useChatMobile } from './hooks/use-chat-mobile'
 import { useChatSessions } from './hooks/use-chat-sessions'
@@ -70,6 +66,7 @@ import {
  CHAT_PENDING_COMMAND_STORAGE_KEY,
  CHAT_RUN_COMMAND_EVENT,
 } from './chat-events'
 import type {ResponseWaitSnapshot} from './chat-screen-utils';
 import type {
  ChatComposerAttachment,
  ChatComposerHandle,
@@ -79,6 +76,9 @@ import type {
 import type { ApprovalRequest } from '@/screens/gateway/lib/approvals-store'
 import type { ChatAttachment, ChatMessage, SessionMeta } from './types'
 import type { ChatRunCommandDetail } from './chat-events'
 import type {AgentActivity} from '@/stores/chat-activity-store';
 import { useChatSettingsStore } from '@/hooks/use-chat-settings'
 import { playChatComplete } from '@/lib/sounds'
 import {
  addApproval,
  loadApprovals,
@@ -101,12 +101,16 @@ import { MobileSessionsPanel } from '@/components/mobile-sessions-panel'
 import { ContextAlertModal } from '@/components/usage-meter/context-alert-modal'
 import { ErrorToastContainer, showErrorToast } from '@/components/error-toast'
 // ContextMeter removed — ContextBar (PR #32) replaces it
-import { useChatStore, persistRecoveryMessage } from '@/stores/chat-store'
+import { persistRecoveryMessage, useChatStore } from '@/stores/chat-store'
 import { useSessionModelStore } from '@/stores/session-model-store'
 import { useResearchCard } from '@/hooks/use-research-card'
 // MOBILE_TAB_BAR_OFFSET removed — tab bar always hidden in chat
 import { useTapDebug } from '@/hooks/use-tap-debug'
 import { useChatMode } from '@/hooks/use-chat-mode'
-import { useChatActivityStore, type AgentActivity } from '@/stores/chat-activity-store'
+import {  useChatActivityStore } from '@/stores/chat-activity-store'
 export let _localModelOverride = ''
 export function setLocalModelOverride(model: string) { _localModelOverride = model }
 type ChatScreenProps = {
  activeFriendlyId: string
@@ -481,45 +485,6 @@ export function ChatScreen({
  const portableChatFriendlyId = isPortableMode ? 'main' : activeFriendlyId
  // --- Issue #43 fix: lift waitingForResponse into persistent Zustand store ---
  // The store survives component unmount, so navigating away mid-stream
  // doesn't lose the "waiting" flag. sessionStorage backup handles reloads.
  const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
  // resolvedSessionKey isn't available yet (defined below), so we track it via
  // a ref that's updated once it resolves. The memo/callback read the ref.
  const sessionKeyForWaiting = useRef<string | undefined>(undefined)
  const [activeRunCheckDone, setActiveRunCheckDone] = useState(false)
  // Track stale-restored sessions that need API verification before showing thinking.
  // On page reload, sessionStorage may contain stale "waiting" flags from a
  // previous session. We must not show the thinking indicator until the
  // active-run API check confirms the run is genuinely active. (Issue #449)
  const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
  const waitingForResponse = useMemo(() => {
    const key = sessionKeyForWaiting.current
    if (!key) return hasPendingSend() || hasPendingGeneration()
    // If we restored waiting state from sessionStorage but haven't verified
    // with the API yet, don't show thinking — it might be stale (Issue #449).
    if (
      storeWaiting.has(key) &&
      pendingVerifySessionKeyRef.current === key &&
      !activeRunCheckDone
    ) {
      return false
    }
    return storeWaiting.has(key)
  }, [storeWaiting, activeRunCheckDone])
  const setWaitingForResponse = useCallback((waiting: boolean) => {
    const store = useChatStore.getState()
    const key = sessionKeyForWaiting.current
    if (!key) return
    if (waiting) {
      store.setSessionWaiting(key)
    } else {
      store.clearSessionWaiting(key)
    }
  }, [])
  const [liveToolActivity, setLiveToolActivity] = useState<
    Array<{ name: string; timestamp: number }>
  >([])
@@ -540,10 +505,18 @@ export function ChatScreen({
    if (typeof window === 'undefined') return 'low'
    const key = `claude-thinking-${activeFriendlyId || 'new'}`
    const stored = window.sessionStorage.getItem(key)
-    if (stored === 'off' || stored === 'low' || stored === 'adaptive')
+    if (stored === 'off' || stored === 'low' || stored === 'medium' || stored === 'high' || stored === 'adaptive')
      return stored
    return 'low'
  })
  // Tracks whether the user has explicitly picked a thinking level for this session.
  // A missing/absent sessionStorage key means we should fall back to the Hermes config default.
  const thinkingInitializedByUserRef = useRef(false)
  useEffect(() => {
    if (typeof window === 'undefined') return
    const key = `claude-thinking-${activeFriendlyId || 'new'}`
    thinkingInitializedByUserRef.current = window.sessionStorage.getItem(key) !== null
  }, [activeFriendlyId])
  const { alertOpen, alertThreshold, alertPercent, dismissAlert } =
    useContextAlert()
@@ -611,10 +584,61 @@ export function ChatScreen({
    portableMode: isPortableMode,
  })
  // --- Waiting state management (Issue #43 + #449) ---
  // resolvedSessionKey is now available (defined above from useChatHistory).
  const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
  const sessionKeyForWaiting = useRef<string | undefined>(undefined)
  const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
  // Keep the waiting-state ref in sync with the resolved session key
  sessionKeyForWaiting.current = resolvedSessionKey
-  // Detect stale restored waiting state from sessionStorage — we need API
+  // Synchronously detect stale waiting state from sessionStorage.
  // This runs during render (not in an effect) so the guard in
  // waitingForResponse is active on the very first render, preventing
  // a flash of the "Thinking" indicator when reopening an old session.
  const needsStaleCheck =
    resolvedSessionKey &&
    !isNewChat &&
    storeWaiting.has(resolvedSessionKey) &&
    pendingVerifySessionKeyRef.current !== resolvedSessionKey
  if (needsStaleCheck) {
    pendingVerifySessionKeyRef.current = resolvedSessionKey
  }
  // Track whether the active-run API check has completed.
  // Initialize to false when we detect stale state (needs verification),
  // true otherwise. This prevents showing "Thinking" until the API confirms.
  const [activeRunCheckDone, setActiveRunCheckDone] = useState(!needsStaleCheck)
  const waitingForResponse = useMemo(() => {
    const key = sessionKeyForWaiting.current
    if (!key) return hasPendingSend() || hasPendingGeneration()
    // If we restored waiting state from sessionStorage but haven't verified
    // with the API yet, don't show thinking — it might be stale (Issue #449).
    if (
      storeWaiting.has(key) &&
      pendingVerifySessionKeyRef.current === key &&
      !activeRunCheckDone
    ) {
      return false
    }
    return storeWaiting.has(key)
  }, [storeWaiting, activeRunCheckDone])
  const setWaitingForResponse = useCallback((waiting: boolean) => {
    const store = useChatStore.getState()
    const key = sessionKeyForWaiting.current
    if (!key) return
    if (waiting) {
      store.setSessionWaiting(key)
    } else {
      store.clearSessionWaiting(key)
    }
  }, [])
  // Stale restored waiting state needs API verification before showing thinking (Issue #449).
  useEffect(() => {
    const currentSessionKey = resolvedSessionKey
@@ -868,13 +892,12 @@ export function ChatScreen({
  const streamStart = useCallback(() => {
    if (!activeFriendlyId || isNewChat) return
-    // Bug #3 fix: no more 350ms polling loop — SSE handles realtime updates.
+    // No aggressive delayed refetch here — it wipes optimistic user messages
-    // Single delayed fetch as fallback to catch the initial response.
+    // from the cache before the server has echoed them, causing the user's
-    if (streamTimer.current) window.clearTimeout(streamTimer.current)
+    // message to disappear until the agent completes. The existing failsafes
-    streamTimer.current = window.setTimeout(() => {
+    // (5s + 10s timeouts at lines below, active-run polling) handle the case
-      if (activeRealtimeStreamingRef.current) return
+    // where SSE misses the done event.
-      refreshHistoryRef.current()
+    void activeFriendlyId // keep dep for eslint
    }, 2000)
  }, [activeFriendlyId, isNewChat])
  refreshHistoryRef.current = function refreshHistory() {
@@ -883,37 +906,21 @@ export function ChatScreen({
    // Snapshot any unconfirmed optimistic user messages BEFORE refetch.
    // The refetch replaces the query cache with server data — if the server
    // hasn't processed the user's POST yet, the optimistic message vanishes.
-    const currentMessages = (historyQuery.data as any)?.messages as
+    const historySessionKey = isPortableMode
-      | Array<ChatMessage>
+      ? 'main'
-      | undefined
+      : activeSessionKey ||
-    const pendingOptimistic = (currentMessages ?? []).filter((msg) => {
+        sessionKeyForHistory ||
-      const raw = msg as Record<string, unknown>
+        resolvedSessionKey ||
-      return (
+        'main'
-        msg.role === 'user' &&
+    const reInjectOptimistic = snapshotOptimisticUserMessages(
-        (normalizeMessageValue(raw.__optimisticId).startsWith('opt-') ||
+      queryClient,
-          normalizeMessageValue(raw.status) === 'sending')
+      portableChatFriendlyId,
-      )
+      historySessionKey,
-    })
+    )
    void historyQuery.refetch().then(() => {
      // Re-inject optimistic messages that weren't in the server response
-      if (pendingOptimistic.length === 0) return
+      reInjectOptimistic()
      const historySessionKey = isPortableMode
        ? 'main'
        : activeSessionKey ||
          sessionKeyForHistory ||
          resolvedSessionKey ||
          'main'
      if (!portableChatFriendlyId || !historySessionKey) return
      for (const optimistic of pendingOptimistic) {
        appendHistoryMessage(
          queryClient,
          portableChatFriendlyId,
          historySessionKey,
          optimistic,
        )
      }
    })
  }
@@ -1018,6 +1025,29 @@ export function ChatScreen({
    retry: false,
  })
  // Fetch the configured reasoning effort so the Chat Controls default matches
  // what Hermes actually uses instead of hardcoding 'low'.
  const reasoningEffortQuery = useQuery({
    queryKey: ['hermes-config', 'reasoning-effort'],
    queryFn: async () => {
      try {
        const res = await fetch('/api/hermes-config')
        if (!res.ok) return 'low'
        const data = await res.json() as { config?: Record<string, unknown> }
        const agentSection = data?.config?.agent
        if (agentSection && typeof agentSection === 'object' && !Array.isArray(agentSection)) {
          const effort = (agentSection as Record<string, unknown>).reasoning_effort
          if (effort === 'off' || effort === 'low' || effort === 'medium' || effort === 'high') return effort
        }
        return 'low'
      } catch {
        return 'low'
      }
    },
    staleTime: 10 * 60 * 1000,
    retry: false,
  })
  const availableModelIds = useMemo(() => {
    const models = modelsQuery.data?.models || []
    return models.map((m: any) => m.id).filter((id: string) => id)
@@ -1054,6 +1084,16 @@ export function ChatScreen({
    }
  }, [currentModel, activeFriendlyId])
  // If no per-session thinking level override exists, inherit from Hermes config
  useEffect(() => {
    if (thinkingInitializedByUserRef.current) return
    const configEffort = reasoningEffortQuery.data
    if (!configEffort) return
    if (configEffort === 'off' || configEffort === 'low' || configEffort === 'medium' || configEffort === 'high') {
      setThinkingLevel(configEffort)
    }
  }, [reasoningEffortQuery.data])
  // Persist thinking level changes to sessionStorage
  const handleThinkingLevelChange = useCallback(
    (level: ThinkingLevel) => {
@@ -1378,7 +1418,7 @@ export function ChatScreen({
      return deduped
    }
-    const nextMessages = [...deduped]
+    let nextMessages = [...deduped]
    const streamToolCalls = activeToolCalls.map((toolCall) => ({
      ...toolCall,
      phase: toolCall.phase,
@@ -1394,6 +1434,42 @@ export function ChatScreen({
      __streamToolCalls: streamToolCalls,
    } as ChatMessage
    // Check if the server has already returned a completed assistant message
    // that overlaps with the streaming text. If so, drop the streaming
    // placeholder to avoid showing the same response twice.
    const streamingText = stableActiveStreamingText.trim()
    const hasServerAssistantVersion = nextMessages.some((msg) => {
      if (msg.role !== 'assistant') return false
      if (msg.__streamingStatus === 'streaming') return false
      // Any non-streaming assistant message that appears after the last user
      // message is potentially the same response — match by text overlap
      if (streamingText.length > 0) {
        const msgText = textFromMessage(msg).trim()
        if (msgText.length > 0 && (
          msgText === streamingText ||
          msgText.startsWith(streamingText) ||
          streamingText.startsWith(msgText)
        )) {
          return true
        }
      }
      // Also match by tool calls: if the server message has the same tool
      // calls as the streaming placeholder, it's the same response
      if (streamToolCalls.length > 0) {
        const msgContent = Array.isArray(msg.content) ? msg.content : []
        const msgToolCalls = msgContent.filter((p: any) => p.type === 'toolCall')
        if (msgToolCalls.length > 0 && msgToolCalls.length === streamToolCalls.length) {
          return streamToolCalls.every((stc: any) =>
            msgToolCalls.some((mtc: any) => mtc.name === stc.name)
          )
        }
      }
      return false
    })
    if (hasServerAssistantVersion) {
      return nextMessages
    }
    const existingStreamIdx = nextMessages.findIndex(
      (message) => message.__streamingStatus === 'streaming',
    )
@@ -1403,6 +1479,13 @@ export function ChatScreen({
        ...nextMessages[existingStreamIdx],
        ...streamingMsg,
      }
      // Remove any other streaming messages (e.g. from mergeHistoryMessages
      // appending a realtime message after finalDisplayMessages already
      // injected a placeholder). Keep only one streaming placeholder.
      const keepIdx = existingStreamIdx
      nextMessages = nextMessages.filter(
        (m, i) => i === keepIdx || m.__streamingStatus !== 'streaming',
      )
      return nextMessages
    }
--- a/src/screens/chat/components/chat-composer.tsx
+++ b/src/screens/chat/components/chat-composer.tsx
@@ -36,8 +36,8 @@ import type {
 } from '@/components/slash-command-menu'
 import {
  DEFAULT_SLASH_COMMANDS,
  mergeSlashCommands,
  SlashCommandMenu,
  mergeSlashCommands,
 } from '@/components/slash-command-menu'
 import {
  PromptInput,
@@ -61,6 +61,7 @@ import {
  emitSearchModalEvent,
 } from '@/hooks/use-search-modal'
 import { setLocalModelOverride } from '@/screens/chat/local-model-override'
 import { formatModelName } from '@/lib/format-model-name'
 type ChatComposerAttachment = {
  id: string
@@ -72,7 +73,7 @@ type ChatComposerAttachment = {
  kind?: 'image' | 'file' | 'audio'
 }
-type ThinkingLevel = 'off' | 'low' | 'medium' | 'high'
+type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'
 type ChatComposerProps = {
  onSubmit: (
@@ -565,6 +566,43 @@ function getResolvedModelKey(model: string, provider?: string): string {
  return `${normalizedProvider}/${normalizedModel}`
 }
 /**
 * Decides whether a catalog entry represents the currently selected model.
 *
 * `currentModel` may be written either as a bare "model-id" or as a
 * provider-qualified "provider/model-id"; the catalog entry supplies its
 * id and provider separately. All three inputs are trimmed before use.
 *
 * Returns true when the trimmed current model:
 *   - is exactly the entry id, or
 *   - ends with "/<entry id>" (whatever precedes the slash is not compared
 *     against the entry's provider), or
 *   - is exactly the entry's "<provider>/<id>" key (just the id when the
 *     provider is blank).
 *
 * Returns false when either the current model or the entry id is empty
 * after trimming.
 */
 function isCurrentModel(
  currentModel: string,
  entryId: string,
  entryProvider: string,
 ): boolean {
  const selected = currentModel.trim()
  const id = entryId.trim()
  const provider = entryProvider.trim()
  // Nothing selected, or an entry without an id, can never match.
  if (selected === '' || id === '') return false
  const qualifiedKey = provider === '' ? id : `${provider}/${id}`
  return (
    selected === id ||
    selected.endsWith(`/${id}`) ||
    selected === qualifiedKey
  )
 }
 function isCanvasSupported(): boolean {
  if (typeof document === 'undefined') return false
  try {
@@ -1671,7 +1709,7 @@ function ChatComposerComponent({
  const promptPlaceholder = isMobileViewport
    ? 'Message...'
    : 'Ask anything... (↵ to send · ⇧↵ new line · ⌘⇧M switch model)'
-  const [serverCommands, setServerCommands] = useState<SlashCommandDefinition[]>([])
+  const [serverCommands, setServerCommands] = useState<Array<SlashCommandDefinition>>([])
  useEffect(() => {
    fetch('/api/commands')
@@ -2566,9 +2604,11 @@ function ChatComposerComponent({
                            unpinnedGroups.set(entry.provider, group)
                          }
                          const renderEntry = (entry: (typeof parsed)[0]) => {
-                            const isActive =
+                            const isActive = isCurrentModel(
-                              entry.id === currentModel ||
+                              persistedSessionModel || currentModel,
-                              `${defaultProvider}/${entry.id}` === currentModel
+                              entry.id,
                              entry.provider,
                            )
                            return (
                              <div
                                key={entry.id}
@@ -2757,9 +2797,9 @@ function ChatComposerComponent({
                        setIsThinkingMenuOpen(false)
                        setIsModelMenuOpen(false)
                      }}
-                      className="inline-flex h-8 items-center gap-1 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
+                      className="inline-flex h-8 items-center gap-1.5 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
-                      title="Chat controls"
+                      title={`Chat controls · ${modelButtonLabel}`}
-                      aria-label="Chat controls"
+                      aria-label={`Chat controls, current model: ${modelButtonLabel}`}
                    >
                      <svg
                        width="13"
@@ -2779,6 +2819,7 @@ function ChatComposerComponent({
                        <circle cx="15" cy="12" r="2" fill="currentColor" stroke="none" />
                        <circle cx="11" cy="18" r="2" fill="currentColor" stroke="none" />
                      </svg>
                      <span className="max-w-[5rem] truncate sm:max-w-[8rem] md:max-w-[10rem]">{formatModelName(modelButtonLabel)}</span>
                      <HugeiconsIcon icon={ArrowDown01Icon} size={11} />
                    </button>
                    {isControlsMenuOpen ? (
@@ -2946,7 +2987,11 @@ function ChatComposerComponent({
                                        unpinnedGroups.set(entry.provider, group)
                                      }
                                      const renderEntry = (entry: (typeof parsed)[0]) => {
-                                        const isActive = entry.id === currentModel || `${defaultProvider}/${entry.id}` === currentModel
+                                        const isActive = isCurrentModel(
                                          persistedSessionModel || currentModel,
                                          entry.id,
                                          entry.provider,
                                        )
                                        return (
                                          <div key={entry.id} className="group relative flex items-center">
                                            <button
--- a/src/screens/chat/components/chat-header.tsx
+++ b/src/screens/chat/components/chat-header.tsx
@@ -64,7 +64,7 @@ function formatMobileSessionTitle(rawTitle: string): string {
  return title
 }
-type ThinkingLevel = 'off' | 'low' | 'adaptive'
+type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'
 type ChatHeaderProps = {
  activeTitle: string
--- a/src/screens/chat/components/chat-message-list.tsx
+++ b/src/screens/chat/components/chat-message-list.tsx
@@ -25,6 +25,7 @@ import { AssistantAvatar } from '@/components/avatars'
 import { cn } from '@/lib/utils'
 import { hapticTap } from '@/lib/haptics'
 import { CHAT_OPEN_MESSAGE_SEARCH_EVENT } from '@/screens/chat/chat-events'
 import { useChatStore } from '@/stores/chat-store'
 /** Duration (ms) the thinking indicator stays visible after waitingForResponse
 *  clears, giving the first response message time to render before the
@@ -179,28 +180,49 @@ type ThinkingBubbleProps = {
  liveToolActivity?: Array<{ name: string; timestamp: number }>
  researchCard?: UseResearchCardResult
  isCompacting?: boolean
  /** When true, always show "Thinking…" regardless of activity. Used for the
   * first 10s before the delayed activity feed appears. */
  forceSimple?: boolean
 }
 /**
- * Premium shimmer thinking bubble — matches the assistant message position
+ * Shows a thinking indicator
 * with three bouncing dots, a gradient shimmer sweep, and a dynamic status
 * label that reflects what's actually happening (tool calls, etc.).
 * When forceSimple is true, suppresses all activity labels — just "Thinking…".
 */
 function ThinkingBubble({
-  activeToolCalls: _activeToolCalls = [],
+  activeToolCalls = [],
-  liveToolActivity: _liveToolActivity = [],
+  liveToolActivity = [],
  researchCard,
  isCompacting = false,
  forceSimple = false,
 }: ThinkingBubbleProps) {
-  const statusLabel = isCompacting ? 'Compacting context...' : 'Thinking…'
+  // Fallback activity from heartbeat — shows last known agent activity
  // when no tool calls are in flight (e.g. during pure reasoning)
  const heartbeatActivity = useChatStore((s) => s.heartbeatActivity)
-  // Elapsed time counter — resets when the status label changes (new tool)
+  // Build a meaningful status label from live activity
  const activeToolNames = activeToolCalls
    .filter((tc) => tc.phase !== 'done' && tc.phase !== 'complete' && tc.phase !== 'completed')
    .map((tc) => tc.name.replace(/_/g, ' '))
  const liveToolNames = liveToolActivity.map((a) => a.name.replace(/_/g, ' '))
  const uniqueNames = [...new Set([...activeToolNames, ...liveToolNames])]
  const activityLabel =
    uniqueNames.length > 0
      ? `Using: ${uniqueNames.slice(0, 3).join(', ')}${uniqueNames.length > 3 ? ` +${uniqueNames.length - 3} more` : ''}`
      : null
  const statusLabel = isCompacting
    ? 'Compacting context...'
    : forceSimple
      ? 'Thinking…'
      : activityLabel || heartbeatActivity || 'Thinking…'
  // Elapsed time counter — counts from bubble mount, not from last label change
  const [elapsed, setElapsed] = useState(0)
  useEffect(() => {
    setElapsed(0)
    const interval = window.setInterval(() => setElapsed((s) => s + 1), 1000)
    return () => window.clearInterval(interval)
-  }, [statusLabel])
+  }, [])
  const elapsedLabel =
    elapsed >= 60
@@ -351,6 +373,33 @@ function ThinkingBubble({
  )
 }
 /** Minimal status line shown after 10s of thinking when no tool calls
 *  are in flight yet. Shows heartbeat status + elapsed time.
 *
 *  The elapsed value counts from this component's mount and is derived from
 *  the wall clock rather than from the number of timer ticks, so it stays
 *  accurate when the browser throttles timers (e.g. in a background tab). */
 function StatusLine() {
  // Last activity text stored in the chat store; null when none is set.
  const heartbeatActivity = useChatStore((s) => s.heartbeatActivity)
  const [elapsed, setElapsed] = useState(0)
  useEffect(() => {
    const startedAt = Date.now()
    // Poll faster than once per second so the displayed second flips
    // promptly; setting an unchanged value does not trigger a re-render.
    const interval = window.setInterval(() => {
      setElapsed(Math.floor((Date.now() - startedAt) / 1000))
    }, 250)
    return () => window.clearInterval(interval)
  }, [])
  const elapsedLabel =
    elapsed >= 60
      ? `${Math.floor(elapsed / 60)}m ${elapsed % 60}s`
      : `${elapsed}s`
  return (
    <div className="flex items-center gap-2 text-[11px] text-primary-400 dark:text-primary-500 py-0.5">
      <span className="inline-block size-1.5 rounded-full bg-amber-400 animate-pulse" />
      <span className="opacity-80">
        {heartbeatActivity || 'Working…'}
      </span>
      <span aria-hidden="true" className="opacity-40">·</span>
      <span className="tabular-nums opacity-50 font-mono">{elapsedLabel}</span>
    </div>
  )
 }
 const VIRTUAL_ROW_HEIGHT = 136
 const VIRTUAL_OVERSCAN = 8
 const NEAR_BOTTOM_THRESHOLD = 200
@@ -606,6 +655,13 @@ function ChatMessageListComponent({
  const [unreadCount, setUnreadCount] = useState(0)
  const [expandAllToolSections, setExpandAllToolSections] = useState(false)
  // Activity feed delay: only show tool activity after 10s of thinking.
  // For the first 10s, the ThinkingBubble stays simple ("Thinking…").
  const THINKING_ACTIVITY_DELAY_S = 10
  const [thinkingElapsed, setThinkingElapsed] = useState(0)
  const thinkingStartRef = useRef<number>(0)
  const thinkingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null)
  // Bug 2 fix: grace period — keep thinking indicator alive briefly after
  // waitingForResponse clears so the response message has time to render.
  const [thinkingGrace, setThinkingGrace] = useState(false)
@@ -1107,6 +1163,52 @@ function ChatMessageListComponent({
    researchCard && researchCard.steps.length > 0,
  )
  // Compute visibility of the entire bottom thinking area — the same gate
  // used for rendering (lines below). Start / stop the elapsed timer here.
  const thinkingAreaVisible =
    showTypingIndicator ||
    showResearchCard ||
    isCompacting ||
    liveToolActivity.length > 0 ||
    (isStreaming && !streamingText) ||
    (isStreaming && activeToolCalls.length > 0)
  // Track how long the thinking area has been visible to gate the delayed
  // activity feed (10s threshold).
  useEffect(() => {
    if (thinkingAreaVisible) {
      if (thinkingStartRef.current === 0) {
        thinkingStartRef.current = Date.now()
        setThinkingElapsed(0)
      }
      if (!thinkingTimerRef.current) {
        thinkingTimerRef.current = setInterval(() => {
          setThinkingElapsed(
            Math.floor((Date.now() - thinkingStartRef.current) / 1000),
          )
        }, 250)
      }
    } else {
      if (thinkingTimerRef.current) {
        clearInterval(thinkingTimerRef.current)
        thinkingTimerRef.current = null
      }
      thinkingStartRef.current = 0
      setThinkingElapsed(0)
    }
    return () => {
      if (thinkingTimerRef.current) {
        clearInterval(thinkingTimerRef.current)
        thinkingTimerRef.current = null
      }
    }
  }, [thinkingAreaVisible])
  const showActivityFeed =
    thinkingElapsed >= THINKING_ACTIVITY_DELAY_S ||
    activeToolCalls.length > 0 ||
    liveToolActivity.length > 0
  const shouldBottomPin =
    visibleEntries.length > 0 ||
    showToolOnlyNotice ||
@@ -1146,11 +1248,11 @@ function ChatMessageListComponent({
          args: tcAny.args,
          preview:
            typeof tcAny.preview === 'string'
-              ? (tcAny.preview as string)
+              ? (tcAny.preview)
              : undefined,
          result:
            typeof tcAny.result === 'string'
-              ? (tcAny.result as string)
+              ? (tcAny.result)
              : undefined,
        }
      })
@@ -1823,12 +1925,12 @@ function ChatMessageListComponent({
                  liveToolActivity={liveToolActivity}
                  researchCard={researchCard}
                  isCompacting={isCompacting}
                  forceSimple={!showActivityFeed}
                />
-                {/* Branch from the thinking bubble into a single compact
+                {/* After 10s of thinking, show activity feed. With tool calls:
-                    TUI-style tool activity card. Use normalized streaming calls
+                    compact CLI-style TuiActivityCard (last 3). Without tool calls:
-                    so the card appears for both structured tool events and the
+                    a minimal status line showing elapsed time and heartbeat. */}
-                    lighter live activity feed. */}
+                {showActivityFeed ? (
                {normalizedStreamingToolCalls.length > 0 ? (
                  <div className="flex max-w-[var(--chat-content-max-width)]">
                    <div
                      className="ml-[14px] mr-2 w-px shrink-0"
@@ -1839,51 +1941,55 @@ function ChatMessageListComponent({
                      aria-hidden
                    />
                    <div className="min-w-0 flex-1 pt-1">
-                      <TuiActivityCard
+                      {normalizedStreamingToolCalls.length > 0 ? (
-                        toolSections={normalizedStreamingToolCalls.map((tc) => {
+                        <TuiActivityCard
-                          const phase = tc.phase
+                          toolSections={normalizedStreamingToolCalls.slice(-3).map((tc) => {
-                          const state =
+                            const phase = tc.phase
-                            phase === 'error'
+                            const state =
-                              ? ('output-error' as const)
+                              phase === 'error'
-                              : phase === 'done'
+                                ? ('output-error' as const)
-                                ? ('output-available' as const)
+                                : phase === 'done'
-                                : phase === 'running'
+                                  ? ('output-available' as const)
-                                  ? ('input-streaming' as const)
+                                  : phase === 'running'
-                                  : ('input-available' as const)
+                                    ? ('input-streaming' as const)
-                          return {
+                                    : ('input-available' as const)
-                            key: tc.id,
+                            return {
-                            type: tc.name,
+                              key: tc.id,
-                            input:
+                              type: tc.name,
-                              tc.args &&
+                              input:
-                              typeof tc.args === 'object' &&
+                                tc.args &&
-                              !Array.isArray(tc.args)
+                                typeof tc.args === 'object' &&
-                                ? (tc.args as Record<string, unknown>)
+                                !Array.isArray(tc.args)
-                                : undefined,
+                                  ? (tc.args as Record<string, unknown>)
-                            preview: tc.preview,
+                                  : undefined,
-                            outputText:
+                              preview: tc.preview,
-                              state === 'output-available'
+                              outputText:
-                                ? tc.result || ''
+                                state === 'output-available'
-                                : '',
+                                  ? tc.result || ''
-                            errorText:
+                                  : '',
-                              state === 'output-error'
+                              errorText:
-                                ? tc.result || 'Tool failed'
+                                state === 'output-error'
-                                : undefined,
+                                  ? tc.result || 'Tool failed'
-                            state,
+                                  : undefined,
-                          }
+                              state,
-                        })}
+                            }
-                        thinking={null}
+                          })}
-                        isStreaming={true}
+                          thinking={null}
-                        formatLabel={(name) => name.replace(/_/g, ' ')}
+                          isStreaming={true}
-                        formatArg={(_name, args) => {
+                          formatLabel={(name) => name.replace(/_/g, ' ')}
-                          if (!args) return null
+                          formatArg={(_name, args) => {
-                          const first = Object.values(args).find(
+                            if (!args) return null
-                            (v) => typeof v === 'string' && v.trim(),
+                            const first = Object.values(args).find(
-                          )
+                              (v) => typeof v === 'string' && v.trim(),
-                          return typeof first === 'string'
+                            )
-                            ? first.trim()
+                            return typeof first === 'string'
-                            : null
+                              ? first.trim()
-                        }}
+                              : null
-                      />
+                          }}
                        />
                      ) : (
                        <StatusLine />
                      )}
                    </div>
                  </div>
                ) : null}
@@ -1955,11 +2061,24 @@ function getStableMessageId(message: ChatMessage, index: number): string {
  }
  const timestamp = getRawMessageTimestamp(message)
  const text = textFromMessage(message)
  // Content-based fingerprint: hash of text content + timestamp.
  // This survives reordering because it doesn't depend on array position.
  const fingerprint = djb2(text.slice(0, 120))
  if (timestamp) {
-    return `${message.role ?? 'assistant'}-${timestamp}-${index}`
+    return `${message.role ?? 'assistant'}-${timestamp}-${fingerprint}`
  }
-  return `${message.role ?? 'assistant'}-${index}`
+  return `${message.role ?? 'assistant'}-${fingerprint}-${index}`
 }
 /** djb2 hash over the string's UTF-16 code units, returned as an unsigned
 *  base-36 string. Dependency-free. */
 function djb2(str: string): string {
  let acc = 5381
  let pos = 0
  while (pos < str.length) {
    // acc * 33 + code unit, wrapped to a signed 32-bit integer.
    acc = (Math.imul(acc, 33) + str.charCodeAt(pos)) | 0
    pos += 1
  }
  // Reinterpret as unsigned so the rendered value never has a minus sign.
  return (acc >>> 0).toString(36)
 }
 function getRawMessageTimestamp(message: ChatMessage): number | null {
--- a/src/screens/chat/components/message-item.tsx
+++ b/src/screens/chat/components/message-item.tsx
@@ -7,6 +7,11 @@ import {
  textFromMessage,
 } from '../utils'
 import { MessageActionsBar } from './message-actions-bar'
 import {
  buildHermesActivitySummary,
  shouldAutoExpandHermesActivityCard,
 } from './streaming-activity-ui'
 import { TuiActivityCard } from './tui-activity-card'
 import type { ChatAttachment, ChatMessage, ToolCallContent } from '../types'
 import type { ToolPart } from '@/components/prompt-kit/tool'
 import { AssistantAvatar, UserAvatar } from '@/components/avatars'
@@ -31,11 +36,6 @@ import {
  useChatSettingsStore,
 } from '@/hooks/use-chat-settings'
 import { cn } from '@/lib/utils'
 import {
  buildHermesActivitySummary,
  shouldAutoExpandHermesActivityCard,
 } from './streaming-activity-ui'
 import { TuiActivityCard } from './tui-activity-card'
 const WORDS_PER_TICK = 4
 const TICK_INTERVAL_MS = 50
@@ -2503,21 +2503,29 @@ function MessageItemComponent({
      {/* Grouped tool card above the assistant bubble. Only show once there
          is real assistant text in the bubble. While streaming with no text,
          the legacy ThinkingBubble in chat-message-list owns the visual and
-          renders its own branched TuiActivityCard so we don't double up. */}
+          renders its own branched TuiActivityCard so we don't double up.
          When done streaming, show a compact tool-count chip instead of
          the full expandable card. */}
      {!isUser &&
      finalToolSections.length > 0 &&
      (hasText || !effectiveIsStreaming) ? (
        <div className="w-full max-w-[var(--chat-content-max-width)] flex">
          <div className="w-6 shrink-0" aria-hidden />
          <div className="min-w-0 flex-1">
-            <TuiActivityCard
+            {effectiveIsStreaming ? (
-              toolSections={finalToolSections}
+              <TuiActivityCard
-              thinking={null}
+                toolSections={finalToolSections}
-              isStreaming={effectiveIsStreaming}
+                thinking={null}
-              expandAll={expandAllToolSections}
+                isStreaming={effectiveIsStreaming}
-              formatLabel={formatToolDisplayLabel}
+                expandAll={expandAllToolSections}
-              formatArg={keyArgLabel}
+                formatLabel={formatToolDisplayLabel}
-            />
+                formatArg={keyArgLabel}
              />
            ) : (
              <span className="inline-block text-[11px] text-primary-400 dark:text-primary-500 py-0.5 opacity-60">
                {finalToolSections.length} tool{finalToolSections.length !== 1 ? 's' : ''} used
              </span>
            )}
          </div>
        </div>
      ) : null}
--- a/src/screens/chat/hooks/optimistic-message-reinject.ts
+++ b/src/screens/chat/hooks/optimistic-message-reinject.ts
@@ -0,0 +1,88 @@
 import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
 import { textFromMessage } from '../utils'
 import type { QueryClient } from '@tanstack/react-query'
 import type { ChatMessage } from '../types'
 /** Trims string input; any non-string value becomes the empty string. */
 function normalize(value: unknown): string {
  if (typeof value !== 'string') return ''
  return value.trim()
 }
 /**
 * Captures the not-yet-echoed user messages currently in the history cache
 * and returns a closure that appends back whichever of them are missing
 * from the cache when it is later invoked.
 *
 * Intended to bracket a cache replacement (e.g. a refetch): server data
 * won't include an optimistic message until the server echoes it, so
 * without re-injection the user's message would disappear in the meantime.
 *
 * A cached message is captured when its role is "user" and it is one of:
 *   - optimistic (__optimisticId starts with "opt-"),
 *   - in "sending" or "queued" status,
 *   - in "sent" status with a clientId/client_id but no string id/messageId.
 *
 * When the returned closure runs, a captured message is treated as already
 * present (and skipped) if the cache holds a user message that either
 *   - carries the same clientId/client_id, or
 *   - has identical non-empty text AND both messages have timestamps that
 *     differ by less than 10 000 (presumably milliseconds — confirm).
 * Everything else is appended via appendHistoryMessage.
 *
 * Usage:
 *   const reInject = snapshotOptimisticUserMessages(queryClient, friendlyId, sessionKey)
 *   await queryClient.invalidateQueries(...)
 *   reInject()
 */
 export function snapshotOptimisticUserMessages(
  queryClient: QueryClient,
  friendlyId: string,
  sessionKey: string,
 ): () => void {
  const historyKey = chatQueryKeys.history(friendlyId, sessionKey)
  // Reads the cached message array for this session (empty when absent).
  const readMessages = (): Array<unknown> => {
    const cached = queryClient.getQueryData<Record<string, unknown>>(historyKey)
    return (cached?.messages as Array<unknown> | undefined) ?? []
  }
  // Client-side id under either spelling; '' when neither is a non-blank string.
  const clientIdOf = (fields: Record<string, unknown>): string =>
    normalize(fields.clientId) || normalize(fields.client_id)
  // Selects the user messages that a cache replacement could drop.
  const isPendingUserMessage = (candidate: unknown): boolean => {
    const fields = candidate as Record<string, unknown>
    if (fields.role !== 'user') return false
    if (String(fields.__optimisticId ?? '').startsWith('opt-')) return true
    switch (String(fields.status)) {
      case 'sending':
      case 'queued':
        return true
      case 'sent': {
        // Re-inject only if the message has a clientId (local) but no server id
        const serverId = normalize(fields.id) || normalize(fields.messageId)
        return clientIdOf(fields).length > 0 && serverId.length === 0
      }
      default:
        return false
    }
  }
  const pending = readMessages().filter(
    isPendingUserMessage,
  ) as unknown as Array<ChatMessage>
  return () => {
    // Read once up front; messages appended below are not re-read.
    const liveMessages = readMessages()
    for (const snapshotMessage of pending) {
      const snapshotFields = snapshotMessage as unknown as Record<string, unknown>
      const snapshotClientId = clientIdOf(snapshotFields)
      const snapshotText = textFromMessage(snapshotMessage)
      const echoed = liveMessages.some((live: unknown) => {
        const liveFields = live as Record<string, unknown>
        if (liveFields.role !== 'user') return false
        if (snapshotClientId && clientIdOf(liveFields) === snapshotClientId) {
          return true
        }
        if (snapshotText.length === 0) return false
        if (textFromMessage(live as ChatMessage) !== snapshotText) return false
        // Same text only counts when both timestamps exist and are close.
        const snapshotTs = (snapshotFields.timestamp as number) || 0
        const liveTs = (liveFields.timestamp as number) || 0
        return Boolean(
          snapshotTs && liveTs && Math.abs(snapshotTs - liveTs) < 10_000,
        )
      })
      if (echoed) continue
      appendHistoryMessage(queryClient, friendlyId, sessionKey, snapshotMessage)
    }
  }
 }
--- a/src/screens/chat/hooks/use-active-run-check.ts
+++ b/src/screens/chat/hooks/use-active-run-check.ts
@@ -22,9 +22,14 @@ type ActiveRunResponse = {
 const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
  'accepted',
  'active',
-  'handoff',
+  // NOTE: 'handoff' is deliberately excluded. A handoff run means the
  // SSE client disconnected — the browser has no active stream. Keeping
  // the waiting state alive for handoff runs causes ghost "Thinking"
  // indicators on session reopen for runs that completed hours ago.
 ])
 const ACTIVE_RUN_CHECK_TIMEOUT_MS = 2000
 /**
 * On mount, checks whether the server has an active run for this session.
 * If so, marks the session as waiting in the persistent Zustand store.
@@ -33,6 +38,10 @@ const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
 * This closes the gap where a user navigates away during streaming,
 * the component unmounts (losing local state), and on remount the UI
 * doesn't know a run was in progress.
 *
 * A timeout (ACTIVE_RUN_CHECK_TIMEOUT_MS) ensures the check never blocks
 * the UI indefinitely — if the API is slow or unreachable, we assume the
 * run is dead and clear stale waiting state.
 */
 export function useActiveRunCheck({
  sessionKey,
@@ -55,6 +64,25 @@ export function useActiveRunCheck({
    hasCheckedRef.current = true
    const controller = new AbortController()
    let settled = false
    const settle = () => {
      if (settled) return
      settled = true
      onCompleteRef.current?.()
    }
    // Timeout: if the API check doesn't complete in time, assume the run is dead
    const timeoutId = window.setTimeout(() => {
      if (settled) return
      settle()
      try { controller.abort() } catch { /* ignore */ }
      // Clear stale waiting state — the run is almost certainly dead
      const store = useChatStore.getState()
      if (store.isSessionWaiting(sessionKeyRef.current)) {
        store.clearSessionWaiting(sessionKeyRef.current)
      }
    }, ACTIVE_RUN_CHECK_TIMEOUT_MS)
    async function check() {
      try {
@@ -62,10 +90,10 @@ export function useActiveRunCheck({
          `/api/sessions/${encodeURIComponent(sessionKey)}/active-run`,
          { signal: controller.signal },
        )
-        if (!response.ok) return
+        if (!response.ok) return finishCheck()
        const data = (await response.json()) as ActiveRunResponse
-        if (!data.ok) return
+        if (!data.ok) return finishCheck()
        const store = useChatStore.getState()
        if (data.run && ACTIVE_STATUSES.has(data.run.status)) {
@@ -75,15 +103,21 @@ export function useActiveRunCheck({
          store.clearSessionWaiting(sessionKey)
        }
      } catch {
-        // Network error or abort — ignore
+        // Network error or abort — ignore, already handled by timeout
      } finally {
-        onCompleteRef.current?.()
+        finishCheck()
      }
    }
    function finishCheck() {
      window.clearTimeout(timeoutId)
      settle()
    }
    void check()
    return () => {
      window.clearTimeout(timeoutId)
      controller.abort()
    }
  }, [sessionKey, enabled])
--- a/src/screens/chat/hooks/use-chat-history.ts
+++ b/src/screens/chat/hooks/use-chat-history.ts
@@ -702,11 +702,19 @@ function mergeOptimisticHistoryMessages(
    }
    // Preserve unconfirmed optimistic messages regardless of age.
    // Also preserve confirmed-sent messages that have a clientId but no
    // server id yet — they were acknowledged by SSE (onStarted) but
    // haven't been echoed by the server. Periodic refetches will drop
    // them otherwise (the "user message disappears" bug).
    const isSending =
      optimisticMessage.status === 'sending' ||
      Boolean(optimisticMessage.__optimisticId)
    const isSentButUnechoed =
      optimisticMessage.status === 'sent' &&
      Boolean(getMessageClientId(optimisticMessage)) &&
      !optimisticMessage.id
-    if (isSending) {
+    if (isSending || isSentButUnechoed) {
      merged.push(optimisticMessage)
    }
  }
--- a/src/screens/chat/hooks/use-realtime-chat-history.ts
+++ b/src/screens/chat/hooks/use-realtime-chat-history.ts
@@ -5,6 +5,7 @@ import { useChatStore } from '../../../stores/chat-store'
 import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
 import { toast } from '../../../components/ui/toast'
 import { textFromMessage } from '../utils'
 import { snapshotOptimisticUserMessages } from './optimistic-message-reinject'
 import type { ChatMessage } from '../types'
 import type { StreamingState } from '../../../stores/chat-store'
@@ -324,6 +325,14 @@ export function useRealtimeChatHistory({
            const prevCount =
              (prevData?.messages as Array<unknown> | undefined)?.length ?? 0
            // Snapshot optimistic user messages before refetch so they
            // survive the cache replacement. Re-injected after refetch.
            const reInjectOptimistic = snapshotOptimisticUserMessages(
              queryClient,
              effectiveFriendlyId,
              effectiveSessionKey,
            )
            // Issue #441 fix: Directly merge realtime buffer into history cache
            // INSTEAD of invalidateQueries. The old approach caused a race:
            // invalidateQueries → refetch (async) → merge runs with stale data
@@ -418,6 +427,8 @@ export function useRealtimeChatHistory({
                  )
                }
              }
              // Re-inject optimistic user messages that the server hasn't echoed yet
              reInjectOptimistic()
            })
            // Check for compaction — significant message count drop
--- a/src/screens/chat/hooks/use-streaming-message.ts
+++ b/src/screens/chat/hooks/use-streaming-message.ts
@@ -241,6 +241,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        error: message,
      }))
      onError?.(message)
      useChatStore.getState().setHeartbeatActivity(null)
    },
    [
      clearHandoffTimer,
@@ -429,6 +430,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
      }
      onComplete?.(message)
      useChatStore.getState().setHeartbeatActivity(null)
    },
    [clearHandoffTimer, onComplete, stopFrame, unregisterSendStreamRun],
  )
@@ -444,7 +446,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        typeof window !== 'undefined' &&
        window.localStorage?.getItem('hermes:debug:sse') === '1'
      ) {
-        // eslint-disable-next-line no-console
+         
        console.log(
          '[hermes-sse]',
          event,
@@ -754,6 +756,8 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        }
        case 'heartbeat': {
          markActivity()
          const activity = (payload as { activity?: string | null }).activity ?? null
          useChatStore.getState().setHeartbeatActivity(activity)
          break
        }
        case 'close': {
@@ -851,6 +855,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        streamingText: '',
        error: null,
      })
      useChatStore.getState().setHeartbeatActivity(null)
      try {
        const response = await fetch('/api/send-stream', {
--- a/src/stores/chat-store.ts
+++ b/src/stores/chat-store.ts
@@ -140,6 +140,11 @@ type ChatState = {
  clearSessionWaiting: (sessionKey: string) => void
  /** Check if a session is waiting for a response */
  isSessionWaiting: (sessionKey: string) => boolean
  /** Last activity description forwarded via heartbeat — used by ThinkingBubble
   *  to show meaningful progress during long reasoning stretches.
   *  `null` when no activity is currently reported. */
  heartbeatActivity: string | null
  /** Replace `heartbeatActivity` with the given value; pass `null` to clear it. */
  setHeartbeatActivity: (activity: string | null) => void
 }
 const createEmptyStreamingState = (): StreamingState => ({
@@ -641,6 +646,7 @@ export const useChatStore = create<ChatState>((set, get) => ({
  sendStreamRunIds: new Set(),
  waitingSessionKeys: _restoredWaiting.keys,
  waitingSessionMeta: _restoredWaiting.meta,
  heartbeatActivity: null,
  setConnectionState: (connectionState, error) => {
    set({ connectionState, lastError: error ?? null })
@@ -687,6 +693,10 @@ export const useChatStore = create<ChatState>((set, get) => ({
    return get().waitingSessionKeys.has(sessionKey)
  },
  // Overwrite the stored heartbeat activity label; a `null` argument clears it.
  setHeartbeatActivity: (heartbeatActivity) => {
    set({ heartbeatActivity })
  },
  processEvent: (event) => {
    const state = get()
    const sessionKey = event.sessionKey
@@ -893,6 +903,31 @@ export const useChatStore = create<ChatState>((set, get) => ({
        }
        if (duplicateIndex === -1) {
          // Multiple message.started events from the agent create distinct
          // realtime entries with empty content. Replace the previous empty
          // assistant message instead of appending — prevents "3 individual
          // messages then one final" bug where each tool phase looks like a
          // separate assistant bubble.
          if (
            incomingMessage.role === 'assistant' &&
            newPlainText.length === 0 &&
            sessionMessages.length > 0
          ) {
            // Search backwards so the most recent assistant entry whose
            // extracted text is empty is the one that gets replaced.
            // NOTE(review): the search covers the whole session list, not only
            // its tail, and "empty" is decided solely by extractMessageText —
            // confirm that an older assistant entry carrying only non-text
            // content cannot be overwritten here.
            // NOTE(review): Array.prototype.findLastIndex is ES2023 — confirm
            // the project's `lib`/runtime targets provide it.
            const prevEmptyIdx = sessionMessages.findLastIndex(
              (m) =>
                m.role === 'assistant' &&
                extractMessageText(m).length === 0,
            )
            if (prevEmptyIdx >= 0) {
              // Overwrites the slot in place.
              // NOTE(review): presumably sessionMessages is a per-event copy
              // and not the array held in store state — verify against the
              // enclosing code.
              sessionMessages[prevEmptyIdx] = incomingMessage
              messages.set(
                sessionKey,
                sortMessagesChronologically(sessionMessages),
              )
              set({ realtimeMessages: messages, lastEventAt: now })
              // Leave the enclosing construct so the append path that follows
              // this block does not also add the message.
              break
            }
          }
          sessionMessages.push(incomingMessage)
          messages.set(sessionKey, sortMessagesChronologically(sessionMessages))
          set({ realtimeMessages: messages, lastEventAt: now })
@@ -1209,6 +1244,13 @@ export const useChatStore = create<ChatState>((set, get) => ({
      // Text-based duplicate check: only compares messages of the same role,
      // and only when the realtime message has non-empty text.
      if (histMsg.role === rtMsg.role && rtText) {
        const histText = extractMessageText(histMsg)
        // Exact text match.
        if (histText === rtText) return true
        // Streaming realtime text is a prefix of the final server text.
        // Match either direction to prevent duplicates when the server
        // returns the complete message after the realtime buffer had a
        // partial version.
        // The histText length guard is required: startsWith('') is always
        // true, so an empty history text would otherwise match everything.
        // NOTE(review): the prefix test applies to every role, not only to
        // streamed assistant messages, so a short message that happens to be
        // a prefix of another same-role message is also treated as a
        // duplicate — confirm this is acceptable.
        if (rtText.length > 0 && histText.length > 0) {
          if (histText.startsWith(rtText) || rtText.startsWith(histText)) return true
        }
      }
      const histRaw = histMsg as Record<string, unknown>
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -88,6 +88,15 @@ async function isClaudeAgentHealthy(port = 8642): Promise<boolean> {
 const config = defineConfig(({ mode, command }) => {
  const env = loadEnv(mode, process.cwd(), '')
  // Vite's loadEnv only populates the local `env` object — not process.env —
  // but server-side SSR runtime code reads env vars directly from process.env
  // (e.g. getBearerToken() in openai-compat-api.ts reads
  // process.env.HERMES_API_TOKEN). Copy each loaded variable across, skipping
  // any key that process.env already contains so existing values are never
  // overridden.
  for (const [name, value] of Object.entries(env)) {
    if (name in process.env) continue
    process.env[name] = value
  }
  const claudeApiUrl = env.CLAUDE_API_URL?.trim() || 'http://127.0.0.1:8642'
  // /api/connection-status is handled by the real route file at
  // src/routes/api/connection-status.ts; the dev server no longer