PR #543: chat UI/UX fixes — thinking indicators, message dedup, streaming stability (JohnGuidry)

Addresses #572 (double chat responses) + #561 (stuck Thinking indicator). Adds optimistic-message-reinject hook, vite loadEnv→process.env bridge for SSR bearer token, dedup + streaming stability. eslint --fix on touched files (net lint errors 1700→1588). Build GREEN, test 33 fail/694 pass (zero regressions).
2026-06-05 06:01:21 -04:00
parent ef2e4ba02b
commit 5271ca9ad3
14 changed files with 704 additions and 219 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -16,6 +16,7 @@ build
 .vinxi
 .nitro
 .tanstack
+.vite

 # Environment variables
 .env
--- a/src/routes/api/send-stream.ts
+++ b/src/routes/api/send-stream.ts
@@ -1,15 +1,10 @@
 import { createFileRoute } from '@tanstack/react-router'
 import { buildResolvedSessionHeaders } from '../../lib/send-stream-session-headers'
 import { buildWorkspaceScopedTextMessage } from '../../lib/workspace-message-scope'
-import {
-  collectSyntheticLiveToolEvents,
-  createSyntheticLiveToolTracker,
-} from './-send-stream-live-tools'
 import { resolveSessionKey } from '../../server/session-utils'
 import { isAuthenticated } from '../../server/auth-middleware'
 import { requireJsonContentType } from '../../server/rate-limit'
 import { publishChatEvent } from '../../server/chat-event-bus'
-import { loadWorkspaceCatalog } from './workspace'
 import {
  registerActiveSendRun,
  unregisterActiveSendRun,
@@ -22,8 +17,8 @@ import {
  upsertRunToolCall,
 } from '../../server/run-store'
 import { getChatMode } from '../../server/gateway-capabilities'
-import { ensureLocalSession, appendLocalMessage, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
-import { getLocalProviderDef, getDiscoveredModels } from '../../server/local-provider-discovery'
+import { appendLocalMessage, ensureLocalSession, getLocalMessages, touchLocalSession } from '../../server/local-session-store'
+import { getDiscoveredModels, getLocalProviderDef } from '../../server/local-provider-discovery'
 import { openaiChat } from '../../server/openai-compat-api'
 import { streamResponses } from '../../server/responses-api'
 import { selectPortableConversationHistory } from '../../server/portable-history'
@@ -36,6 +31,11 @@ import {
  listSessions,
  streamChat,
 } from '../../server/claude-api'
+import { loadWorkspaceCatalog } from './workspace'
+import {
+  collectSyntheticLiveToolEvents,
+  createSyntheticLiveToolTracker,
+} from './-send-stream-live-tools'
 import type {OpenAICompatContentPart, OpenAICompatMessage} from '../../server/openai-compat-api';
 // Claude agent runs can take 5+ minutes with complex tool chains
 const SEND_STREAM_RUN_TIMEOUT_MS = 600_000
@@ -386,10 +386,43 @@ export const Route = createFileRoute('/api/send-stream')({
        let streamTimeoutTimer: ReturnType<typeof setTimeout> | null = null
        let heartbeatTimer: ReturnType<typeof setInterval> | null = null
        const abortController = new AbortController()
+        // Close out the SSE stream: stop enqueueing (streamClosed), clear the
+        // timers, and abort the upstream Hermes gateway request. Repeat calls are
+        // no-ops. Does NOT touch run status; request.signal / handleAbort owns that.
+        // NOTE(review): start() declares its own heartbeatTimer — confirm this clears it.
         let closeStream = () => {
+          if (streamClosed) return
           streamClosed = true
+          if (heartbeatTimer) {
+            clearInterval(heartbeatTimer)
+            heartbeatTimer = null
+          }
+          if (unregisterTimer) {
+            clearTimeout(unregisterTimer)
+            unregisterTimer = null
+          }
+          if (streamTimeoutTimer) {
+            clearTimeout(streamTimeoutTimer)
+            streamTimeoutTimer = null
+          }
+          abortController.abort()
         }

+        // request.signal aborts when the client hits Stop / navigates away / closes
+        // the tab. If a run is still active and the stream is open, mark it 'handoff'
+        // and unregister it; then closeStream() stops the upstream agent (API credits).
+        function handleAbort() {
+          if (activeRunId && !streamClosed) {
+            persistActiveRun((runSessionKey, activeId) =>
+              markRunStatus(runSessionKey, activeId, 'handoff'),
+            )
+            unregisterActiveSendRun(activeRunId)
+            activeRunId = null
+          }
+          closeStream()
+        }
+        request.signal.addEventListener('abort', () => handleAbort(), { once: true })
+
        const persistRunStarted = (
          runId: string | undefined,
          runSessionKey: string,
@@ -419,6 +452,11 @@ export const Route = createFileRoute('/api/send-stream')({
          async start(controller) {
            let heartbeatTimer: ReturnType<typeof setInterval> | null = null
            let lastClientEventAt = Date.now()
+            // Track the last human-readable activity so the heartbeat can
+            // forward it to the UI. Without this the ThinkingBubble shows a
+            // static "Thinking…" for minutes when the agent is reasoning
+            // without tool calls, making it look hung.
+            let lastActivity: string | null = null
            const enqueueRaw = (payload: string) => {
              if (streamClosed) return
              controller.enqueue(encoder.encode(payload))
@@ -462,10 +500,6 @@ export const Route = createFileRoute('/api/send-stream')({
                clearTimeout(streamTimeoutTimer)
                streamTimeoutTimer = null
              }
-              if (heartbeatTimer) {
-                clearInterval(heartbeatTimer)
-                heartbeatTimer = null
-              }
              if (activeRunId) {
                unregisterActiveSendRun(activeRunId)
                activeRunId = null
@@ -481,9 +515,11 @@ export const Route = createFileRoute('/api/send-stream')({
            // Keep the SSE stream alive during long agent processing (tool calls,
            // slow LLM responses on large contexts). Without this the client-side
            // no-activity timer fires after 2-3 min and aborts the stream.
+            // Every 10s we also forward the last known activity so the UI can
+            // show meaningful progress instead of a static "Thinking…".
            heartbeatTimer = setInterval(() => {
-              sendEvent('heartbeat', { timestamp: Date.now() })
-            }, 30_000)
+              sendEvent('heartbeat', { timestamp: Date.now(), activity: lastActivity })
+            }, 10_000)

            try {
              if (chatMode === 'portable') {
@@ -514,6 +550,7 @@ export const Route = createFileRoute('/api/send-stream')({
                  sessionKey: portableSessionKey,
                  friendlyId: portableFriendlyId,
                })
+                lastActivity = 'Processing your message...'

                try {
                  const userContent = buildMultimodalContent(
@@ -569,7 +606,7 @@ export const Route = createFileRoute('/api/send-stream')({
                  const useResponsesApi =
                    process.env.HERMES_USE_RESPONSES === '1' && !localBaseUrl
                  if (useResponsesApi) {
-                    let thinking = ''
+                    const thinking = ''
                    // Track tool calls by callId so a `tool.completed`
                    // followed by `tool.output` can carry the full
                    // arguments forward without losing them.
@@ -615,7 +652,7 @@ export const Route = createFileRoute('/api/send-stream')({
                          })
                          const argsForCard =
                            ev.args && typeof ev.args === 'object'
-                              ? (ev.args as Record<string, unknown>)
+                              ? (ev.args)
                              : undefined
                          persistActiveRun((runSessionKey, activeId) =>
                            upsertRunToolCall(runSessionKey, activeId, {
@@ -633,6 +670,7 @@ export const Route = createFileRoute('/api/send-stream')({
                            sessionKey: portableSessionKey,
                            runId,
                          })
+                          lastActivity = `Running: ${ev.name.replace(/_/g, ' ')}`
                          continue
                        }
                        if (ev.kind === 'tool.completed') {
@@ -649,7 +687,7 @@ export const Route = createFileRoute('/api/send-stream')({
                          const state = toolStateByCallId.get(ev.callId)
                          const argsForCard =
                            state?.args && typeof state.args === 'object'
-                              ? (state.args as Record<string, unknown>)
+                              ? (state.args)
                              : undefined
                          const name = state?.name || 'tool'
                          persistActiveRun((runSessionKey, activeId) =>
@@ -670,6 +708,7 @@ export const Route = createFileRoute('/api/send-stream')({
                            sessionKey: portableSessionKey,
                            runId,
                          })
+                          lastActivity = `Completed: ${name.replace(/_/g, ' ')}`
                          continue
                        }
                        if (ev.kind === 'completed') {
@@ -1012,6 +1051,7 @@ export const Route = createFileRoute('/api/send-stream')({
                        sessionKey: sessionKeyFromEvent,
                        friendlyId: sessionKeyFromEvent,
                      })
+                      lastActivity = 'Processing your message...'
                    }

                    if (event === 'run.started') {
@@ -1137,6 +1177,7 @@ export const Route = createFileRoute('/api/send-stream')({
                      )
                      sendEvent('tool', translated)
                      skipPublish || publishChatEvent('tool', translated)
+                      lastActivity = `Running: ${toolName.replace(/_/g, ' ')}`
                      return
                    }

@@ -1155,6 +1196,7 @@ export const Route = createFileRoute('/api/send-stream')({
                        }
                        sendEvent('thinking', translated)
                        skipPublish || publishChatEvent('thinking', translated)
+                        lastActivity = delta.length > 60 ? delta.slice(0, 60) + '...' : delta
                        return
                      }
                      const translated = {
@@ -1203,6 +1245,7 @@ export const Route = createFileRoute('/api/send-stream')({
                      )
                      sendEvent('tool', translated)
                      skipPublish || publishChatEvent('tool', translated)
+                      lastActivity = `Completed: ${toolName.replace(/_/g, ' ')}`
                      return
                    }

@@ -1376,10 +1419,10 @@ export const Route = createFileRoute('/api/send-stream')({
                          )
                          const recent = persistedMessages.slice(
                            sliceFrom,
-                          ) as Array<Record<string, unknown>>
+                          )
                          let lastAssistantIndex = -1
                          for (let i = recent.length - 1; i >= 0; i--) {
-                            const m = recent[i] as Record<string, unknown>
+                            const m = recent[i]
                            if (m && m.role === 'assistant') {
                              lastAssistantIndex = i
                              break
@@ -1388,7 +1431,7 @@ export const Route = createFileRoute('/api/send-stream')({
                          if (lastAssistantIndex >= 0) {
                            const lastAssistant = recent[
                              lastAssistantIndex
-                            ] as Record<string, unknown>
+                            ]
                            const rawToolCalls = (lastAssistant.tool_calls ??
                              (lastAssistant as any).toolCalls) as
                              | Array<Record<string, unknown>>
@@ -1478,28 +1521,17 @@ export const Route = createFileRoute('/api/send-stream')({
            }
          },
           cancel() {
-            // Browser navigation/unmount cancels the response reader. That
-            // must not cancel the Hermes run itself: the chat/conductor should
-            // keep thinking server-side so the user can return and recover the
-            // answer from session history. Mark this client stream closed so we
-            // stop enqueueing SSE chunks, but deliberately leave the upstream
-            // abortController alone.
-            streamClosed = true
-            if (unregisterTimer) {
-              clearTimeout(unregisterTimer)
-              unregisterTimer = null
-            }
-            if (streamTimeoutTimer) {
-              clearTimeout(streamTimeoutTimer)
-              streamTimeoutTimer = null
-            }
-            if (activeRunId) {
+            // The response reader was cancelled (Stop, navigation, or tab close).
+            // If a run is active and the stream is still open, persist it as
+            // 'handoff', then delegate to closeStream() so timer/abort cleanup
+            // lives in one place. NOTE(review): unlike handleAbort, this path no
+            // longer unregisters the run or clears activeRunId — confirm intended.
+            if (activeRunId && !streamClosed) {
               persistActiveRun((runSessionKey, activeId) =>
                 markRunStatus(runSessionKey, activeId, 'handoff'),
               )
-              unregisterActiveSendRun(activeRunId)
-              activeRunId = null
             }
+            closeStream()
           },
        })

--- a/src/screens/chat/chat-screen.tsx
+++ b/src/screens/chat/chat-screen.tsx
@@ -1,8 +1,5 @@
 // Module-level local model override — set by composer when user picks a local model
 // Avoids prop threading. Reset when switching back to cloud models.
-export let _localModelOverride = ''
-export function setLocalModelOverride(model: string) { _localModelOverride = model }
-
 import {
  useCallback,
  useEffect,
@@ -21,12 +18,12 @@ import {
  textFromMessage,
 } from './utils'
 import {
+  
  advanceStickyStreamingText,
-  createResponseWaitSnapshot,
  createOptimisticMessage,
+  createResponseWaitSnapshot,
  isTerminalActiveRunStatus,
-  shouldClearWaitingForAssistantMessage,
-  type ResponseWaitSnapshot,
+  shouldClearWaitingForAssistantMessage
 } from './chat-screen-utils'
 import {
  appendHistoryMessage,
@@ -43,21 +40,20 @@ import { ChatEmptyState } from './components/chat-empty-state'
 import { ChatComposer } from './components/chat-composer'
 import { ConnectionStatusMessage } from './components/connection-status-message'
 import {
+  clearPendingSendForSession,
  consumePendingSend,
  hasPendingGeneration,
  hasPendingSend,
  isRecentSession,
  resetPendingSend,
  setPendingGeneration,
-  clearPendingSendForSession,
 } from './pending-send'
 import { useChatMeasurements } from './hooks/use-chat-measurements'
 import { useChatHistory } from './hooks/use-chat-history'
 import { useRealtimeChatHistory } from './hooks/use-realtime-chat-history'
+import { snapshotOptimisticUserMessages } from './hooks/optimistic-message-reinject'
 import { useSmoothStreamingText } from './hooks/use-smooth-streaming-text'
 import { useStreamingMessage } from './hooks/use-streaming-message'
-import { playChatComplete } from '@/lib/sounds'
-import { useChatSettingsStore } from '@/hooks/use-chat-settings'
 import { useActiveRunCheck } from './hooks/use-active-run-check'
 import { useChatMobile } from './hooks/use-chat-mobile'
 import { useChatSessions } from './hooks/use-chat-sessions'
@@ -70,6 +66,7 @@ import {
  CHAT_PENDING_COMMAND_STORAGE_KEY,
  CHAT_RUN_COMMAND_EVENT,
 } from './chat-events'
+import type {ResponseWaitSnapshot} from './chat-screen-utils';
 import type {
  ChatComposerAttachment,
  ChatComposerHandle,
@@ -79,6 +76,9 @@ import type {
 import type { ApprovalRequest } from '@/screens/gateway/lib/approvals-store'
 import type { ChatAttachment, ChatMessage, SessionMeta } from './types'
 import type { ChatRunCommandDetail } from './chat-events'
+import type {AgentActivity} from '@/stores/chat-activity-store';
+import { useChatSettingsStore } from '@/hooks/use-chat-settings'
+import { playChatComplete } from '@/lib/sounds'
 import {
  addApproval,
  loadApprovals,
@@ -101,12 +101,16 @@ import { MobileSessionsPanel } from '@/components/mobile-sessions-panel'
 import { ContextAlertModal } from '@/components/usage-meter/context-alert-modal'
 import { ErrorToastContainer, showErrorToast } from '@/components/error-toast'
 // ContextMeter removed — ContextBar (PR #32) replaces it
-import { useChatStore, persistRecoveryMessage } from '@/stores/chat-store'
+import { persistRecoveryMessage, useChatStore } from '@/stores/chat-store'
+import { useSessionModelStore } from '@/stores/session-model-store'
 import { useResearchCard } from '@/hooks/use-research-card'
 // MOBILE_TAB_BAR_OFFSET removed — tab bar always hidden in chat
 import { useTapDebug } from '@/hooks/use-tap-debug'
 import { useChatMode } from '@/hooks/use-chat-mode'
-import { useChatActivityStore, type AgentActivity } from '@/stores/chat-activity-store'
+import {  useChatActivityStore } from '@/stores/chat-activity-store'
+// Module-level local model override. NOTE(review): chat-composer imports setLocalModelOverride from '@/screens/chat/local-model-override' — confirm this copy is still read.
+export let _localModelOverride = ''
+export function setLocalModelOverride(model: string) { _localModelOverride = model }

 type ChatScreenProps = {
  activeFriendlyId: string
@@ -481,45 +485,6 @@ export function ChatScreen({
  const portableChatFriendlyId = isPortableMode ? 'main' : activeFriendlyId
  // --- Issue #43 fix: lift waitingForResponse into persistent Zustand store ---
  // The store survives component unmount, so navigating away mid-stream
-  // doesn't lose the "waiting" flag. sessionStorage backup handles reloads.
-  const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
-  // resolvedSessionKey isn't available yet (defined below), so we track it via
-  // a ref that's updated once it resolves. The memo/callback read the ref.
-  const sessionKeyForWaiting = useRef<string | undefined>(undefined)
-  const [activeRunCheckDone, setActiveRunCheckDone] = useState(false)
-
-  // Track stale-restored sessions that need API verification before showing thinking.
-  // On page reload, sessionStorage may contain stale "waiting" flags from a
-  // previous session. We must not show the thinking indicator until the
-  // active-run API check confirms the run is genuinely active. (Issue #449)
-  const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
-  const waitingForResponse = useMemo(() => {
-    const key = sessionKeyForWaiting.current
-    if (!key) return hasPendingSend() || hasPendingGeneration()
-
-    // If we restored waiting state from sessionStorage but haven't verified
-    // with the API yet, don't show thinking — it might be stale (Issue #449).
-    if (
-      storeWaiting.has(key) &&
-      pendingVerifySessionKeyRef.current === key &&
-      !activeRunCheckDone
-    ) {
-      return false
-    }
-
-    return storeWaiting.has(key)
-  }, [storeWaiting, activeRunCheckDone])
-
-  const setWaitingForResponse = useCallback((waiting: boolean) => {
-    const store = useChatStore.getState()
-    const key = sessionKeyForWaiting.current
-    if (!key) return
-    if (waiting) {
-      store.setSessionWaiting(key)
-    } else {
-      store.clearSessionWaiting(key)
-    }
-  }, [])
  const [liveToolActivity, setLiveToolActivity] = useState<
    Array<{ name: string; timestamp: number }>
  >([])
@@ -540,10 +505,18 @@ export function ChatScreen({
    if (typeof window === 'undefined') return 'low'
    const key = `claude-thinking-${activeFriendlyId || 'new'}`
    const stored = window.sessionStorage.getItem(key)
-    if (stored === 'off' || stored === 'low' || stored === 'adaptive')
+    if (stored === 'off' || stored === 'low' || stored === 'medium' || stored === 'high' || stored === 'adaptive')
      return stored
    return 'low'
  })
+  // Tracks whether the user has explicitly picked a thinking level for this session.
+  // A missing/absent sessionStorage key means we should fall back to the Hermes config default.
+  const thinkingInitializedByUserRef = useRef(false)
+  useEffect(() => {
+    if (typeof window === 'undefined') return
+    const key = `claude-thinking-${activeFriendlyId || 'new'}`
+    thinkingInitializedByUserRef.current = window.sessionStorage.getItem(key) !== null
+  }, [activeFriendlyId])
  const { alertOpen, alertThreshold, alertPercent, dismissAlert } =
    useContextAlert()

@@ -611,10 +584,61 @@ export function ChatScreen({
    portableMode: isPortableMode,
  })

+  // --- Waiting state management (Issue #43 + #449) ---
+  // resolvedSessionKey is now available (defined above from useChatHistory).
+  const storeWaiting = useChatStore((s) => s.waitingSessionKeys)
+  const sessionKeyForWaiting = useRef<string | undefined>(undefined)
+  const pendingVerifySessionKeyRef = useRef<string | undefined>(undefined)
+
  // Keep the waiting-state ref in sync with the resolved session key
  sessionKeyForWaiting.current = resolvedSessionKey

-  // Detect stale restored waiting state from sessionStorage — we need API
+  // Synchronously detect stale waiting state from sessionStorage.
+  // This runs during render (not in an effect) so the guard in
+  // waitingForResponse is active on the very first render, preventing
+  // a flash of the "Thinking" indicator when reopening an old session.
+  const needsStaleCheck =
+    resolvedSessionKey &&
+    !isNewChat &&
+    storeWaiting.has(resolvedSessionKey) &&
+    pendingVerifySessionKeyRef.current !== resolvedSessionKey
+
+  if (needsStaleCheck) {
+    pendingVerifySessionKeyRef.current = resolvedSessionKey
+  }
+
+  // Track whether the active-run API check has completed.
+  // Initialize to false when we detect stale state (needs verification),
+  // true otherwise. This prevents showing "Thinking" until the API confirms.
+  const [activeRunCheckDone, setActiveRunCheckDone] = useState(!needsStaleCheck)
+
+  const waitingForResponse = useMemo(() => {
+    const key = sessionKeyForWaiting.current
+    if (!key) return hasPendingSend() || hasPendingGeneration()
+
+    // If we restored waiting state from sessionStorage but haven't verified
+    // with the API yet, don't show thinking — it might be stale (Issue #449).
+    if (
+      storeWaiting.has(key) &&
+      pendingVerifySessionKeyRef.current === key &&
+      !activeRunCheckDone
+    ) {
+      return false
+    }
+
+    return storeWaiting.has(key)
+  }, [storeWaiting, activeRunCheckDone])
+
+  const setWaitingForResponse = useCallback((waiting: boolean) => {
+    const store = useChatStore.getState()
+    const key = sessionKeyForWaiting.current
+    if (!key) return
+    if (waiting) {
+      store.setSessionWaiting(key)
+    } else {
+      store.clearSessionWaiting(key)
+    }
+  }, [])
  // verification before showing thinking (Issue #449).
  useEffect(() => {
    const currentSessionKey = resolvedSessionKey
@@ -868,13 +892,12 @@ export function ChatScreen({

   const streamStart = useCallback(() => {
     if (!activeFriendlyId || isNewChat) return
-    // Bug #3 fix: no more 350ms polling loop — SSE handles realtime updates.
-    // Single delayed fetch as fallback to catch the initial response.
-    if (streamTimer.current) window.clearTimeout(streamTimer.current)
-    streamTimer.current = window.setTimeout(() => {
-      if (activeRealtimeStreamingRef.current) return
-      refreshHistoryRef.current()
-    }, 2000)
+    // Intentionally a no-op: the old 2s delayed refetch replaced the query
+    // cache before the server had echoed the optimistic user message, so the
+    // message vanished until the agent completed. The existing failsafes (the
+    // 5s + 10s timeouts further down, active-run polling) cover the case where
+    // SSE misses the done event.
+    void activeFriendlyId // NOTE(review): redundant; the guard above already reads it
   }, [activeFriendlyId, isNewChat])

  refreshHistoryRef.current = function refreshHistory() {
@@ -883,37 +906,21 @@ export function ChatScreen({
    // Snapshot any unconfirmed optimistic user messages BEFORE refetch.
    // The refetch replaces the query cache with server data — if the server
    // hasn't processed the user's POST yet, the optimistic message vanishes.
-    const currentMessages = (historyQuery.data as any)?.messages as
-      | Array<ChatMessage>
-      | undefined
-    const pendingOptimistic = (currentMessages ?? []).filter((msg) => {
-      const raw = msg as Record<string, unknown>
-      return (
-        msg.role === 'user' &&
-        (normalizeMessageValue(raw.__optimisticId).startsWith('opt-') ||
-          normalizeMessageValue(raw.status) === 'sending')
-      )
-    })
+    const historySessionKey = isPortableMode
+      ? 'main'
+      : activeSessionKey ||
+        sessionKeyForHistory ||
+        resolvedSessionKey ||
+        'main'
+    const reInjectOptimistic = snapshotOptimisticUserMessages(
+      queryClient,
+      portableChatFriendlyId,
+      historySessionKey,
+    )

    void historyQuery.refetch().then(() => {
      // Re-inject optimistic messages that weren't in the server response
-      if (pendingOptimistic.length === 0) return
-      const historySessionKey = isPortableMode
-        ? 'main'
-        : activeSessionKey ||
-          sessionKeyForHistory ||
-          resolvedSessionKey ||
-          'main'
-      if (!portableChatFriendlyId || !historySessionKey) return
-
-      for (const optimistic of pendingOptimistic) {
-        appendHistoryMessage(
-          queryClient,
-          portableChatFriendlyId,
-          historySessionKey,
-          optimistic,
-        )
-      }
+      reInjectOptimistic()
    })
  }

@@ -1018,6 +1025,29 @@ export function ChatScreen({
    retry: false,
  })

+  // Fetch Hermes' configured reasoning effort so Chat Controls can default to it.
+  // Any failure or unrecognized value resolves to 'low'; fresh for 10 min, no retry.
+  const reasoningEffortQuery = useQuery({
+    queryKey: ['hermes-config', 'reasoning-effort'],
+    queryFn: async () => {
+      try {
+        const res = await fetch('/api/hermes-config')
+        if (!res.ok) return 'low'
+        const data = await res.json() as { config?: Record<string, unknown> }
+        const agentSection = data?.config?.agent
+        if (agentSection && typeof agentSection === 'object' && !Array.isArray(agentSection)) {
+          const effort = (agentSection as Record<string, unknown>).reasoning_effort
+          if (effort === 'off' || effort === 'low' || effort === 'medium' || effort === 'high') return effort
+        }
+        return 'low'
+      } catch {
+        return 'low'
+      }
+    },
+    staleTime: 10 * 60 * 1000,
+    retry: false,
+  })
+
  const availableModelIds = useMemo(() => {
    const models = modelsQuery.data?.models || []
    return models.map((m: any) => m.id).filter((id: string) => id)
@@ -1054,6 +1084,16 @@ export function ChatScreen({
    }
  }, [currentModel, activeFriendlyId])

+  // If no per-session thinking level override exists, inherit from Hermes config
+  useEffect(() => {
+    if (thinkingInitializedByUserRef.current) return
+    const configEffort = reasoningEffortQuery.data
+    if (!configEffort) return
+    if (configEffort === 'off' || configEffort === 'low' || configEffort === 'medium' || configEffort === 'high') {
+      setThinkingLevel(configEffort)
+    }
+  }, [reasoningEffortQuery.data])
+
  // Persist thinking level changes to sessionStorage
  const handleThinkingLevelChange = useCallback(
    (level: ThinkingLevel) => {
@@ -1378,7 +1418,7 @@ export function ChatScreen({
      return deduped
    }

-    const nextMessages = [...deduped]
+    let nextMessages = [...deduped]
    const streamToolCalls = activeToolCalls.map((toolCall) => ({
      ...toolCall,
      phase: toolCall.phase,
@@ -1394,6 +1434,42 @@ export function ChatScreen({
      __streamToolCalls: streamToolCalls,
    } as ChatMessage

+    // Check if the server has already returned a completed assistant message
+    // that overlaps with the streaming text. If so, drop the streaming
+    // placeholder to avoid showing the same response twice. Only messages after
+    // the last user message are candidates, so earlier turns can never match.
+    const streamingText = stableActiveStreamingText.trim()
+    const lastUserIdx = nextMessages.map((m) => m.role).lastIndexOf('user')
+    const hasServerAssistantVersion = nextMessages.slice(lastUserIdx + 1).some((msg) => {
+      if (msg.role !== 'assistant') return false
+      if (msg.__streamingStatus === 'streaming') return false
+      // Text overlap in either direction: the stream may lead or trail the server copy.
+      if (streamingText.length > 0) {
+        const msgText = textFromMessage(msg).trim()
+        if (msgText.length > 0 && (
+          msgText === streamingText ||
+          msgText.startsWith(streamingText) ||
+          streamingText.startsWith(msgText)
+        )) {
+          return true
+        }
+      }
+      // Tool-call match: same number of calls and every streamed name is present.
+      if (streamToolCalls.length > 0) {
+        const msgContent = Array.isArray(msg.content) ? msg.content : []
+        const msgToolCalls = msgContent.filter((p: any) => p.type === 'toolCall')
+        if (msgToolCalls.length > 0 && msgToolCalls.length === streamToolCalls.length) {
+          return streamToolCalls.every((stc: any) =>
+            msgToolCalls.some((mtc: any) => mtc.name === stc.name)
+          )
+        }
+      }
+      return false
+    })
+    if (hasServerAssistantVersion) {
+      return nextMessages
+    }
+
    const existingStreamIdx = nextMessages.findIndex(
      (message) => message.__streamingStatus === 'streaming',
    )
@@ -1403,6 +1479,13 @@ export function ChatScreen({
        ...nextMessages[existingStreamIdx],
        ...streamingMsg,
      }
+      // Remove any other streaming messages (e.g. from mergeHistoryMessages
+      // appending a realtime message after finalDisplayMessages already
+      // injected a placeholder). Keep only one streaming placeholder.
+      const keepIdx = existingStreamIdx
+      nextMessages = nextMessages.filter(
+        (m, i) => i === keepIdx || m.__streamingStatus !== 'streaming',
+      )
      return nextMessages
    }

--- a/src/screens/chat/components/chat-composer.tsx
+++ b/src/screens/chat/components/chat-composer.tsx
@@ -36,8 +36,8 @@ import type {
 } from '@/components/slash-command-menu'
 import {
  DEFAULT_SLASH_COMMANDS,
-  mergeSlashCommands,
  SlashCommandMenu,
+  mergeSlashCommands,
 } from '@/components/slash-command-menu'
 import {
  PromptInput,
@@ -61,6 +61,7 @@ import {
  emitSearchModalEvent,
 } from '@/hooks/use-search-modal'
 import { setLocalModelOverride } from '@/screens/chat/local-model-override'
+import { formatModelName } from '@/lib/format-model-name'

 type ChatComposerAttachment = {
  id: string
@@ -72,7 +73,7 @@ type ChatComposerAttachment = {
  kind?: 'image' | 'file' | 'audio'
 }

-type ThinkingLevel = 'off' | 'low' | 'medium' | 'high'
+type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'

 type ChatComposerProps = {
  onSubmit: (
@@ -565,6 +566,43 @@ function getResolvedModelKey(model: string, provider?: string): string {
  return `${normalizedProvider}/${normalizedModel}`
 }

+/**
+ * Decides whether a catalog entry represents the model that is currently
+ * selected.
+ *
+ * `currentModel` may be a bare model id ("model-id") or a provider-qualified
+ * key ("provider/model-id"); catalog entries supply `entryId` and
+ * `entryProvider` separately. All three inputs are trimmed before comparing.
+ *
+ * An entry is considered current when any of these hold:
+ *   - the entry's qualified key ("<provider>/<id>", or just "<id>" when the
+ *     provider is blank) equals the current model,
+ *   - the current model is exactly the bare entry id,
+ *   - the current model ends with "/<id>", whatever precedes the slash.
+ *
+ * Note: the suffix rule accepts any provider prefix, so entries that share an
+ * id across providers all report as current.
+ *
+ * Returns false when either the current model or the entry id is blank.
+ */
+function isCurrentModel(
+  currentModel: string,
+  entryId: string,
+  entryProvider: string,
+): boolean {
+  const selected = currentModel.trim()
+  const id = entryId.trim()
+  const provider = entryProvider.trim()
+  if (selected === '' || id === '') return false
+
+  const qualifiedKey = provider === '' ? id : `${provider}/${id}`
+  return (
+    selected === qualifiedKey ||
+    selected === id ||
+    selected.endsWith(`/${id}`)
+  )
+}
+
 function isCanvasSupported(): boolean {
  if (typeof document === 'undefined') return false
  try {
@@ -1671,7 +1709,7 @@ function ChatComposerComponent({
  const promptPlaceholder = isMobileViewport
    ? 'Message...'
    : 'Ask anything... (↵ to send · ⇧↵ new line · ⌘⇧M switch model)'
-  const [serverCommands, setServerCommands] = useState<SlashCommandDefinition[]>([])
+  const [serverCommands, setServerCommands] = useState<Array<SlashCommandDefinition>>([])

  useEffect(() => {
    fetch('/api/commands')
@@ -2566,9 +2604,11 @@ function ChatComposerComponent({
                            unpinnedGroups.set(entry.provider, group)
                          }
                          const renderEntry = (entry: (typeof parsed)[0]) => {
-                            const isActive =
-                              entry.id === currentModel ||
-                              `${defaultProvider}/${entry.id}` === currentModel
+                            const isActive = isCurrentModel(
+                              persistedSessionModel || currentModel,
+                              entry.id,
+                              entry.provider,
+                            )
                            return (
                              <div
                                key={entry.id}
@@ -2757,9 +2797,9 @@ function ChatComposerComponent({
                        setIsThinkingMenuOpen(false)
                        setIsModelMenuOpen(false)
                      }}
-                      className="inline-flex h-8 items-center gap-1 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
-                      title="Chat controls"
-                      aria-label="Chat controls"
+                      className="inline-flex h-8 items-center gap-1.5 rounded-full bg-primary-100/70 px-2 text-xs font-medium text-primary-600 transition-colors hover:bg-primary-200/80 dark:hover:bg-primary-800/60"
+                      title={`Chat controls · ${modelButtonLabel}`}
+                      aria-label={`Chat controls, current model: ${modelButtonLabel}`}
                    >
                      <svg
                        width="13"
@@ -2779,6 +2819,7 @@ function ChatComposerComponent({
                        <circle cx="15" cy="12" r="2" fill="currentColor" stroke="none" />
                        <circle cx="11" cy="18" r="2" fill="currentColor" stroke="none" />
                      </svg>
+                      <span className="max-w-[5rem] truncate sm:max-w-[8rem] md:max-w-[10rem]">{formatModelName(modelButtonLabel)}</span>
                      <HugeiconsIcon icon={ArrowDown01Icon} size={11} />
                    </button>
                    {isControlsMenuOpen ? (
@@ -2946,7 +2987,11 @@ function ChatComposerComponent({
                                        unpinnedGroups.set(entry.provider, group)
                                      }
                                      const renderEntry = (entry: (typeof parsed)[0]) => {
-                                        const isActive = entry.id === currentModel || `${defaultProvider}/${entry.id}` === currentModel
+                                        const isActive = isCurrentModel(
+                                          persistedSessionModel || currentModel,
+                                          entry.id,
+                                          entry.provider,
+                                        )
                                        return (
                                          <div key={entry.id} className="group relative flex items-center">
                                            <button
--- a/src/screens/chat/components/chat-header.tsx
+++ b/src/screens/chat/components/chat-header.tsx
@@ -64,7 +64,7 @@ function formatMobileSessionTitle(rawTitle: string): string {
  return title
 }

-type ThinkingLevel = 'off' | 'low' | 'adaptive'
+type ThinkingLevel = 'off' | 'low' | 'medium' | 'high' | 'adaptive'

 type ChatHeaderProps = {
  activeTitle: string
--- a/src/screens/chat/components/chat-message-list.tsx
+++ b/src/screens/chat/components/chat-message-list.tsx
@@ -25,6 +25,7 @@ import { AssistantAvatar } from '@/components/avatars'
 import { cn } from '@/lib/utils'
 import { hapticTap } from '@/lib/haptics'
 import { CHAT_OPEN_MESSAGE_SEARCH_EVENT } from '@/screens/chat/chat-events'
+import { useChatStore } from '@/stores/chat-store'

 /** Duration (ms) the thinking indicator stays visible after waitingForResponse
 *  clears, giving the first response message time to render before the
@@ -179,28 +180,49 @@ type ThinkingBubbleProps = {
  liveToolActivity?: Array<{ name: string; timestamp: number }>
  researchCard?: UseResearchCardResult
  isCompacting?: boolean
+  /** When true, always show "Thinking…" regardless of activity. Used for the
+   * first 10s before the delayed activity feed appears. */
+  forceSimple?: boolean
 }

 /**
- * Premium shimmer thinking bubble — matches the assistant message position
- * with three bouncing dots, a gradient shimmer sweep, and a dynamic status
+ * Shows a thinking indicator with animated dots and a meaningful status
 * label that reflects what's actually happening (tool calls, etc.).
+ * When forceSimple is true, suppresses all activity labels — just "Thinking…".
 */
 function ThinkingBubble({
-  activeToolCalls: _activeToolCalls = [],
-  liveToolActivity: _liveToolActivity = [],
+  activeToolCalls = [],
+  liveToolActivity = [],
  researchCard,
  isCompacting = false,
+  forceSimple = false,
 }: ThinkingBubbleProps) {
-  const statusLabel = isCompacting ? 'Compacting context...' : 'Thinking…'
+  // Fallback activity from heartbeat — shows last known agent activity
+  // when no tool calls are in flight (e.g. during pure reasoning)
+  const heartbeatActivity = useChatStore((s) => s.heartbeatActivity)

-  // Elapsed time counter — resets when the status label changes (new tool)
+  // Build a meaningful status label from live activity
+  const activeToolNames = activeToolCalls
+    .filter((tc) => tc.phase !== 'done' && tc.phase !== 'complete' && tc.phase !== 'completed')
+    .map((tc) => tc.name.replace(/_/g, ' '))
+  const liveToolNames = liveToolActivity.map((a) => a.name.replace(/_/g, ' '))
+  const uniqueNames = [...new Set([...activeToolNames, ...liveToolNames])]
+  const activityLabel =
+    uniqueNames.length > 0
+      ? `Using: ${uniqueNames.slice(0, 3).join(', ')}${uniqueNames.length > 3 ? ` +${uniqueNames.length - 3} more` : ''}`
+      : null
+  const statusLabel = isCompacting
+    ? 'Compacting context...'
+    : forceSimple
+      ? 'Thinking…'
+      : activityLabel || heartbeatActivity || 'Thinking…'
+
+  // Elapsed time counter — counts from bubble mount, not from last label change
  const [elapsed, setElapsed] = useState(0)
  useEffect(() => {
-    setElapsed(0)
    const interval = window.setInterval(() => setElapsed((s) => s + 1), 1000)
    return () => window.clearInterval(interval)
-  }, [statusLabel])
+  }, [])

  const elapsedLabel =
    elapsed >= 60
@@ -351,6 +373,33 @@ function ThinkingBubble({
  )
 }

+/** Compact progress row: pulsing dot, the latest heartbeat activity from the
+ *  chat store (or "Working…" when none), and seconds elapsed since mount. */
+function StatusLine() {
+  const activity = useChatStore((state) => state.heartbeatActivity)
+  const [seconds, setSeconds] = useState(0)
+
+  // Tick once per second for as long as the row stays mounted.
+  useEffect(() => {
+    const tick = () => setSeconds((prev) => prev + 1)
+    const timerId = window.setInterval(tick, 1000)
+    return () => window.clearInterval(timerId)
+  }, [])
+
+  const minutes = Math.floor(seconds / 60)
+  const timeLabel =
+    seconds < 60 ? `${seconds}s` : `${minutes}m ${seconds % 60}s`
+
+  return (
+    <div className="flex items-center gap-2 text-[11px] text-primary-400 dark:text-primary-500 py-0.5">
+      <span className="inline-block size-1.5 rounded-full bg-amber-400 animate-pulse" />
+      <span className="opacity-80">{activity || 'Working…'}</span>
+      <span aria-hidden="true" className="opacity-40">·</span>
+      <span className="tabular-nums opacity-50 font-mono">{timeLabel}</span>
+    </div>
+  )
+}
+
 const VIRTUAL_ROW_HEIGHT = 136
 const VIRTUAL_OVERSCAN = 8
 const NEAR_BOTTOM_THRESHOLD = 200
@@ -606,6 +655,13 @@ function ChatMessageListComponent({
  const [unreadCount, setUnreadCount] = useState(0)
  const [expandAllToolSections, setExpandAllToolSections] = useState(false)

+  // Activity feed delay: only show tool activity after 10s of thinking.
+  // For the first 10s, the ThinkingBubble stays simple ("Thinking…").
+  const THINKING_ACTIVITY_DELAY_S = 10
+  const [thinkingElapsed, setThinkingElapsed] = useState(0)
+  const thinkingStartRef = useRef<number>(0)
+  const thinkingTimerRef = useRef<ReturnType<typeof setInterval> | null>(null)
+
  // Bug 2 fix: grace period — keep thinking indicator alive briefly after
  // waitingForResponse clears so the response message has time to render.
  const [thinkingGrace, setThinkingGrace] = useState(false)
@@ -1107,6 +1163,52 @@ function ChatMessageListComponent({
    researchCard && researchCard.steps.length > 0,
  )

+  // Compute visibility of the entire bottom thinking area — the same gate
+  // used for rendering (lines below). Start / stop the elapsed timer here.
+  const thinkingAreaVisible =
+    showTypingIndicator ||
+    showResearchCard ||
+    isCompacting ||
+    liveToolActivity.length > 0 ||
+    (isStreaming && !streamingText) ||
+    (isStreaming && activeToolCalls.length > 0)
+
+  // Track how long the thinking area has been visible to gate the delayed
+  // activity feed (10s threshold).
+  useEffect(() => {
+    if (thinkingAreaVisible) {
+      if (thinkingStartRef.current === 0) {
+        thinkingStartRef.current = Date.now()
+        setThinkingElapsed(0)
+      }
+      if (!thinkingTimerRef.current) {
+        thinkingTimerRef.current = setInterval(() => {
+          setThinkingElapsed(
+            Math.floor((Date.now() - thinkingStartRef.current) / 1000),
+          )
+        }, 250)
+      }
+    } else {
+      if (thinkingTimerRef.current) {
+        clearInterval(thinkingTimerRef.current)
+        thinkingTimerRef.current = null
+      }
+      thinkingStartRef.current = 0
+      setThinkingElapsed(0)
+    }
+    return () => {
+      if (thinkingTimerRef.current) {
+        clearInterval(thinkingTimerRef.current)
+        thinkingTimerRef.current = null
+      }
+    }
+  }, [thinkingAreaVisible])
+
+  const showActivityFeed =
+    thinkingElapsed >= THINKING_ACTIVITY_DELAY_S ||
+    activeToolCalls.length > 0 ||
+    liveToolActivity.length > 0
+
  const shouldBottomPin =
    visibleEntries.length > 0 ||
    showToolOnlyNotice ||
@@ -1146,11 +1248,11 @@ function ChatMessageListComponent({
          args: tcAny.args,
          preview:
            typeof tcAny.preview === 'string'
-              ? (tcAny.preview as string)
+              ? (tcAny.preview)
              : undefined,
          result:
            typeof tcAny.result === 'string'
-              ? (tcAny.result as string)
+              ? (tcAny.result)
              : undefined,
        }
      })
@@ -1823,12 +1925,12 @@ function ChatMessageListComponent({
                  liveToolActivity={liveToolActivity}
                  researchCard={researchCard}
                  isCompacting={isCompacting}
+                  forceSimple={!showActivityFeed}
                />
-                {/* Branch from the thinking bubble into a single compact
-                    TUI-style tool activity card. Use normalized streaming calls
-                    so the card appears for both structured tool events and the
-                    lighter live activity feed. */}
-                {normalizedStreamingToolCalls.length > 0 ? (
+                {/* After 10s of thinking, show activity feed. With tool calls:
+                    compact CLI-style TuiActivityCard (last 3). Without tool calls:
+                    a minimal status line showing elapsed time and heartbeat. */}
+                {showActivityFeed ? (
                  <div className="flex max-w-[var(--chat-content-max-width)]">
                    <div
                      className="ml-[14px] mr-2 w-px shrink-0"
@@ -1839,51 +1941,55 @@ function ChatMessageListComponent({
                      aria-hidden
                    />
                    <div className="min-w-0 flex-1 pt-1">
-                      <TuiActivityCard
-                        toolSections={normalizedStreamingToolCalls.map((tc) => {
-                          const phase = tc.phase
-                          const state =
-                            phase === 'error'
-                              ? ('output-error' as const)
-                              : phase === 'done'
-                                ? ('output-available' as const)
-                                : phase === 'running'
-                                  ? ('input-streaming' as const)
-                                  : ('input-available' as const)
-                          return {
-                            key: tc.id,
-                            type: tc.name,
-                            input:
-                              tc.args &&
-                              typeof tc.args === 'object' &&
-                              !Array.isArray(tc.args)
-                                ? (tc.args as Record<string, unknown>)
-                                : undefined,
-                            preview: tc.preview,
-                            outputText:
-                              state === 'output-available'
-                                ? tc.result || ''
-                                : '',
-                            errorText:
-                              state === 'output-error'
-                                ? tc.result || 'Tool failed'
-                                : undefined,
-                            state,
-                          }
-                        })}
-                        thinking={null}
-                        isStreaming={true}
-                        formatLabel={(name) => name.replace(/_/g, ' ')}
-                        formatArg={(_name, args) => {
-                          if (!args) return null
-                          const first = Object.values(args).find(
-                            (v) => typeof v === 'string' && v.trim(),
-                          )
-                          return typeof first === 'string'
-                            ? first.trim()
-                            : null
-                        }}
-                      />
+                      {normalizedStreamingToolCalls.length > 0 ? (
+                        <TuiActivityCard
+                          toolSections={normalizedStreamingToolCalls.slice(-3).map((tc) => {
+                            const phase = tc.phase
+                            const state =
+                              phase === 'error'
+                                ? ('output-error' as const)
+                                : phase === 'done'
+                                  ? ('output-available' as const)
+                                  : phase === 'running'
+                                    ? ('input-streaming' as const)
+                                    : ('input-available' as const)
+                            return {
+                              key: tc.id,
+                              type: tc.name,
+                              input:
+                                tc.args &&
+                                typeof tc.args === 'object' &&
+                                !Array.isArray(tc.args)
+                                  ? (tc.args as Record<string, unknown>)
+                                  : undefined,
+                              preview: tc.preview,
+                              outputText:
+                                state === 'output-available'
+                                  ? tc.result || ''
+                                  : '',
+                              errorText:
+                                state === 'output-error'
+                                  ? tc.result || 'Tool failed'
+                                  : undefined,
+                              state,
+                            }
+                          })}
+                          thinking={null}
+                          isStreaming={true}
+                          formatLabel={(name) => name.replace(/_/g, ' ')}
+                          formatArg={(_name, args) => {
+                            if (!args) return null
+                            const first = Object.values(args).find(
+                              (v) => typeof v === 'string' && v.trim(),
+                            )
+                            return typeof first === 'string'
+                              ? first.trim()
+                              : null
+                          }}
+                        />
+                      ) : (
+                        <StatusLine />
+                      )}
                    </div>
                  </div>
                ) : null}
@@ -1955,11 +2061,24 @@ function getStableMessageId(message: ChatMessage, index: number): string {
  }

  const timestamp = getRawMessageTimestamp(message)
+  const text = textFromMessage(message)
+  // Content-based fingerprint: hash of text content + timestamp.
+  // This survives reordering because it doesn't depend on array position.
+  const fingerprint = djb2(text.slice(0, 120))
  if (timestamp) {
-    return `${message.role ?? 'assistant'}-${timestamp}-${index}`
+    return `${message.role ?? 'assistant'}-${timestamp}-${fingerprint}`
  }

-  return `${message.role ?? 'assistant'}-${index}`
+  return `${message.role ?? 'assistant'}-${fingerprint}-${index}`
+}
+
+/** djb2 hash of the UTF-16 code units of `str`, as an unsigned base-36 string. */
+function djb2(str: string): string {
+  let acc = 5381
+  for (let pos = 0; pos < str.length; pos += 1) {
+    acc = (Math.imul(acc, 33) + str.charCodeAt(pos)) | 0
+  }
+  return (acc >>> 0).toString(36)
 }

 function getRawMessageTimestamp(message: ChatMessage): number | null {
--- a/src/screens/chat/components/message-item.tsx
+++ b/src/screens/chat/components/message-item.tsx
@@ -7,6 +7,11 @@ import {
  textFromMessage,
 } from '../utils'
 import { MessageActionsBar } from './message-actions-bar'
+import {
+  buildHermesActivitySummary,
+  shouldAutoExpandHermesActivityCard,
+} from './streaming-activity-ui'
+import { TuiActivityCard } from './tui-activity-card'
 import type { ChatAttachment, ChatMessage, ToolCallContent } from '../types'
 import type { ToolPart } from '@/components/prompt-kit/tool'
 import { AssistantAvatar, UserAvatar } from '@/components/avatars'
@@ -31,11 +36,6 @@ import {
  useChatSettingsStore,
 } from '@/hooks/use-chat-settings'
 import { cn } from '@/lib/utils'
-import {
-  buildHermesActivitySummary,
-  shouldAutoExpandHermesActivityCard,
-} from './streaming-activity-ui'
-import { TuiActivityCard } from './tui-activity-card'

 const WORDS_PER_TICK = 4
 const TICK_INTERVAL_MS = 50
@@ -2503,21 +2503,29 @@ function MessageItemComponent({
      {/* Grouped tool card above the assistant bubble. Only show once there
          is real assistant text in the bubble. While streaming with no text,
          the legacy ThinkingBubble in chat-message-list owns the visual and
-          renders its own branched TuiActivityCard so we don't double up. */}
+          renders its own branched TuiActivityCard so we don't double up.
+          When done streaming, show a compact tool-count chip instead of
+          the full expandable card. */}
      {!isUser &&
      finalToolSections.length > 0 &&
      (hasText || !effectiveIsStreaming) ? (
        <div className="w-full max-w-[var(--chat-content-max-width)] flex">
          <div className="w-6 shrink-0" aria-hidden />
          <div className="min-w-0 flex-1">
-            <TuiActivityCard
-              toolSections={finalToolSections}
-              thinking={null}
-              isStreaming={effectiveIsStreaming}
-              expandAll={expandAllToolSections}
-              formatLabel={formatToolDisplayLabel}
-              formatArg={keyArgLabel}
-            />
+            {effectiveIsStreaming ? (
+              <TuiActivityCard
+                toolSections={finalToolSections}
+                thinking={null}
+                isStreaming={effectiveIsStreaming}
+                expandAll={expandAllToolSections}
+                formatLabel={formatToolDisplayLabel}
+                formatArg={keyArgLabel}
+              />
+            ) : (
+              <span className="inline-block text-[11px] text-primary-400 dark:text-primary-500 py-0.5 opacity-60">
+                {finalToolSections.length} tool{finalToolSections.length !== 1 ? 's' : ''} used
+              </span>
+            )}
          </div>
        </div>
      ) : null}
--- a/src/screens/chat/hooks/optimistic-message-reinject.ts
+++ b/src/screens/chat/hooks/optimistic-message-reinject.ts
@@ -0,0 +1,88 @@
+import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
+import { textFromMessage } from '../utils'
+import type { QueryClient } from '@tanstack/react-query'
+import type { ChatMessage } from '../types'
+/** Trimmed copy of `value` when it is a string; '' for anything else. */
+function normalize(value: unknown): string {
+  return typeof value !== 'string' ? '' : value.trim()
+}
+
+/**
+ * Captures the user messages in the history cache that the server has not
+ * confirmed yet, and returns a closure that puts them back after the cache
+ * has been rewritten.
+ *
+ * A refresh swaps the cached history for server data, which may not contain
+ * a message the user just sent; without re-injection that message vanishes
+ * from the list until the server echoes it.
+ *
+ * A cached message is captured when its role is "user" and any of:
+ *   - its __optimisticId starts with "opt-",
+ *   - its status is "sending" or "queued",
+ *   - its status is "sent" and it carries a clientId/client_id but neither
+ *     an id nor a messageId.
+ *
+ * When the returned closure runs, a captured message is skipped if the cache
+ * already holds a user message that is the same object, has the same
+ * __optimisticId, has the same clientId, or has identical non-empty text
+ * with both timestamps set and less than 10 seconds apart. Everything else
+ * is appended with appendHistoryMessage.
+ *
+ * Usage:
+ *   const reInject = snapshotOptimisticUserMessages(queryClient, friendlyId, sessionKey)
+ *   await queryClient.invalidateQueries(...)
+ *   reInject()
+ */
+export function snapshotOptimisticUserMessages(
+  queryClient: QueryClient,
+  friendlyId: string,
+  sessionKey: string,
+): () => void {
+  const key = chatQueryKeys.history(friendlyId, sessionKey)
+  const readMessages = (): Array<Record<string, unknown>> => {
+    const data = queryClient.getQueryData<Record<string, unknown>>(key)
+    return (data?.messages as Array<Record<string, unknown>> | undefined) ?? []
+  }
+  const clientIdOf = (raw: Record<string, unknown>): string =>
+    normalize(raw.clientId) || normalize(raw.client_id)
+
+  const pending = readMessages().filter((raw) => {
+    if (raw.role !== 'user') return false
+    if (String(raw.__optimisticId ?? '').startsWith('opt-')) return true
+    const status = String(raw.status)
+    if (status === 'sending' || status === 'queued') return true
+    if (status !== 'sent') return false
+    // Acknowledged locally (clientId) but not yet echoed with a server id.
+    const hasServerId = normalize(raw.id).length > 0 || normalize(raw.messageId).length > 0
+    return clientIdOf(raw).length > 0 && !hasServerId
+  })
+
+  return () => {
+    const currentMessages = readMessages()
+
+    for (const raw of pending) {
+      const msg = raw as unknown as ChatMessage
+      const msgClientId = clientIdOf(raw)
+      const msgOptimisticId = normalize(raw.__optimisticId)
+      const msgText = textFromMessage(msg)
+      const msgTs = (raw.timestamp as number) || 0
+
+      const alreadyPresent = currentMessages.some((cached) => {
+        if (cached.role !== 'user') return false
+        // The snapshot entry itself survived the cache rewrite: appending it
+        // again would render the message twice.
+        if (cached === raw) return true
+        if (msgOptimisticId && normalize(cached.__optimisticId) === msgOptimisticId) return true
+        if (msgClientId && clientIdOf(cached) === msgClientId) return true
+        if (msgText.length === 0) return false
+        if (textFromMessage(cached as unknown as ChatMessage) !== msgText) return false
+        const cachedTs = (cached.timestamp as number) || 0
+        return Boolean(msgTs && cachedTs && Math.abs(msgTs - cachedTs) < 10_000)
+      })
+
+      if (!alreadyPresent) {
+        appendHistoryMessage(queryClient, friendlyId, sessionKey, msg)
+      }
+    }
+  }
+}
--- a/src/screens/chat/hooks/use-active-run-check.ts
+++ b/src/screens/chat/hooks/use-active-run-check.ts
@@ -22,9 +22,14 @@ type ActiveRunResponse = {
 const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
  'accepted',
  'active',
-  'handoff',
+  // NOTE: 'handoff' is deliberately excluded. A handoff run means the
+  // SSE client disconnected — the browser has no active stream. Keeping
+  // the waiting state alive for handoff runs causes ghost "Thinking"
+  // indicators on session reopen for runs that completed hours ago.
 ])

+const ACTIVE_RUN_CHECK_TIMEOUT_MS = 2000
+
 /**
 * On mount, checks whether the server has an active run for this session.
 * If so, marks the session as waiting in the persistent Zustand store.
@@ -33,6 +38,10 @@ const ACTIVE_STATUSES: ReadonlySet<string> = new Set([
 * This closes the gap where a user navigates away during streaming,
 * the component unmounts (losing local state), and on remount the UI
 * doesn't know a run was in progress.
+ *
+ * A timeout (ACTIVE_RUN_CHECK_TIMEOUT_MS) ensures the check never blocks
+ * the UI indefinitely — if the API is slow or unreachable, we assume the
+ * run is dead and clear stale waiting state.
 */
 export function useActiveRunCheck({
  sessionKey,
@@ -55,6 +64,25 @@ export function useActiveRunCheck({
    hasCheckedRef.current = true

    const controller = new AbortController()
+    let settled = false
+
+    const settle = () => {
+      if (settled) return
+      settled = true
+      onCompleteRef.current?.()
+    }
+
+    // Timeout: if the API check doesn't complete in time, assume the run is dead
+    const timeoutId = window.setTimeout(() => {
+      if (settled) return
+      settle()
+      try { controller.abort() } catch { /* ignore */ }
+      // Clear stale waiting state — the run is almost certainly dead
+      const store = useChatStore.getState()
+      if (store.isSessionWaiting(sessionKeyRef.current)) {
+        store.clearSessionWaiting(sessionKeyRef.current)
+      }
+    }, ACTIVE_RUN_CHECK_TIMEOUT_MS)

    async function check() {
      try {
@@ -62,10 +90,10 @@ export function useActiveRunCheck({
          `/api/sessions/${encodeURIComponent(sessionKey)}/active-run`,
          { signal: controller.signal },
        )
-        if (!response.ok) return
+        if (!response.ok) return finishCheck()

        const data = (await response.json()) as ActiveRunResponse
-        if (!data.ok) return
+        if (!data.ok) return finishCheck()

        const store = useChatStore.getState()
        if (data.run && ACTIVE_STATUSES.has(data.run.status)) {
@@ -75,15 +103,21 @@ export function useActiveRunCheck({
          store.clearSessionWaiting(sessionKey)
        }
      } catch {
-        // Network error or abort — ignore
+        // Network error or abort — swallowed; finishCheck() in `finally` cancels the timeout and settles
      } finally {
-        onCompleteRef.current?.()
+        finishCheck()
      }
    }

+    function finishCheck() {
+      window.clearTimeout(timeoutId)
+      settle()
+    }
+
    void check()

    return () => {
+      window.clearTimeout(timeoutId)
      controller.abort()
    }
  }, [sessionKey, enabled])
--- a/src/screens/chat/hooks/use-chat-history.ts
+++ b/src/screens/chat/hooks/use-chat-history.ts
@@ -702,11 +702,19 @@ function mergeOptimisticHistoryMessages(
    }

    // Preserve unconfirmed optimistic messages regardless of age.
+    // Also preserve confirmed-sent messages that have a clientId but no
+    // server id yet — they were acknowledged by SSE (onStarted) but
+    // haven't been echoed by the server. Periodic refetches will drop
+    // them otherwise (the "user message disappears" bug).
    const isSending =
      optimisticMessage.status === 'sending' ||
      Boolean(optimisticMessage.__optimisticId)
+    const isSentButUnechoed =
+      optimisticMessage.status === 'sent' &&
+      Boolean(getMessageClientId(optimisticMessage)) &&
+      !optimisticMessage.id

-    if (isSending) {
+    if (isSending || isSentButUnechoed) {
      merged.push(optimisticMessage)
    }
  }
--- a/src/screens/chat/hooks/use-realtime-chat-history.ts
+++ b/src/screens/chat/hooks/use-realtime-chat-history.ts
@@ -5,6 +5,7 @@ import { useChatStore } from '../../../stores/chat-store'
 import { appendHistoryMessage, chatQueryKeys } from '../chat-queries'
 import { toast } from '../../../components/ui/toast'
 import { textFromMessage } from '../utils'
+import { snapshotOptimisticUserMessages } from './optimistic-message-reinject'
 import type { ChatMessage } from '../types'
 import type { StreamingState } from '../../../stores/chat-store'

@@ -324,6 +325,14 @@ export function useRealtimeChatHistory({
            const prevCount =
              (prevData?.messages as Array<unknown> | undefined)?.length ?? 0

+            // Snapshot optimistic user messages before refetch so they
+            // survive the cache replacement. Re-injected after refetch.
+            const reInjectOptimistic = snapshotOptimisticUserMessages(
+              queryClient,
+              effectiveFriendlyId,
+              effectiveSessionKey,
+            )
+
            // Issue #441 fix: Directly merge realtime buffer into history cache
            // INSTEAD of invalidateQueries. The old approach caused a race:
            // invalidateQueries → refetch (async) → merge runs with stale data
@@ -418,6 +427,8 @@ export function useRealtimeChatHistory({
                  )
                }
              }
+              // Re-inject optimistic user messages that the server hasn't echoed yet
+              reInjectOptimistic()
            })

            // Check for compaction — significant message count drop
--- a/src/screens/chat/hooks/use-streaming-message.ts
+++ b/src/screens/chat/hooks/use-streaming-message.ts
@@ -241,6 +241,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        error: message,
      }))
      onError?.(message)
+      useChatStore.getState().setHeartbeatActivity(null)
    },
    [
      clearHandoffTimer,
@@ -429,6 +430,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
      }

      onComplete?.(message)
+      useChatStore.getState().setHeartbeatActivity(null)
    },
    [clearHandoffTimer, onComplete, stopFrame, unregisterSendStreamRun],
  )
@@ -444,7 +446,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        typeof window !== 'undefined' &&
        window.localStorage?.getItem('hermes:debug:sse') === '1'
      ) {
-        // eslint-disable-next-line no-console
+         
        console.log(
          '[hermes-sse]',
          event,
@@ -754,6 +756,8 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        }
        case 'heartbeat': {
          markActivity()
+          const activity: unknown = (payload as { activity?: unknown } | null | undefined)?.activity // untyped wire data: tolerate a missing payload
+          useChatStore.getState().setHeartbeatActivity(typeof activity === 'string' ? activity : null)
          break
        }
        case 'close': {
@@ -851,6 +855,7 @@ export function useStreamingMessage(options: UseStreamingMessageOptions = {}) {
        streamingText: '',
        error: null,
      })
+      useChatStore.getState().setHeartbeatActivity(null)

      try {
        const response = await fetch('/api/send-stream', {
--- a/src/stores/chat-store.ts
+++ b/src/stores/chat-store.ts
@@ -140,6 +140,11 @@ type ChatState = {
  clearSessionWaiting: (sessionKey: string) => void
  /** Check if a session is waiting for a response */
  isSessionWaiting: (sessionKey: string) => boolean
+
+  /** Last activity description forwarded via heartbeat — used by ThinkingBubble
+   *  to show meaningful progress during long reasoning stretches */
+  heartbeatActivity: string | null
+  setHeartbeatActivity: (activity: string | null) => void
 }

 const createEmptyStreamingState = (): StreamingState => ({
@@ -641,6 +646,7 @@ export const useChatStore = create<ChatState>((set, get) => ({
  sendStreamRunIds: new Set(),
  waitingSessionKeys: _restoredWaiting.keys,
  waitingSessionMeta: _restoredWaiting.meta,
+  heartbeatActivity: null,

  setConnectionState: (connectionState, error) => {
    set({ connectionState, lastError: error ?? null })
@@ -687,6 +693,10 @@ export const useChatStore = create<ChatState>((set, get) => ({
    return get().waitingSessionKeys.has(sessionKey)
  },

+  setHeartbeatActivity: (activity) => {
+    set({ heartbeatActivity: activity })
+  },
+
  processEvent: (event) => {
    const state = get()
    const sessionKey = event.sessionKey
@@ -893,6 +903,31 @@ export const useChatStore = create<ChatState>((set, get) => ({
        }

        if (duplicateIndex === -1) {
+          // Multiple message.started events from the agent create distinct
+          // realtime entries with empty content. Replace the previous empty
+          // assistant message of the CURRENT turn instead of appending, so
+          // each tool phase does not render as its own assistant bubble.
+          // Text-less assistant messages from earlier turns are left untouched.
+          if (
+            incomingMessage.role === 'assistant' &&
+            newPlainText.length === 0 &&
+            sessionMessages.length > 0
+          ) {
+            // Bound the search at the last user message (-1 = whole session).
+            const turnStart = sessionMessages.findLastIndex((m) => m.role === 'user')
+            const prevEmptyIdx = sessionMessages.findLastIndex(
+              (m, i) => i > turnStart && m.role === 'assistant' && extractMessageText(m).length === 0,
+            )
+            if (prevEmptyIdx >= 0) {
+              sessionMessages[prevEmptyIdx] = incomingMessage
+              messages.set(
+                sessionKey,
+                sortMessagesChronologically(sessionMessages),
+              )
+              set({ realtimeMessages: messages, lastEventAt: now })
+              break
+            }
+          }
          sessionMessages.push(incomingMessage)
          messages.set(sessionKey, sortMessagesChronologically(sessionMessages))
          set({ realtimeMessages: messages, lastEventAt: now })
@@ -1209,6 +1244,13 @@ export const useChatStore = create<ChatState>((set, get) => ({
      if (histMsg.role === rtMsg.role && rtText) {
        const histText = extractMessageText(histMsg)
        if (histText === rtText) return true
+        // Streaming realtime text may be a prefix of the final server text (or
+        // vice versa). Treat a prefix relation as the same message, but only
+        // when the shared prefix is long enough to be unlikely a coincidence:
+        // short partials like "I" or "Sure" would match unrelated messages.
+        const sharedLen = Math.min(rtText.length, histText.length)
+        const isPrefixPair = histText.startsWith(rtText) || rtText.startsWith(histText)
+        if (sharedLen >= 16 && isPrefixPair) return true
      }

      const histRaw = histMsg as Record<string, unknown>
--- a/vite.config.ts
+++ b/vite.config.ts
@@ -88,6 +88,15 @@ async function isClaudeAgentHealthy(port = 8642): Promise<boolean> {

 const config = defineConfig(({ mode, command }) => {
  const env = loadEnv(mode, process.cwd(), '')
+  // Bridge loadEnv into process.env for server-side SSR runtime code that
+  // reads env vars directly from process.env (e.g. getBearerToken() in
+  // openai-compat-api.ts reads process.env.HERMES_API_TOKEN). Without this,
+  // Vite's loadEnv only populates the local `env` object — not process.env.
+  for (const key of Object.keys(env)) {
+    if (!(key in process.env)) {
+      process.env[key] = env[key]
+    }
+  }
  const claudeApiUrl = env.CLAUDE_API_URL?.trim() || 'http://127.0.0.1:8642'
  // /api/connection-status is handled by the real route file at
  // src/routes/api/connection-status.ts; the dev server no longer