feat(matrix): land QA follow-ups and refresh docs

- harden Matrix onboarding/chat lifecycle after manual QA
- refresh README and Matrix docs to match current behavior
- add local ignores for runtime artifacts and include current planning/report docs

Closes #7
Closes #9
Closes #14
2026-04-05 19:08:58 +03:00 · 2026-04-05 19:08:58 +03:00 · 6ced154124
commit 6ced154124
parent 7fce4c9b3e
35 changed files with 8380 additions and 67 deletions
--- a/bot-examples/llm_session.py
+++ b/bot-examples/llm_session.py
@ -0,0 +1,635 @@
+"""Claude CLI session manager.
+
+Manages Claude Code CLI sessions per topic. Each topic gets a persistent
+session ID so conversation context is maintained across messages.
+
+Uses --output-format stream-json with asyncio subprocess to stream responses.
+Falls back to claude-zai if primary claude fails.
+
+Timeout: idle-based (resets on any output from Claude) + hard ceiling.
+Status: streams tool_use/agent events via on_status callback.
+Cancel: external cancel_event to stop processing.
+"""
+
+import asyncio
+import json
+import logging
+import os
+import shutil
+import time
+import uuid
+from collections.abc import Callable
+from pathlib import Path
+
+from core.config import Config
+
+logger = logging.getLogger(__name__)
+
+
+def _session_path(data_dir: Path, topic_id: int | str, provider: str = "") -> Path:
+    """Return the file that stores the session ID for *topic_id*.
+
+    The file lives at ``<data_dir>/topics/<topic_id>/session.txt``; a
+    non-empty *provider* turns the name into ``session_<provider>.txt`` so
+    each provider keeps its own session.
+    """
+    filename = f"session_{provider}.txt" if provider else "session.txt"
+    return data_dir.joinpath("topics", str(topic_id), filename)
+
+
+def load_session(data_dir: Path, topic_id: int | str, provider: str = "") -> str | None:
+    """Load the stored session ID for a topic.
+
+    Args:
+        data_dir: Root data directory.
+        topic_id: Topic whose session file is read.
+        provider: Optional provider name selecting a provider-specific file.
+
+    Returns:
+        The stripped session ID, or None when the file is missing or holds
+        only whitespace.
+    """
+    path = _session_path(data_dir, topic_id, provider)
+    # Read directly instead of exists()+read: avoids the window in which the
+    # file can disappear between the check and the read.
+    try:
+        session_id = path.read_text(encoding="utf-8").strip()
+    except FileNotFoundError:
+        return None
+    # An empty file carries no usable ID — report it the same as "no session".
+    return session_id or None
+
+
+def save_session(data_dir: Path, topic_id: int | str, session_id: str, provider: str = "") -> None:
+    """Persist *session_id* for a topic, creating the topic directory if needed.
+
+    Any previously stored ID for the same topic/provider is overwritten.
+    """
+    target = _session_path(data_dir, topic_id, provider)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    with target.open("w") as handle:
+        handle.write(session_id)
+
+
+async def send_message(
+    config: Config,
+    topic_id: int | str,
+    message: str,
+    on_chunk: Callable | None = None,
+    on_question: Callable | None = None,
+    on_status: Callable | None = None,
+    cancel_event: asyncio.Event | None = None,
+    idle_timeout_ref: list | None = None,
+    user_profile: str = "",
+    workspace_dir: Path | None = None,
+) -> str:
+    """Send a message to Claude CLI and return the response.
+
+    The primary provider (``config.claude_cmd``) is tried first. If it raises
+    RuntimeError and the user has not cancelled, the request is retried with
+    the ``claude-zai`` command (separate "zai" session, ``opus`` model) and a
+    fallback note is appended to the response.
+
+    Args:
+        config: Application config.
+        topic_id: Topic ID (determines session and working directory).
+        message: User message text.
+        on_chunk: Optional async callback(text_so_far) for streaming updates.
+        on_question: Optional async callback(question) -> answer for ask-user tool.
+        on_status: Optional async callback(dict) for tool/agent status events.
+        cancel_event: Optional asyncio.Event — set to cancel processing.
+        idle_timeout_ref: Optional mutable [int] — current idle timeout in seconds.
+                          Can be modified externally (e.g. user "more time" command).
+        user_profile: Optional user profile text (from user.md) to inject into system prompt.
+        workspace_dir: Optional per-user workspace directory path.
+
+    Returns:
+        Full response text.
+
+    Raises:
+        RuntimeError: "Cancelled" if the user cancelled while an attempt was
+            failing; otherwise if both primary and fallback CLI fail. The
+            underlying failure is attached as ``__cause__``.
+    """
+
+    def _cancelled() -> bool:
+        """True when the caller supplied a cancel event and it has been set."""
+        return cancel_event is not None and cancel_event.is_set()
+
+    # Try primary provider first
+    try:
+        return await _send_with_provider(config, topic_id, message, on_chunk, on_question,
+                                         on_status=on_status, cancel_event=cancel_event,
+                                         idle_timeout_ref=idle_timeout_ref,
+                                         provider="", user_profile=user_profile,
+                                         workspace_dir=workspace_dir)
+    except RuntimeError as e:
+        # Don't fallback if user cancelled
+        if _cancelled():
+            raise RuntimeError("Cancelled") from e
+        logger.warning("Primary claude failed (%s), trying fallback (claude-zai)", e)
+
+    # Fallback: claude-zai with separate session (using opus model)
+    try:
+        response = await _send_with_provider(
+            config, topic_id, message, on_chunk, on_question,
+            on_status=on_status, cancel_event=cancel_event,
+            idle_timeout_ref=idle_timeout_ref,
+            provider="zai", cmd_override="claude-zai", model_override="opus",
+            user_profile=user_profile, workspace_dir=workspace_dir,
+        )
+    except RuntimeError as e:
+        # A cancel during the fallback attempt is still a cancel, not a
+        # double provider failure — report it the same way as above.
+        if _cancelled():
+            raise RuntimeError("Cancelled") from e
+        raise RuntimeError("Both claude and claude-zai failed") from e
+
+    # Add note that fallback provider was used
+    return response + "\n\n_[(via z.ai fallback)]_"
+
+
+async def _watch_questions(topic_dir: Path, on_question: Callable) -> None:
+    """Poll the topic directory for ask-user requests and relay them.
+
+    Every 0.5 s the function looks for ``ask-user.json`` in *topic_dir*. When
+    the file is present, its "question" field is handed to ``on_question``
+    and the awaited answer is written to ``ask-user.fifo``; the request file
+    is then deleted. Any error while handling a request is logged and the
+    request file is deleted as well. The loop never exits on its own.
+    """
+    request_path = topic_dir / "ask-user.json"
+    reply_path = topic_dir / "ask-user.fifo"
+    while True:
+        await asyncio.sleep(0.5)
+        if request_path.exists():
+            try:
+                payload = json.loads(request_path.read_text())
+                prompt = payload.get("question", "")
+                logger.info("Claude asks user: %s", prompt[:200])
+                reply = await on_question(prompt)
+                # Writing the reply is what unblocks the ask-user script.
+                # NOTE(review): this open() is synchronous; if the path is a
+                # real FIFO it presumably blocks the event loop until a
+                # reader opens it — confirm against the ask-user script.
+                with open(reply_path, "w") as sink:
+                    sink.write(reply)
+                request_path.unlink(missing_ok=True)
+            except Exception as e:
+                logger.error("Error handling ask-user: %s", e)
+                request_path.unlink(missing_ok=True)
+
+
+def _tool_preview(tool_name: str, raw_input: str) -> str:
+    """Extract a human-readable preview from tool input JSON.
+
+    Args:
+        tool_name: Name of the tool being invoked (e.g. "Bash", "Read").
+        raw_input: Tool input serialized as a JSON object.
+
+    Returns:
+        A truncated preview string. Input that is not valid JSON, or whose
+        JSON is not an object, yields the first 200 characters of the raw
+        text ("" for None). Known tools show their most descriptive field;
+        any other tool shows its first ``key=value`` pair, or "" when the
+        object is empty.
+    """
+
+    def _clip(value: object, limit: int) -> str:
+        """Stringify *value* (None becomes "") and truncate to *limit* chars."""
+        return "" if value is None else str(value)[:limit]
+
+    try:
+        inp = json.loads(raw_input)
+    except (json.JSONDecodeError, TypeError):
+        # TypeError means raw_input was not str/bytes at all, so it must be
+        # stringified before slicing rather than sliced directly.
+        return _clip(raw_input, 200)
+    if not isinstance(inp, dict):
+        # Valid JSON but not an object (list, number, ...): nothing to look up.
+        return _clip(raw_input, 200)
+
+    # Tools whose preview is a single field: tool -> (field, max length).
+    single_field = {
+        "Bash": ("command", 500),
+        "Read": ("file_path", 300),
+        "Write": ("file_path", 300),
+        "Edit": ("file_path", 300),
+        "Glob": ("pattern", 200),
+        "Grep": ("pattern", 200),
+        "WebSearch": ("query", 200),
+        "WebFetch": ("url", 300),
+    }
+    if tool_name in single_field:
+        key, limit = single_field[tool_name]
+        return _clip(inp.get(key, ""), limit)
+
+    if tool_name == "Agent":
+        # Prefer the short description; fall back to the prompt text.
+        desc = _clip(inp.get("description"), 200)
+        return desc or _clip(inp.get("prompt"), 300)
+
+    if tool_name == "TodoWrite":
+        todos = inp.get("todos", [])
+        if isinstance(todos, list) and todos:
+            # Show at most the first three todo items.
+            return "; ".join(
+                _clip(t.get("content", "") if isinstance(t, dict) else t, 80)
+                for t in todos[:3]
+            )
+        # Empty/absent todos fall through to the generic preview below.
+
+    # Generic: show first key=value
+    for k, v in inp.items():
+        return f"{k}={str(v)[:200]}"
+    return ""
+
+
+def _load_conversation_log(data_dir: Path, topic_id: str, limit: int = 5) -> str:
+    """Load recent conversation log for context.
+
+    Reads ``<data_dir>/rooms/<topic_id>/log.jsonl`` and formats the last
+    *limit* non-blank lines as "User:" / "Bot:" pairs, so Claude has context
+    even after session resets or fallback switches.
+
+    Args:
+        data_dir: Root data directory.
+        topic_id: Topic/room whose log is read.
+        limit: Maximum number of trailing log lines to consider. Values
+            <= 0 yield an empty result.
+
+    Returns:
+        Newline-joined summary, or "" when the log is missing, unreadable,
+        or has no usable entries. Lines that are not a JSON object are
+        skipped (they still count toward *limit*).
+    """
+    from collections import deque  # local import keeps the block self-contained
+
+    if limit <= 0:
+        # A plain entries[-limit:] slice would return *everything* for 0.
+        return ""
+
+    log_file = data_dir / "rooms" / str(topic_id) / "log.jsonl"
+    try:
+        with open(log_file, encoding="utf-8", errors="replace") as f:
+            # Bounded deque: only the tail is kept in memory and parsed.
+            tail = deque((ln for ln in f if ln.strip()), maxlen=limit)
+    except OSError:
+        # Missing or unreadable log — context is best-effort.
+        return ""
+
+    def _field(entry: dict, key: str, width: int) -> str:
+        """Return entry[key] as text truncated to *width* ("" if absent/null)."""
+        value = entry.get(key)
+        return "" if value is None else str(value)[:width]
+
+    parts = []
+    for raw in tail:
+        try:
+            entry = json.loads(raw)
+        except ValueError:
+            # A torn or corrupt line must not discard the remaining context.
+            continue
+        if not isinstance(entry, dict):
+            continue
+        ts = _field(entry, "ts", 16).replace("T", " ")
+        parts.append(f"[{ts}] User: {_field(entry, 'user', 300)}")
+        parts.append(f"[{ts}] Bot: {_field(entry, 'bot', 500)}")
+    return "\n".join(parts)
+
+
+async def _send_with_provider(
+    config: Config,
+    topic_id: int | str,
+    message: str,
+    on_chunk: Callable | None,
+    on_question: Callable | None,
+    on_status: Callable | None = None,
+    cancel_event: asyncio.Event | None = None,
+    idle_timeout_ref: list | None = None,
+    provider: str = "",
+    cmd_override: str | None = None,
+    model_override: str | None = None,
+    user_profile: str = "",
+    workspace_dir: Path | None = None,
+    _retry_count: int = 0,
+) -> str:
+    """Send message using a specific provider.
+
+    Runs one CLI invocation in the topic directory, streams its stream-json
+    stdout, forwards status/text through the callbacks, and enforces the
+    cancel / idle / hard timeouts through a watchdog task. Every raw stdout
+    line is appended to ``stream.jsonl`` in the topic directory.
+
+    Args:
+        config: Application config (reads data_dir, claude_cmd, workspace_dir,
+            allowed_tools, claude_idle_timeout, claude_max_timeout).
+        topic_id: Topic ID (selects the session file and working directory).
+        message: User message text, passed as the final CLI argument.
+        on_chunk: Optional async callback(text_so_far) for streaming updates.
+        on_question: Optional async callback(question) -> answer; when given,
+            a watcher task for ask-user requests is started.
+        on_status: Optional async callback(dict) for tool/agent status events.
+        cancel_event: Optional event; once set the watchdog kills the CLI.
+        idle_timeout_ref: Optional mutable [seconds]; read on every watchdog
+            tick so it can be changed while the CLI runs.
+        provider: Provider name used to pick the session file ("" = primary).
+        cmd_override: CLI executable to use instead of ``config.claude_cmd``.
+        model_override: If set, passed to the CLI via ``--model``.
+        user_profile: User profile text for the system prompt; when empty,
+            ``user.md`` in ``config.workspace_dir`` is used if it exists.
+        workspace_dir: Optional per-user workspace (WORKSPACE.md and .env).
+        _retry_count: Internal — number of empty-response retries so far.
+
+    Returns:
+        The response text. A partial response after a cancel/timeout carries
+        a bracketed suffix; "(no response)" is returned if the CLI produced
+        nothing even after one retry.
+
+    Raises:
+        RuntimeError: On cancel/timeout without a usable response, when the
+            output matches a known error pattern, or when the CLI exits
+            non-zero without output.
+    """
+    existing_session = load_session(config.data_dir, topic_id, provider)
+    topic_dir = config.data_dir / "topics" / str(topic_id)
+    topic_dir.mkdir(parents=True, exist_ok=True)
+
+    cmd = cmd_override or config.claude_cmd
+
+    # Build args: --resume for existing sessions, --session-id for new ones
+    if existing_session:
+        session_flag = ["--resume", existing_session]
+    else:
+        new_id = str(uuid.uuid4())
+        session_flag = ["--session-id", new_id]
+
+    # User profile: prefer explicit parameter, fallback to workspace user.md
+    user_context = ""
+    if user_profile:
+        user_context = f"\n\nUSER PROFILE:\n{user_profile}\n"
+    elif config.workspace_dir:
+        user_md = config.workspace_dir / "user.md"
+        if user_md.exists():
+            user_context = f"\n\nUSER PROFILE:\n{user_md.read_text().strip()}\n"
+
+    # Load recent conversation log — provides context after session resets,
+    # fallback switches, or timeouts. Always included so Claude knows what happened.
+    conv_log = _load_conversation_log(config.data_dir, str(topic_id))
+    conv_context = ""
+    if conv_log:
+        conv_context = (
+            "\n\nRECENT CONVERSATION LOG (from bot's perspective, "
+            "may overlap with your session memory — use to fill gaps "
+            "after timeouts or session switches):\n" + conv_log + "\n"
+        )
+
+    # Per-user workspace context
+    workspace_context = ""
+    if workspace_dir and workspace_dir.is_dir():
+        ws_md = workspace_dir / "WORKSPACE.md"
+        if ws_md.exists():
+            workspace_context = (
+                f"\n\nUSER WORKSPACE ({workspace_dir}):\n"
+                f"{ws_md.read_text().strip()}\n"
+                f"\nYour working directory is the topic dir ({topic_dir}). "
+                f"Use it for scratch work (scripts, downloads, temp files). "
+                f"Save important/refined results to the workspace at {workspace_dir}. "
+                f"The workspace is a git repo — your changes will be committed automatically.\n"
+            )
+
+    # Paths Claude should know about
+    room_dir = config.data_dir / "rooms" / str(topic_id)
+    log_file = room_dir / "log.jsonl"
+    history_file = room_dir / "history.jsonl"
+
+    # System prompt with topic context
+    system_extra = (
+        f"Topic/room ID: {topic_id}. Data dir: {topic_dir}. "
+        f"After responding, update {config.data_dir / 'topic-map.yml'} "
+        f"with this topic's ID, path, and a short label. "
+        f"The bot renames the topic from the label. "
+        f"CONVERSATION HISTORY: Full conversation log is at {log_file} (JSONL, "
+        f"fields: ts, user, bot — every interaction with timestamps). "
+        f"Detailed message history with sender info: {history_file}. "
+        f"If you lose context (after timeout, session switch, or restart), "
+        f"READ these files to recover the full conversation. "
+        f"Entries ending with '[timed out]' or '[idle timeout]' mean your previous "
+        f"response was cut short — check what you were doing and continue. "
+        f"FORMATTING: User reads on mobile (Telegram/Matrix Element). "
+        f"NEVER use markdown tables — they render as broken text on mobile. "
+        f"Prefer bullet lists, bold headers, numbered lists to structure data. "
+        f"Small tables (2-4 cols, few rows): use monospace code block with aligned columns. "
+        f"Large/complex tables: generate HTML, convert to PDF via "
+        f"`html-to-pdf input.html output.pdf`, send via send-to-user. "
+        f"Do NOT use wkhtmltopdf — its PDFs are broken on iOS. "
+        f"SCREENSHOTS: `screenshot-page <url-or-file> output.png [--width 1280] [--height 900] "
+        f"[--wait 3] [--full-page] [--stealth]`. Works with URLs and local HTML files (folium maps etc). "
+        f"IMAGE SEARCH: `search-images \"query\" -o dir/ -n 4 -p prefix [--size large] "
+        f"[--orient horizontal]`. Uses Yandex Image Search API. Downloads images automatically. "
+        f"Add --no-download to just list URLs. "
+        f"WEB SEARCH: `search-web \"query\" [-n 10] [--lang ru]`. Yandex web search — "
+        f"best for Russian-language queries. Returns titles, URLs, snippets. "
+        f"Use for research, reviews, travel tips, local info. Lang: ru (default), en, tr. "
+        f"SENDING FILES: To send files to the user, use: `send-to-user <path> [caption]`. "
+        f"It is in PATH. The file will be delivered after your response. "
+        f"ASKING USER: To ask the user a question and wait for their reply, use: "
+        f"`ask-user \"your question\"`. It blocks until the user responds via the chat. "
+        f"IMAGE GENERATION: Use `generate-image` (NanoBanana/Gemini 3 Pro). "
+        f"It supports multi-turn chat for iterative refinement of images. "
+        f"First generation: `generate-image \"prompt\" output.png --chat history.json [-a 16:9]`. "
+        f"Refinement (edits the PREVIOUS image): `generate-image --chat history.json --refine \"change X to Y\" output2.png`. "
+        f"The --chat flag saves conversation context so the model remembers what it generated. "
+        f"ALWAYS use --chat with a history file in the current dir so you can refine later. "
+        f"The model can modify its own previous output when you use --refine — "
+        f"it does NOT generate from scratch, it edits the existing image. "
+        f"You can also pass reference images (up to 14): `generate-image \"prompt\" out.png --chat h.json --ref photo.jpg --ref photo2.jpg`. "
+        f"Aspect ratios: 9:16, 16:9, 1:1, 4:3, 3:4. Sizes: 1K, 2K, 4K (default). "
+        f"THREAD VISIBILITY: Your response is posted in a Matrix thread. "
+        f"The user sees ONLY the final message at a glance — intermediate tool output "
+        f"and thread messages are hidden unless expanded. "
+        f"All text the user needs to read MUST be in your response message, not only in files. "
+        f"Writing to files for persistence is fine, but the conversation text — "
+        f"analysis, notes, discussion points — must appear in the response itself. "
+        f"The user is chatting with you, not reading files. "
+        f"IMAGES IN CONTEXT: When conversation history contains entries like "
+        f"'[image: /path/to/file.png]', these are actual image files on disk. "
+        f"Use the Read tool to view them — they contain photos, screenshots, or book pages "
+        f"that the user shared. Always review referenced images before responding about them. "
+        f"TOOL DISCOVERY: Before installing packages or writing scripts, check what tools "
+        f"are already available. Common tools in PATH: transcribe-audio, send-to-user, "
+        f"ask-user, search-web, search-images, screenshot-page, generate-image, html-to-pdf, browser. "
+        f"BROWSER: If BROWSER_CDP_URL is set, you have access to a real Chrome browser via "
+        f"`browser <command>`. Commands: navigate <url>, screenshot [file], click <selector>, "
+        f"type <selector> <text>, read [selector], eval <js>, tabs, new [url], close. "
+        f"Use this for web interaction, authenticated sites, downloads, form filling. "
+        f"Run `ls /opt/agent-core/common-tools/` to see all. "
+        f"Prefer existing tools over writing new code."
+        f"{user_context}"
+        f"{workspace_context}"
+        f"{conv_context}"
+    )
+
+    claude_args = [
+        cmd,
+        *session_flag,
+        "-p",
+        "--verbose",
+        "--output-format", "stream-json",
+        "--append-system-prompt", system_extra,
+        "--allowedTools", ",".join(config.allowed_tools),
+        "--max-turns", "50",
+    ]
+    if model_override:
+        claude_args.extend(["--model", model_override])
+    claude_args.append(message)
+
+    # Wrap with bwrap if available
+    bwrap_path = Path(__file__).resolve().parent.parent / "bwrap-claude"
+    if bwrap_path.exists() and shutil.which("bwrap"):
+        args = [str(bwrap_path)] + claude_args
+    else:
+        args = claude_args
+
+    # Build clean environment for Claude subprocess
+    _strip_prefixes = ("CLAUDECODE", "CLAUDE_CODE")
+    _strip_keys = {
+        "BOT_TOKEN", "MATRIX_ACCESS_TOKEN", "MATRIX_HOMESERVER",
+        "MATRIX_USER_ID", "MATRIX_OWNER_MXID", "MATRIX_DEVICE_ID",
+    }
+    # Auth env vars that must pass through to Claude CLI
+    _passthrough_keys = {"CLAUDE_CODE_OAUTH_TOKEN"}
+    env = {
+        k: v for k, v in os.environ.items()
+        if k in _passthrough_keys
+        or (not any(k.startswith(p) for p in _strip_prefixes) and k not in _strip_keys)
+    }
+    # Add common-tools to PATH so Claude can use send-to-user, generate-image, etc.
+    common_tools = str(Path(__file__).resolve().parent.parent / "common-tools")
+    env["PATH"] = common_tools + ":" + env.get("PATH", "")
+
+    # Load per-user workspace .env (Readest keys, Linkwarden keys, etc.)
+    if workspace_dir:
+        ws_env = workspace_dir / ".env"
+        if ws_env.exists():
+            for line in ws_env.read_text().splitlines():
+                line = line.strip()
+                if line and not line.startswith("#") and "=" in line:
+                    key, _, val = line.partition("=")
+                    env[key.strip()] = val.strip().strip("'\"")  # handle KEY="value" and KEY='value'
+
+    # new_id is only bound on the "no existing session" branch above, which
+    # is exactly the branch in which it is read here.
+    session_label = existing_session[:8] if existing_session else f"new:{new_id[:8]}"
+    logger.info("Claude CLI: topic=%s session=%s cmd=%s", topic_id, session_label, cmd)
+
+    # NOTE(review): stderr is piped but only read after the process exits
+    # (and only on failure). A CLI that writes a lot to stderr could stall on
+    # a full pipe until the idle timeout fires — confirm expected volume.
+    proc = await asyncio.create_subprocess_exec(
+        *args,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+        cwd=str(topic_dir),
+        env=env,
+        limit=10 * 1024 * 1024,  # 10MB — stream-json lines can be huge (base64 images)
+    )
+
+    def _kill_proc() -> None:
+        """Kill the CLI process, tolerating the race where it already exited."""
+        try:
+            proc.kill()
+        except ProcessLookupError:
+            pass
+
+    response_parts: list[str] = []
+    full_text = ""
+    result_text = ""  # clean final response from result event
+    result_session_id = None
+    timeout_reason = None
+
+    # Tool tracking for status events
+    block_tools: dict[str, str] = {}    # tool_use_id -> tool name
+
+    # Idle timeout state — mutable so watchdog can read, user can extend
+    idle_timeout = idle_timeout_ref if idle_timeout_ref is not None else [config.claude_idle_timeout]
+    last_activity = [time.monotonic()]
+    start_time = time.monotonic()
+
+    # Start question watcher if callback provided
+    question_task = None
+    if on_question:
+        question_task = asyncio.create_task(_watch_questions(topic_dir, on_question))
+
+    # Watchdog: checks idle timeout, hard timeout, and cancel
+    async def _watchdog():
+        nonlocal timeout_reason
+        while True:
+            await asyncio.sleep(2)
+            now = time.monotonic()
+            if cancel_event and cancel_event.is_set():
+                timeout_reason = "cancelled"
+                _kill_proc()
+                return
+            idle = now - last_activity[0]
+            if idle > idle_timeout[0]:
+                timeout_reason = "idle"
+                _kill_proc()
+                return
+            elapsed = now - start_time
+            if elapsed > config.claude_max_timeout:
+                timeout_reason = "max"
+                _kill_proc()
+                return
+
+    watchdog_task = asyncio.create_task(_watchdog())
+
+    # Stream log — save all events from Claude CLI for debugging/replay
+    stream_log_path = topic_dir / "stream.jsonl"
+
+    try:
+        # Opened inside the try so that a failure to open the log still runs
+        # the cleanup below. UTF-8 is explicit because lines are decoded as
+        # UTF-8 and may hold characters the locale encoding cannot write.
+        with open(stream_log_path, "a", encoding="utf-8") as stream_log:
+            async for line in proc.stdout:
+                last_activity[0] = time.monotonic()  # reset idle timer on ANY output
+
+                line = line.decode("utf-8", errors="replace").strip()
+                if not line:
+                    continue
+
+                # Log raw event to stream.jsonl
+                stream_log.write(line + "\n")
+                stream_log.flush()
+
+                try:
+                    event = json.loads(line)
+                except json.JSONDecodeError:
+                    logger.debug("Non-JSON stdout: %s", line[:200])
+                    continue
+
+                etype = event.get("type")
+
+                # Capture session_id from init or result events
+                if etype == "system" and event.get("session_id"):
+                    result_session_id = event["session_id"]
+                elif etype == "result" and event.get("session_id"):
+                    result_session_id = event["session_id"]
+
+                # Handle result events — this has the clean final response
+                if etype == "result":
+                    if event.get("is_error"):
+                        # Tolerate a null list or non-string items here.
+                        errors = event.get("errors") or []
+                        logger.error("Claude CLI error: %s", "; ".join(str(err) for err in errors))
+                    if event.get("result"):
+                        result_text = event["result"]
+
+                # --- Status events from stream-json ---
+                # Claude CLI emits full "assistant" snapshots (with tool_use blocks)
+                # followed by "user" events (with tool_result).
+                if etype == "assistant":
+                    content = event.get("message", {}).get("content", [])
+
+                    for block in content:
+                        if block.get("type") == "tool_use" and on_status:
+                            tool_name = block.get("name", "")
+                            tool_id = block.get("id", "")
+                            inp = block.get("input", {})
+                            preview = _tool_preview(tool_name, json.dumps(inp, ensure_ascii=False))
+                            if tool_id:
+                                block_tools[tool_id] = tool_name
+                            if tool_name == "Agent":
+                                desc = inp.get("description", "")
+                                bg = inp.get("run_in_background", False)
+                                await on_status({
+                                    "event": "agent_start",
+                                    "description": desc,
+                                    "background": bg,
+                                })
+                            else:
+                                await on_status({
+                                    "event": "tool_start",
+                                    "tool": tool_name,
+                                    "input_preview": preview,
+                                })
+
+                        # All assistant text goes to thread as narration.
+                        # Only result.result is the final clean response.
+                        if block.get("type") == "text" and block.get("text"):
+                            text = block["text"]
+                            if on_status:
+                                await on_status({
+                                    "event": "thinking",
+                                    "text": text,
+                                })
+                            # Also accumulate for on_chunk (Telegram streaming)
+                            response_parts.append(text)
+                            full_text = "".join(response_parts)
+                            if on_chunk:
+                                await on_chunk(full_text)
+
+                # Tool results mark tool completion
+                if etype == "user" and on_status:
+                    content = event.get("message", {}).get("content", [])
+                    if isinstance(content, list):
+                        for block in content:
+                            if isinstance(block, dict) and block.get("type") == "tool_result":
+                                tool_id = block.get("tool_use_id", "")
+                                tool_name = block_tools.pop(tool_id, "tool")
+                                await on_status({"event": "tool_end", "tool": tool_name})
+
+                # Check if watchdog killed the process
+                if watchdog_task.done():
+                    break
+
+        await proc.wait()
+
+    finally:
+        # returncode is still None only when the block above was left through
+        # an exception or task cancellation — don't leave the CLI running.
+        if proc.returncode is None:
+            _kill_proc()
+        if not watchdog_task.done():
+            watchdog_task.cancel()
+        try:
+            await watchdog_task
+        except asyncio.CancelledError:
+            pass
+        if question_task:
+            question_task.cancel()
+            try:
+                await question_task
+            except asyncio.CancelledError:
+                pass
+
+    elapsed = int(time.monotonic() - start_time)
+
+    # Substrings treated as provider/auth failures in the response text.
+    # NOTE(review): the bare "401" also matches ordinary text that merely
+    # contains those digits — consider a stricter pattern.
+    error_patterns = ("Failed to authenticate", "API Error:", "authentication_error", "401")
+
+    # Handle timeout/cancel
+    if timeout_reason:
+        await proc.wait()
+        if timeout_reason == "cancelled":
+            logger.info("Claude CLI cancelled by user after %ds", elapsed)
+            suffix = "\n\n[cancelled by user]"
+        elif timeout_reason == "idle":
+            logger.warning("Claude CLI idle timeout after %ds (idle limit: %ds)", elapsed, idle_timeout[0])
+            suffix = f"\n\n[idle timeout — no output for {idle_timeout[0]}s]"
+        else:
+            logger.error("Claude CLI hard timeout after %ds (max: %ds)", elapsed, config.claude_max_timeout)
+            suffix = f"\n\n[timeout — {elapsed}s elapsed]"
+
+        # Save session even on timeout — don't lose conversation history
+        if result_session_id:
+            save_session(config.data_dir, topic_id, result_session_id, provider)
+
+        # On timeout: prefer result_text (clean), fall back to full_text (has thinking)
+        response = result_text or full_text
+        if response and not any(p in response for p in error_patterns):
+            return response + suffix
+        raise RuntimeError(f"Claude CLI {timeout_reason} after {elapsed}s (error response: {full_text[:100]})")
+
+    # Save session ID for future resume
+    if result_session_id:
+        save_session(config.data_dir, topic_id, result_session_id, provider)
+
+    # Check for error responses (auth failures, API errors) - these should trigger fallback
+    is_error_response = any(p in full_text for p in error_patterns)
+
+    if proc.returncode != 0 or is_error_response:
+        stderr = await proc.stderr.read()
+        stderr_text = stderr.decode("utf-8", errors="replace").strip()
+        logger.error("Claude CLI failed (rc=%d): %s", proc.returncode, stderr_text[:500])
+        if is_error_response:
+            raise RuntimeError(f"Claude CLI returned error: {full_text[:200]}")
+        response = result_text or full_text
+        if response:
+            return response
+        # Non-auth failure with no output — raise to trigger fallback
+        # but preserve session file (conversation history is valuable)
+        raise RuntimeError(f"Claude CLI exited with code {proc.returncode}")
+
+    response = result_text or full_text
+    if not response and _retry_count < 1:
+        # Clean exit but nothing to show: retry once with the same arguments.
+        logger.warning("Claude CLI returned empty response, retrying (attempt %d)", _retry_count + 1)
+        return await _send_with_provider(
+            config, topic_id, message, on_chunk, on_question,
+            on_status=on_status, cancel_event=cancel_event,
+            idle_timeout_ref=idle_timeout_ref,
+            provider=provider, cmd_override=cmd_override, model_override=model_override,
+            user_profile=user_profile, workspace_dir=workspace_dir,
+            _retry_count=_retry_count + 1,
+        )
+
+    return response or "(no response)"
+
+
+def _extract_text(event: dict) -> str | None:
+    """Return the text carried by a stream-json event, or None if it has none.
+
+    * "assistant" events: the concatenation of all "text" content blocks
+      (None when the message has no text block at all).
+    * "content_block_delta" events: the text of a "text_delta" delta.
+    * Everything else, including "result": None. "result" repeats what the
+      "assistant" events already streamed, so it is deliberately ignored
+      here and left to the caller as a fallback.
+    """
+    kind = event.get("type")
+
+    if kind == "content_block_delta":
+        delta = event.get("delta", {})
+        return delta.get("text", "") if delta.get("type") == "text_delta" else None
+
+    if kind != "assistant":
+        return None
+
+    pieces = [
+        block.get("text", "")
+        for block in event.get("message", {}).get("content", [])
+        if block.get("type") == "text"
+    ]
+    if not pieces:
+        return None
+    return "".join(pieces)