From 74c214e9571ac584cfaae1d2408c1b0f079de2ed Mon Sep 17 00:00:00 2001 From: Erosika Date: Mon, 9 Mar 2026 15:58:22 -0400 Subject: [PATCH 01/23] feat(honcho): async memory integration with prefetch pipeline and recallMode Adds full Honcho memory integration to Hermes: - Session manager with async background writes, memory modes (honcho/hybrid/local), and dialectic prefetch for first-turn context warming - Agent integration: prefetch pipeline, tool surface gated by recallMode, system prompt context injection, SIGTERM/SIGINT flush handlers - CLI commands: setup, status, mode, tokens, peer, identity, migrate - recallMode setting (auto | context | tools) for A/B testing retrieval strategies - Session strategies: per-session, per-repo (git tree root), per-directory, global - Polymorphic memoryMode config: string shorthand or per-peer object overrides - 97 tests covering async writes, client config, session resolution, and memory modes --- AGENTS.md | 1 - cli-config.yaml.example | 1 + cli.py | 39 +- gateway/run.py | 6 + hermes_cli/config.py | 32 +- hermes_cli/doctor.py | 34 + hermes_cli/main.py | 104 +++ honcho_integration/cli.py | 749 ++++++++++++++++++ honcho_integration/client.py | 159 +++- honcho_integration/session.py | 425 +++++++++- run_agent.py | 282 ++++++- tests/honcho_integration/test_async_memory.py | 489 ++++++++++++ tests/honcho_integration/test_client.py | 72 +- tools/browser_tool.py | 19 + tools/honcho_tools.py | 197 +++-- website/docs/user-guide/configuration.md | 1 + website/docs/user-guide/messaging/slack.md | 3 + 17 files changed, 2478 insertions(+), 135 deletions(-) create mode 100644 honcho_integration/cli.py create mode 100644 tests/honcho_integration/test_async_memory.py diff --git a/AGENTS.md b/AGENTS.md index 21ad08a9..e25f325e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -286,7 +286,6 @@ Activate with `/skin cyberpunk` or `display.skin: cyberpunk` in config.yaml. 
--- ## Important Policies - ### Prompt Caching Must Not Break Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:** diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 33f3702c..bd7b6b35 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -665,6 +665,7 @@ display: # all: Running output updates + final message (default) background_process_notifications: all + # Play terminal bell when agent finishes a response. # Useful for long-running tasks — your terminal will ding when the agent is done. # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound. diff --git a/cli.py b/cli.py index 338d2f72..e28b56b1 100755 --- a/cli.py +++ b/cli.py @@ -1440,7 +1440,7 @@ class HermesCLI: platform="cli", session_db=self._session_db, clarify_callback=self._clarify_callback, - honcho_session_key=self.session_id, + honcho_session_key=None, # resolved by run_agent via config sessions map / title fallback_model=self._fallback_model, thinking_callback=self._on_thinking, checkpoints_enabled=self.checkpoints_enabled, @@ -2573,6 +2573,26 @@ class HermesCLI: try: if self._session_db.set_session_title(self.session_id, new_title): _cprint(f" Session title set: {new_title}") + # Re-map Honcho session key to new title + if self.agent and getattr(self.agent, '_honcho', None): + try: + hcfg = self.agent._honcho_config + new_key = ( + hcfg.resolve_session_name( + session_title=new_title, + session_id=self.agent.session_id, + ) + if hcfg else new_title + ) + if new_key and new_key != self.agent._honcho_session_key: + old_key = self.agent._honcho_session_key + self.agent._honcho.get_or_create(new_key) + self.agent._honcho_session_key = new_key + from tools.honcho_tools import set_session_context + set_session_context(self.agent._honcho, new_key) + _cprint(f" Honcho session: {old_key} → {new_key}") + except Exception: + pass else: _cprint(" Session not found in database.") except 
ValueError as e: @@ -2886,6 +2906,12 @@ class HermesCLI: f" ✅ Compressed: {original_count} → {new_count} messages " f"(~{approx_tokens:,} → ~{new_tokens:,} tokens)" ) + # Flush Honcho async queue so queued messages land before context resets + if self.agent and getattr(self.agent, '_honcho', None): + try: + self.agent._honcho.flush_all() + except Exception: + pass except Exception as e: print(f" ❌ Compression failed: {e}") @@ -3322,7 +3348,8 @@ class HermesCLI: if response and pending_message: response = response + "\n\n---\n_[Interrupted - processing new message]_" - if response: + response_previewed = result.get("response_previewed", False) if result else False + if response and not response_previewed: # Use a Rich Panel for the response box — adapts to terminal # width at render time instead of hard-coding border length. try: @@ -3342,7 +3369,7 @@ class HermesCLI: border_style=_resp_color, padding=(1, 2), )) - + # Play terminal bell when agent finishes (if enabled). # Works over SSH — the bell propagates to the user's terminal. 
if self.bell_on_complete: @@ -4254,6 +4281,12 @@ class HermesCLI: # Unregister terminal_tool callbacks to avoid dangling references set_sudo_password_callback(None) set_approval_callback(None) + # Flush + shut down Honcho async writer (drains queue before exit) + if self.agent and getattr(self.agent, '_honcho', None): + try: + self.agent._honcho.shutdown() + except Exception: + pass # Close session in SQLite if hasattr(self, '_session_db') and self._session_db and self.agent: try: diff --git a/gateway/run.py b/gateway/run.py index cd5c9318..4e1c7390 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -293,6 +293,12 @@ class GatewayRunner: conversation_history=msgs, ) logger.info("Pre-reset memory flush completed for session %s", old_session_id) + # Flush any queued Honcho writes before the session is dropped + if getattr(tmp_agent, '_honcho', None): + try: + tmp_agent._honcho.shutdown() + except Exception: + pass except Exception as e: logger.debug("Pre-reset memory flush failed for session %s: %s", old_session_id, e) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ccf3debc..66ce09c9 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -90,7 +90,7 @@ DEFAULT_CONFIG = { "inactivity_timeout": 120, "record_sessions": False, # Auto-record browser sessions as WebM videos }, - + # Filesystem checkpoints — automatic snapshots before destructive file ops. # When enabled, the agent takes a snapshot of the working directory once per # conversation turn (on first write_file/patch call). Use /rollback to restore. @@ -849,6 +849,36 @@ _COMMENTED_SECTIONS = """ """ +_COMMENTED_SECTIONS = """ +# ── Security ────────────────────────────────────────────────────────── +# API keys, tokens, and passwords are redacted from tool output by default. +# Set to false to see full values (useful for debugging auth issues). 
+# +# security: +# redact_secrets: false + +# ── Fallback Model ──────────────────────────────────────────────────── +# Automatic provider failover when primary is unavailable. +# Uncomment and configure to enable. Triggers on rate limits (429), +# overload (529), service errors (503), or connection failures. +# +# Supported providers: +# openrouter (OPENROUTER_API_KEY) — routes to any model +# openai-codex (OAuth — hermes login) — OpenAI Codex +# nous (OAuth — hermes login) — Nous Portal +# zai (ZAI_API_KEY) — Z.AI / GLM +# kimi-coding (KIMI_API_KEY) — Kimi / Moonshot +# minimax (MINIMAX_API_KEY) — MiniMax +# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) +# +# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# +# fallback_model: +# provider: openrouter +# model: anthropic/claude-sonnet-4 +""" + + def save_config(config: Dict[str, Any]): """Save configuration to ~/.hermes/config.yaml.""" from utils import atomic_yaml_write diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index de55bdff..8fe1882d 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -627,6 +627,40 @@ def run_doctor(args): else: check_warn("No GITHUB_TOKEN", "(60 req/hr rate limit — set in ~/.hermes/.env for better rates)") + # ========================================================================= + # Honcho memory + # ========================================================================= + print() + print(color("◆ Honcho Memory", Colors.CYAN, Colors.BOLD)) + + try: + from honcho_integration.client import HonchoClientConfig, GLOBAL_CONFIG_PATH + hcfg = HonchoClientConfig.from_global_config() + + if not GLOBAL_CONFIG_PATH.exists(): + check_warn("Honcho config not found", f"run: hermes honcho setup") + elif not hcfg.enabled: + check_info("Honcho disabled (set enabled: true in ~/.honcho/config.json to activate)") + elif not hcfg.api_key: + check_fail("Honcho API key not set", "run: hermes honcho setup") + issues.append("No Honcho API key — run 'hermes 
honcho setup'") + else: + from honcho_integration.client import get_honcho_client, reset_honcho_client + reset_honcho_client() + try: + get_honcho_client(hcfg) + check_ok( + "Honcho connected", + f"workspace={hcfg.workspace_id} mode={hcfg.memory_mode} freq={hcfg.write_frequency}", + ) + except Exception as _e: + check_fail("Honcho connection failed", str(_e)) + issues.append(f"Honcho unreachable: {_e}") + except ImportError: + check_warn("honcho-ai not installed", "pip install honcho-ai") + except Exception as _e: + check_warn("Honcho check failed", str(_e)) + # ========================================================================= # Summary # ========================================================================= diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 20d70fcb..69f4061e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -18,6 +18,22 @@ Usage: hermes cron list # List cron jobs hermes cron status # Check if cron scheduler is running hermes doctor # Check configuration and dependencies + hermes honcho setup # Configure Honcho AI memory integration + hermes honcho status # Show Honcho config and connection status + hermes honcho sessions # List directory → session name mappings + hermes honcho map # Map current directory to a session name + hermes honcho peer # Show peer names and dialectic settings + hermes honcho peer --user NAME # Set user peer name + hermes honcho peer --ai NAME # Set AI peer name + hermes honcho peer --reasoning LEVEL # Set dialectic reasoning level + hermes honcho mode # Show current memory mode + hermes honcho mode [hybrid|honcho|local] # Set memory mode + hermes honcho tokens # Show token budget settings + hermes honcho tokens --context N # Set session.context() token cap + hermes honcho tokens --dialectic N # Set dialectic result char cap + hermes honcho identity # Show AI peer identity representation + hermes honcho identity # Seed AI peer identity from a file (SOUL.md etc.) 
+ hermes honcho migrate # Step-by-step migration guide: OpenClaw native → Hermes + Honcho hermes version # Show version hermes update # Update to latest version hermes uninstall # Uninstall Hermes Agent @@ -2281,6 +2297,94 @@ For more help on a command: skills_parser.set_defaults(func=cmd_skills) + # ========================================================================= + # honcho command + # ========================================================================= + honcho_parser = subparsers.add_parser( + "honcho", + help="Manage Honcho AI memory integration", + description=( + "Honcho is a memory layer that persists across sessions.\n\n" + "Each conversation is stored as a peer interaction in a workspace. " + "Honcho builds a representation of the user over time — conclusions, " + "patterns, context — and surfaces the relevant slice at the start of " + "each turn so Hermes knows who you are without you having to repeat yourself.\n\n" + "Modes: hybrid (Honcho + local MEMORY.md), honcho (Honcho only), " + "local (MEMORY.md only). Write frequency is configurable so memory " + "writes never block the response." + ), + formatter_class=__import__("argparse").RawDescriptionHelpFormatter, + ) + honcho_subparsers = honcho_parser.add_subparsers(dest="honcho_command") + + honcho_subparsers.add_parser("setup", help="Interactive setup wizard for Honcho integration") + honcho_subparsers.add_parser("status", help="Show current Honcho config and connection status") + honcho_subparsers.add_parser("sessions", help="List known Honcho session mappings") + + honcho_map = honcho_subparsers.add_parser( + "map", help="Map current directory to a Honcho session name (no arg = list mappings)" + ) + honcho_map.add_argument( + "session_name", nargs="?", default=None, + help="Session name to associate with this directory. 
Omit to list current mappings.", + ) + + honcho_peer = honcho_subparsers.add_parser( + "peer", help="Show or update peer names and dialectic reasoning level" + ) + honcho_peer.add_argument("--user", metavar="NAME", help="Set user peer name") + honcho_peer.add_argument("--ai", metavar="NAME", help="Set AI peer name") + honcho_peer.add_argument( + "--reasoning", + metavar="LEVEL", + choices=("minimal", "low", "medium", "high", "max"), + help="Set default dialectic reasoning level (minimal/low/medium/high/max)", + ) + + honcho_mode = honcho_subparsers.add_parser( + "mode", help="Show or set memory mode (hybrid/honcho/local)" + ) + honcho_mode.add_argument( + "mode", nargs="?", metavar="MODE", + choices=("hybrid", "honcho", "local"), + help="Memory mode to set (hybrid/honcho/local). Omit to show current.", + ) + + honcho_tokens = honcho_subparsers.add_parser( + "tokens", help="Show or set token budget for context and dialectic" + ) + honcho_tokens.add_argument( + "--context", type=int, metavar="N", + help="Max tokens Honcho returns from session.context() per turn", + ) + honcho_tokens.add_argument( + "--dialectic", type=int, metavar="N", + help="Max chars of dialectic result to inject into system prompt", + ) + + honcho_identity = honcho_subparsers.add_parser( + "identity", help="Seed or show the AI peer's Honcho identity representation" + ) + honcho_identity.add_argument( + "file", nargs="?", default=None, + help="Path to file to seed from (e.g. SOUL.md). 
Omit to show usage.", + ) + honcho_identity.add_argument( + "--show", action="store_true", + help="Show current AI peer representation from Honcho", + ) + + honcho_subparsers.add_parser( + "migrate", + help="Step-by-step migration guide from openclaw-honcho to Hermes Honcho", + ) + + def cmd_honcho(args): + from honcho_integration.cli import honcho_command + honcho_command(args) + + honcho_parser.set_defaults(func=cmd_honcho) + # ========================================================================= # tools command # ========================================================================= diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py new file mode 100644 index 00000000..feb3ee0f --- /dev/null +++ b/honcho_integration/cli.py @@ -0,0 +1,749 @@ +"""CLI commands for Honcho integration management. + +Handles: hermes honcho setup | status | sessions | map | peer +""" + +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path + +GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json" +HOST = "hermes" + + +def _read_config() -> dict: + if GLOBAL_CONFIG_PATH.exists(): + try: + return json.loads(GLOBAL_CONFIG_PATH.read_text(encoding="utf-8")) + except Exception: + pass + return {} + + +def _write_config(cfg: dict) -> None: + GLOBAL_CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True) + GLOBAL_CONFIG_PATH.write_text( + json.dumps(cfg, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + + +def _prompt(label: str, default: str | None = None, secret: bool = False) -> str: + suffix = f" [{default}]" if default else "" + sys.stdout.write(f" {label}{suffix}: ") + sys.stdout.flush() + if secret: + if sys.stdin.isatty(): + import getpass + val = getpass.getpass(prompt="") + else: + # Non-TTY (piped input, test runners) — read plaintext + val = sys.stdin.readline().strip() + else: + val = sys.stdin.readline().strip() + return val or (default or "") + + +def _ensure_sdk_installed() -> bool: + """Check 
honcho-ai is importable; offer to install if not. Returns True if ready.""" + try: + import honcho # noqa: F401 + return True + except ImportError: + pass + + print(" honcho-ai is not installed.") + answer = _prompt("Install it now? (honcho-ai>=2.0.1)", default="y") + if answer.lower() not in ("y", "yes"): + print(" Skipping install. Run: pip install 'honcho-ai>=2.0.1'\n") + return False + + import subprocess + print(" Installing honcho-ai...", flush=True) + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "honcho-ai>=2.0.1"], + capture_output=True, + text=True, + ) + if result.returncode == 0: + print(" Installed.\n") + return True + else: + print(f" Install failed:\n{result.stderr.strip()}") + print(" Run manually: pip install 'honcho-ai>=2.0.1'\n") + return False + + +def cmd_setup(args) -> None: + """Interactive Honcho setup wizard.""" + cfg = _read_config() + + print("\nHoncho memory setup\n" + "─" * 40) + print(" Honcho gives Hermes persistent cross-session memory.") + print(" Config is shared with other hosts at ~/.honcho/config.json\n") + + if not _ensure_sdk_installed(): + return + + # API key + current_key = cfg.get("apiKey", "") + masked = f"...{current_key[-8:]}" if len(current_key) > 8 else ("set" if current_key else "not set") + print(f" Current API key: {masked}") + new_key = _prompt("Honcho API key (leave blank to keep current)", secret=True) + if new_key: + cfg["apiKey"] = new_key + + if not cfg.get("apiKey"): + print("\n No API key configured. 
Get one at https://app.honcho.dev") + print(" Run 'hermes honcho setup' again once you have a key.\n") + return + + # Peer name + current_peer = cfg.get("peerName", "") + new_peer = _prompt("Your name (user peer)", default=current_peer or os.getenv("USER", "user")) + if new_peer: + cfg["peerName"] = new_peer + + # Host block + hosts = cfg.setdefault("hosts", {}) + hermes_host = hosts.setdefault(HOST, {}) + + current_workspace = hermes_host.get("workspace") or cfg.get("workspace", "hermes") + new_workspace = _prompt("Workspace ID", default=current_workspace) + if new_workspace: + hermes_host["workspace"] = new_workspace + # Also update flat workspace if it was the primary one + if cfg.get("workspace") == current_workspace: + cfg["workspace"] = new_workspace + + hermes_host.setdefault("aiPeer", HOST) + + # Memory mode + current_mode = cfg.get("memoryMode", "hybrid") + print(f"\n Memory mode options:") + print(" hybrid — write to both Honcho and local MEMORY.md (default)") + print(" honcho — Honcho only, skip MEMORY.md writes") + print(" local — MEMORY.md only, Honcho disabled") + new_mode = _prompt("Memory mode", default=current_mode) + if new_mode in ("hybrid", "honcho", "local"): + cfg["memoryMode"] = new_mode + else: + cfg["memoryMode"] = "hybrid" + + # Write frequency + current_wf = str(cfg.get("writeFrequency", "async")) + print(f"\n Write frequency options:") + print(" async — background thread, no token cost (recommended)") + print(" turn — sync write after every turn") + print(" session — batch write at session end only") + print(" N — write every N turns (e.g. 
5)") + new_wf = _prompt("Write frequency", default=current_wf) + try: + cfg["writeFrequency"] = int(new_wf) + except (ValueError, TypeError): + cfg["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" + + # Recall mode + current_recall = cfg.get("recallMode", "auto") + print(f"\n Recall mode options:") + print(" auto — pre-warmed context + memory tools available (default)") + print(" context — pre-warmed context only, memory tools suppressed") + print(" tools — no pre-loaded context, rely on tool calls only") + new_recall = _prompt("Recall mode", default=current_recall) + if new_recall in ("auto", "context", "tools"): + cfg["recallMode"] = new_recall + + # Session strategy + current_strat = cfg.get("sessionStrategy", "per-session") + print(f"\n Session strategy options:") + print(" per-session — new Honcho session each run, named by Hermes session ID (default)") + print(" per-repo — one session per git repository (uses repo root name)") + print(" per-directory — one session per working directory") + print(" global — single session across all directories") + new_strat = _prompt("Session strategy", default=current_strat) + if new_strat in ("per-session", "per-repo", "per-directory", "global"): + cfg["sessionStrategy"] = new_strat + + cfg.setdefault("enabled", True) + cfg.setdefault("saveMessages", True) + + _write_config(cfg) + print(f"\n Config written to {GLOBAL_CONFIG_PATH}") + + # Test connection + print(" Testing connection... 
", end="", flush=True) + try: + from honcho_integration.client import HonchoClientConfig, get_honcho_client, reset_honcho_client + reset_honcho_client() + hcfg = HonchoClientConfig.from_global_config() + get_honcho_client(hcfg) + print("OK") + except Exception as e: + print(f"FAILED\n Error: {e}") + return + + print(f"\n Honcho is ready.") + print(f" Session: {hcfg.resolve_session_name()}") + print(f" Workspace: {hcfg.workspace_id}") + print(f" Peer: {hcfg.peer_name}") + _mode_str = hcfg.memory_mode + if hcfg.peer_memory_modes: + overrides = ", ".join(f"{k}={v}" for k, v in hcfg.peer_memory_modes.items()) + _mode_str = f"{hcfg.memory_mode} (peers: {overrides})" + print(f" Mode: {_mode_str}") + print(f" Frequency: {hcfg.write_frequency}") + print(f"\n Tools available in chat:") + print(f" query_user_context — ask Honcho a question about you (LLM-synthesized)") + print(f" honcho_search — semantic search over your history (no LLM)") + print(f" honcho_profile — your peer card, key facts (no LLM)") + print(f"\n Other commands:") + print(f" hermes honcho status — show full config") + print(f" hermes honcho mode — show or change memory mode") + print(f" hermes honcho tokens — show or set token budgets") + print(f" hermes honcho identity — seed or show AI peer identity") + print(f" hermes honcho map — map this directory to a session name\n") + + +def cmd_status(args) -> None: + """Show current Honcho config and connection status.""" + try: + import honcho # noqa: F401 + except ImportError: + print(" honcho-ai is not installed. 
Run: hermes honcho setup\n") + return + + cfg = _read_config() + + if not cfg: + print(" No Honcho config found at ~/.honcho/config.json") + print(" Run 'hermes honcho setup' to configure.\n") + return + + try: + from honcho_integration.client import HonchoClientConfig, get_honcho_client + hcfg = HonchoClientConfig.from_global_config() + except Exception as e: + print(f" Config error: {e}\n") + return + + api_key = hcfg.api_key or "" + masked = f"...{api_key[-8:]}" if len(api_key) > 8 else ("set" if api_key else "not set") + + print(f"\nHoncho status\n" + "─" * 40) + print(f" Enabled: {hcfg.enabled}") + print(f" API key: {masked}") + print(f" Workspace: {hcfg.workspace_id}") + print(f" Host: {hcfg.host}") + print(f" Config path: {GLOBAL_CONFIG_PATH}") + print(f" AI peer: {hcfg.ai_peer}") + print(f" User peer: {hcfg.peer_name or 'not set'}") + print(f" Session key: {hcfg.resolve_session_name()}") + print(f" Recall mode: {hcfg.recall_mode}") + print(f" Memory mode: {hcfg.memory_mode}") + if hcfg.peer_memory_modes: + print(f" Per-peer modes:") + for peer, mode in hcfg.peer_memory_modes.items(): + print(f" {peer}: {mode}") + print(f" Write freq: {hcfg.write_frequency}") + + if hcfg.enabled and hcfg.api_key: + print("\n Connection... 
", end="", flush=True) + try: + get_honcho_client(hcfg) + print("OK\n") + except Exception as e: + print(f"FAILED ({e})\n") + else: + reason = "disabled" if not hcfg.enabled else "no API key" + print(f"\n Not connected ({reason})\n") + + +def cmd_sessions(args) -> None: + """List known directory → session name mappings.""" + cfg = _read_config() + sessions = cfg.get("sessions", {}) + + if not sessions: + print(" No session mappings configured.\n") + print(" Add one with: hermes honcho map ") + print(" Or edit ~/.honcho/config.json directly.\n") + return + + cwd = os.getcwd() + print(f"\nHoncho session mappings ({len(sessions)})\n" + "─" * 40) + for path, name in sorted(sessions.items()): + marker = " ←" if path == cwd else "" + print(f" {name:<30} {path}{marker}") + print() + + +def cmd_map(args) -> None: + """Map current directory to a Honcho session name.""" + if not args.session_name: + cmd_sessions(args) + return + + cwd = os.getcwd() + session_name = args.session_name.strip() + + if not session_name: + print(" Session name cannot be empty.\n") + return + + import re + sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_name).strip('-') + if sanitized != session_name: + print(f" Session name sanitized to: {sanitized}") + session_name = sanitized + + cfg = _read_config() + cfg.setdefault("sessions", {})[cwd] = session_name + _write_config(cfg) + print(f" Mapped {cwd}\n → {session_name}\n") + + +def cmd_peer(args) -> None: + """Show or update peer names and dialectic reasoning level.""" + cfg = _read_config() + changed = False + + user_name = getattr(args, "user", None) + ai_name = getattr(args, "ai", None) + reasoning = getattr(args, "reasoning", None) + + REASONING_LEVELS = ("minimal", "low", "medium", "high", "max") + + if user_name is None and ai_name is None and reasoning is None: + # Show current values + hosts = cfg.get("hosts", {}) + hermes = hosts.get(HOST, {}) + print(f"\nHoncho peer config\n" + "─" * 40) + print(f" User peer: {cfg.get('peerName') or 
'(not set)'}") + print(f" AI peer: {hermes.get('aiPeer') or cfg.get('aiPeer') or HOST}") + lvl = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low" + max_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600 + print(f" Dialectic level: {lvl} (options: {', '.join(REASONING_LEVELS)})") + print(f" Dialectic cap: {max_chars} chars\n") + return + + if user_name is not None: + cfg["peerName"] = user_name.strip() + changed = True + print(f" User peer → {cfg['peerName']}") + + if ai_name is not None: + cfg.setdefault("hosts", {}).setdefault(HOST, {})["aiPeer"] = ai_name.strip() + changed = True + print(f" AI peer → {ai_name.strip()}") + + if reasoning is not None: + if reasoning not in REASONING_LEVELS: + print(f" Invalid reasoning level '{reasoning}'. Options: {', '.join(REASONING_LEVELS)}") + return + cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticReasoningLevel"] = reasoning + changed = True + print(f" Dialectic reasoning level → {reasoning}") + + if changed: + _write_config(cfg) + print(f" Saved to {GLOBAL_CONFIG_PATH}\n") + + +def cmd_mode(args) -> None: + """Show or set the memory mode.""" + MODES = { + "hybrid": "write to both Honcho and local MEMORY.md (default)", + "honcho": "Honcho only — MEMORY.md writes disabled", + "local": "MEMORY.md only — Honcho disabled", + } + cfg = _read_config() + mode_arg = getattr(args, "mode", None) + + if mode_arg is None: + current = ( + (cfg.get("hosts") or {}).get(HOST, {}).get("memoryMode") + or cfg.get("memoryMode") + or "hybrid" + ) + print(f"\nHoncho memory mode\n" + "─" * 40) + for m, desc in MODES.items(): + marker = " ←" if m == current else "" + print(f" {m:<8} {desc}{marker}") + print(f"\n Set with: hermes honcho mode [hybrid|honcho|local]\n") + return + + if mode_arg not in MODES: + print(f" Invalid mode '{mode_arg}'. 
Options: {', '.join(MODES)}\n") + return + + cfg.setdefault("hosts", {}).setdefault(HOST, {})["memoryMode"] = mode_arg + _write_config(cfg) + print(f" Memory mode → {mode_arg} ({MODES[mode_arg]})\n") + + +def cmd_tokens(args) -> None: + """Show or set token budget settings.""" + cfg = _read_config() + hosts = cfg.get("hosts", {}) + hermes = hosts.get(HOST, {}) + + context = getattr(args, "context", None) + dialectic = getattr(args, "dialectic", None) + + if context is None and dialectic is None: + ctx_tokens = hermes.get("contextTokens") or cfg.get("contextTokens") or "(Honcho default)" + d_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600 + d_level = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low" + print(f"\nHoncho token settings\n" + "─" * 40) + print(f" context tokens: {ctx_tokens}") + print(f" Max tokens Honcho returns from session.context() per turn.") + print(f" Injected into Hermes system prompt — counts against your LLM budget.") + print(f" dialectic cap: {d_chars} chars") + print(f" Max chars of peer.chat() result injected per turn.") + print(f" dialectic level: {d_level} (controls Honcho-side inference depth)") + print(f"\n Set with: hermes honcho tokens [--context N] [--dialectic N]\n") + return + + changed = False + if context is not None: + cfg.setdefault("hosts", {}).setdefault(HOST, {})["contextTokens"] = context + print(f" context tokens → {context}") + changed = True + if dialectic is not None: + cfg.setdefault("hosts", {}).setdefault(HOST, {})["dialecticMaxChars"] = dialectic + print(f" dialectic cap → {dialectic} chars") + changed = True + + if changed: + _write_config(cfg) + print(f" Saved to {GLOBAL_CONFIG_PATH}\n") + + +def cmd_identity(args) -> None: + """Seed AI peer identity or show both peer representations.""" + cfg = _read_config() + if not cfg.get("apiKey"): + print(" No API key configured. 
Run 'hermes honcho setup' first.\n") + return + + file_path = getattr(args, "file", None) + show = getattr(args, "show", False) + + try: + from honcho_integration.client import HonchoClientConfig, get_honcho_client + from honcho_integration.session import HonchoSessionManager + hcfg = HonchoClientConfig.from_global_config() + client = get_honcho_client(hcfg) + mgr = HonchoSessionManager(honcho=client, config=hcfg) + session_key = hcfg.resolve_session_name() + mgr.get_or_create(session_key) + except Exception as e: + print(f" Honcho connection failed: {e}\n") + return + + if show: + # ── User peer ──────────────────────────────────────────────────────── + user_card = mgr.get_peer_card(session_key) + print(f"\nUser peer ({hcfg.peer_name or 'not set'})\n" + "─" * 40) + if user_card: + for fact in user_card: + print(f" {fact}") + else: + print(" No user peer card yet. Send a few messages to build one.") + + # ── AI peer ────────────────────────────────────────────────────────── + ai_rep = mgr.get_ai_representation(session_key) + print(f"\nAI peer ({hcfg.ai_peer})\n" + "─" * 40) + if ai_rep.get("representation"): + print(ai_rep["representation"]) + elif ai_rep.get("card"): + print(ai_rep["card"]) + else: + print(" No representation built yet.") + print(" Run 'hermes honcho identity ' to seed one.") + print() + return + + if not file_path: + print("\nHoncho identity management\n" + "─" * 40) + print(f" User peer: {hcfg.peer_name or 'not set'}") + print(f" AI peer: {hcfg.ai_peer}") + print() + print(" hermes honcho identity --show — show both peer representations") + print(" hermes honcho identity — seed AI peer from SOUL.md or any .md/.txt\n") + return + + from pathlib import Path + p = Path(file_path).expanduser() + if not p.exists(): + print(f" File not found: {p}\n") + return + + content = p.read_text(encoding="utf-8").strip() + if not content: + print(f" File is empty: {p}\n") + return + + source = p.name + ok = mgr.seed_ai_identity(session_key, content, 
source=source) + if ok: + print(f" Seeded AI peer identity from {p.name} into session '{session_key}'") + print(f" Honcho will incorporate this into {hcfg.ai_peer}'s representation over time.\n") + else: + print(f" Failed to seed identity. Check logs for details.\n") + + +def cmd_migrate(args) -> None: + """Step-by-step migration guide: OpenClaw native memory → Hermes + Honcho.""" + from pathlib import Path + + # ── Detect OpenClaw native memory files ────────────────────────────────── + cwd = Path(os.getcwd()) + openclaw_home = Path.home() / ".openclaw" + + # User peer: facts about the user + user_file_names = ["USER.md", "MEMORY.md"] + # AI peer: agent identity / configuration + agent_file_names = ["SOUL.md", "IDENTITY.md", "AGENTS.md", "TOOLS.md", "BOOTSTRAP.md"] + + user_files: list[Path] = [] + agent_files: list[Path] = [] + for name in user_file_names: + for d in [cwd, openclaw_home]: + p = d / name + if p.exists() and p not in user_files: + user_files.append(p) + for name in agent_file_names: + for d in [cwd, openclaw_home]: + p = d / name + if p.exists() and p not in agent_files: + agent_files.append(p) + + cfg = _read_config() + has_key = bool(cfg.get("apiKey", "")) + + print("\nHoncho migration: OpenClaw native memory → Hermes\n" + "─" * 50) + print() + print(" OpenClaw's native memory stores context in local markdown files") + print(" (USER.md, MEMORY.md, SOUL.md, ...) 
and injects them via QMD search.") + print(" Honcho replaces that with a cloud-backed, LLM-observable memory layer:") + print(" context is retrieved semantically, injected automatically each turn,") + print(" and enriched by a dialectic reasoning layer that builds over time.") + print() + + # ── Step 1: Honcho account ──────────────────────────────────────────────── + print("Step 1 Create a Honcho account") + print() + if has_key: + masked = f"...{cfg['apiKey'][-8:]}" if len(cfg["apiKey"]) > 8 else "set" + print(f" Honcho API key already configured: {masked}") + print(" Skip to Step 2.") + else: + print(" Honcho is a cloud memory service. You need a free account to use it.") + print() + print(" 1. Go to https://app.honcho.dev and create an account.") + print(" 2. Copy your API key from the dashboard.") + print(" 3. Run: hermes honcho setup") + print(" This will store the key and create a workspace for this project.") + print() + answer = _prompt(" Run 'hermes honcho setup' now?", default="y") + if answer.lower() in ("y", "yes"): + cmd_setup(args) + cfg = _read_config() + has_key = bool(cfg.get("apiKey", "")) + else: + print() + print(" Run 'hermes honcho setup' when ready, then re-run this walkthrough.") + + # ── Step 2: Detected files ──────────────────────────────────────────────── + print() + print("Step 2 Detected OpenClaw memory files") + print() + if user_files or agent_files: + if user_files: + print(f" User memory ({len(user_files)} file(s)) — will go to Honcho user peer:") + for f in user_files: + print(f" {f}") + if agent_files: + print(f" Agent identity ({len(agent_files)} file(s)) — will go to Honcho AI peer:") + for f in agent_files: + print(f" {f}") + else: + print(" No OpenClaw native memory files found in cwd or ~/.openclaw/.") + print(" If your files are elsewhere, copy them here before continuing,") + print(" or seed them manually: hermes honcho identity ") + + # ── Step 3: Migrate user memory ─────────────────────────────────────────── + print() 
+ print("Step 3 Migrate user memory files → Honcho user peer") + print() + print(" USER.md and MEMORY.md contain facts about you that the agent should") + print(" remember across sessions. Honcho will store these under your user peer") + print(" and inject relevant excerpts into the system prompt automatically.") + print() + if user_files: + print(f" Found: {', '.join(f.name for f in user_files)}") + print() + print(" These are picked up automatically the first time you run 'hermes'") + print(" with Honcho configured and no prior session history.") + print(" (Hermes calls migrate_memory_files() on first session init.)") + print() + print(" If you want to migrate them now without starting a session:") + for f in user_files: + print(f" hermes honcho migrate — this step handles it interactively") + if has_key: + answer = _prompt(" Upload user memory files to Honcho now?", default="y") + if answer.lower() in ("y", "yes"): + try: + from honcho_integration.client import ( + HonchoClientConfig, + get_honcho_client, + reset_honcho_client, + ) + from honcho_integration.session import HonchoSessionManager + + reset_honcho_client() + hcfg = HonchoClientConfig.from_global_config() + client = get_honcho_client(hcfg) + mgr = HonchoSessionManager(honcho=client, config=hcfg) + session_key = hcfg.resolve_session_name() + mgr.get_or_create(session_key) + # Upload from each directory that had user files + dirs_with_files = set(str(f.parent) for f in user_files) + any_uploaded = False + for d in dirs_with_files: + if mgr.migrate_memory_files(session_key, d): + any_uploaded = True + if any_uploaded: + print(f" Uploaded user memory files from: {', '.join(dirs_with_files)}") + else: + print(" Nothing uploaded (files may already be migrated or empty).") + except Exception as e: + print(f" Failed: {e}") + else: + print(" Run 'hermes honcho setup' first, then re-run this step.") + else: + print(" No user memory files detected. 
Nothing to migrate here.") + + # ── Step 4: Seed AI identity ────────────────────────────────────────────── + print() + print("Step 4 Seed AI identity files → Honcho AI peer") + print() + print(" SOUL.md, IDENTITY.md, AGENTS.md, TOOLS.md, BOOTSTRAP.md define the") + print(" agent's character, capabilities, and behavioral rules. In OpenClaw") + print(" these are injected via file search at prompt-build time.") + print() + print(" In Hermes, they are seeded once into Honcho's AI peer through the") + print(" observation pipeline. Honcho builds a representation from them and") + print(" from every subsequent assistant message (observe_me=True). Over time") + print(" the representation reflects actual behavior, not just declaration.") + print() + if agent_files: + print(f" Found: {', '.join(f.name for f in agent_files)}") + print() + if has_key: + answer = _prompt(" Seed AI identity from all detected files now?", default="y") + if answer.lower() in ("y", "yes"): + try: + from honcho_integration.client import ( + HonchoClientConfig, + get_honcho_client, + reset_honcho_client, + ) + from honcho_integration.session import HonchoSessionManager + + reset_honcho_client() + hcfg = HonchoClientConfig.from_global_config() + client = get_honcho_client(hcfg) + mgr = HonchoSessionManager(honcho=client, config=hcfg) + session_key = hcfg.resolve_session_name() + mgr.get_or_create(session_key) + for f in agent_files: + content = f.read_text(encoding="utf-8").strip() + if content: + ok = mgr.seed_ai_identity(session_key, content, source=f.name) + status = "seeded" if ok else "failed" + print(f" {f.name}: {status}") + except Exception as e: + print(f" Failed: {e}") + else: + print(" Run 'hermes honcho setup' first, then seed manually:") + for f in agent_files: + print(f" hermes honcho identity {f}") + else: + print(" No agent identity files detected.") + print(" To seed manually: hermes honcho identity ") + + # ── Step 5: What changes ────────────────────────────────────────────────── + 
print() + print("Step 5 What changes vs. OpenClaw native memory") + print() + print(" Storage") + print(" OpenClaw: markdown files on disk, searched via QMD at prompt-build time.") + print(" Hermes: cloud-backed Honcho peers. Files can stay on disk as source") + print(" of truth; Honcho holds the live representation.") + print() + print(" Context injection") + print(" OpenClaw: file excerpts injected synchronously before each LLM call.") + print(" Hermes: Honcho context prefetched async at turn end, injected next turn.") + print(" First turn has no Honcho context; subsequent turns are loaded.") + print() + print(" Memory growth") + print(" OpenClaw: you edit files manually to update memory.") + print(" Hermes: Honcho observes every message and updates representations") + print(" automatically. Files become the seed, not the live store.") + print() + print(" Tool surface (available to the agent during conversation)") + print(" query_user_context — ask Honcho a question, get a synthesized answer (LLM)") + print(" honcho_search — semantic search over stored context (no LLM)") + print(" honcho_profile — fast peer card snapshot (no LLM)") + print() + print(" Session naming") + print(" OpenClaw: no persistent session concept — files are global.") + print(" Hermes: per-session by default — each run gets a new Honcho session") + print(" Map a custom name: hermes honcho map ") + + # ── Step 6: Next steps ──────────────────────────────────────────────────── + print() + print("Step 6 Next steps") + print() + if not has_key: + print(" 1. hermes honcho setup — configure API key (required)") + print(" 2. hermes honcho migrate — re-run this walkthrough") + else: + print(" 1. hermes honcho status — verify Honcho connection") + print(" 2. hermes — start a session") + print(" (user memory files auto-uploaded on first turn if not done above)") + print(" 3. hermes honcho identity --show — verify AI peer representation") + print(" 4. 
hermes honcho tokens — tune context and dialectic budgets") + print(" 5. hermes honcho mode — view or change memory mode") + print() + + +def honcho_command(args) -> None: + """Route honcho subcommands.""" + sub = getattr(args, "honcho_command", None) + if sub == "setup" or sub is None: + cmd_setup(args) + elif sub == "status": + cmd_status(args) + elif sub == "sessions": + cmd_sessions(args) + elif sub == "map": + cmd_map(args) + elif sub == "peer": + cmd_peer(args) + elif sub == "mode": + cmd_mode(args) + elif sub == "tokens": + cmd_tokens(args) + elif sub == "identity": + cmd_identity(args) + elif sub == "migrate": + cmd_migrate(args) + else: + print(f" Unknown honcho command: {sub}") + print(" Available: setup, status, sessions, map, peer, mode, tokens, identity, migrate\n") diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 054569df..f1d95b2e 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -27,6 +27,30 @@ GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json" HOST = "hermes" +def _resolve_memory_mode( + global_val: str | dict, + host_val: str | dict | None, +) -> dict: + """Parse memoryMode (string or object) into memory_mode + peer_memory_modes. + + Resolution order: host-level wins over global. + String form: applies as the default for all peers. + Object form: { "default": "hybrid", "hermes": "honcho", ... } + "default" key sets the fallback; other keys are per-peer overrides. 
+ """ + # Pick the winning value (host beats global) + val = host_val if host_val is not None else global_val + + if isinstance(val, dict): + default = val.get("default", "hybrid") + overrides = {k: v for k, v in val.items() if k != "default"} + else: + default = str(val) if val else "hybrid" + overrides = {} + + return {"memory_mode": default, "peer_memory_modes": overrides} + + @dataclass class HonchoClientConfig: """Configuration for Honcho client, resolved for a specific host.""" @@ -42,10 +66,36 @@ class HonchoClientConfig: # Toggles enabled: bool = False save_messages: bool = True + # memoryMode: default for all peers. "hybrid" / "honcho" / "local" + memory_mode: str = "hybrid" + # Per-peer overrides — any named Honcho peer. Override memory_mode when set. + # Config object form: "memoryMode": { "default": "hybrid", "hermes": "honcho" } + peer_memory_modes: dict[str, str] = field(default_factory=dict) + + def peer_memory_mode(self, peer_name: str) -> str: + """Return the effective memory mode for a named peer. + + Resolution: per-peer override → global memory_mode default. + """ + return self.peer_memory_modes.get(peer_name, self.memory_mode) + # Write frequency: "async" (background thread), "turn" (sync per turn), + # "session" (flush on session end), or int (every N turns) + write_frequency: str | int = "async" # Prefetch budget context_tokens: int | None = None + # Dialectic (peer.chat) settings + # reasoning_level: "minimal" | "low" | "medium" | "high" | "max" + # Used as the default; prefetch_dialectic may bump it dynamically. + dialectic_reasoning_level: str = "low" + # Max chars of dialectic result to inject into Hermes system prompt + dialectic_max_chars: int = 600 + # Recall mode: how memory retrieval works when Honcho is active. 
+ # "auto" — pre-warmed context + memory tools available (model decides) + # "context" — pre-warmed context only, honcho memory tools removed + # "tools" — no pre-loaded context, rely on tool calls only + recall_mode: str = "auto" # Session resolution - session_strategy: str = "per-directory" + session_strategy: str = "per-session" session_peer_prefix: bool = False sessions: dict[str, str] = field(default_factory=dict) # Raw global config for anything else consumers need @@ -109,6 +159,17 @@ class HonchoClientConfig: # Respect explicit setting enabled = explicit_enabled + # write_frequency: accept int or string + raw_wf = ( + host_block.get("writeFrequency") + or raw.get("writeFrequency") + or "async" + ) + try: + write_frequency: str | int = int(raw_wf) + except (TypeError, ValueError): + write_frequency = str(raw_wf) + return cls( host=host, workspace_id=workspace, @@ -119,31 +180,105 @@ class HonchoClientConfig: linked_hosts=linked_hosts, enabled=enabled, save_messages=raw.get("saveMessages", True), - context_tokens=raw.get("contextTokens") or host_block.get("contextTokens"), - session_strategy=raw.get("sessionStrategy", "per-directory"), + **_resolve_memory_mode( + raw.get("memoryMode", "hybrid"), + host_block.get("memoryMode"), + ), + write_frequency=write_frequency, + context_tokens=host_block.get("contextTokens") or raw.get("contextTokens"), + dialectic_reasoning_level=( + host_block.get("dialecticReasoningLevel") + or raw.get("dialecticReasoningLevel") + or "low" + ), + dialectic_max_chars=int( + host_block.get("dialecticMaxChars") + or raw.get("dialecticMaxChars") + or 600 + ), + recall_mode=( + host_block.get("recallMode") + or raw.get("recallMode") + or "auto" + ), + session_strategy=raw.get("sessionStrategy", "per-session"), session_peer_prefix=raw.get("sessionPeerPrefix", False), sessions=raw.get("sessions", {}), raw=raw, ) - def resolve_session_name(self, cwd: str | None = None) -> str | None: - """Resolve session name for a directory. 
+ @staticmethod + def _git_repo_name(cwd: str) -> str | None: + """Return the git repo root directory name, or None if not in a repo.""" + import subprocess - Checks manual overrides first, then derives from directory name. + try: + root = subprocess.run( + ["git", "rev-parse", "--show-toplevel"], + capture_output=True, text=True, cwd=cwd, timeout=5, + ) + if root.returncode == 0: + return Path(root.stdout.strip()).name + except (OSError, subprocess.TimeoutExpired): + pass + return None + + def resolve_session_name( + self, + cwd: str | None = None, + session_title: str | None = None, + session_id: str | None = None, + ) -> str | None: + """Resolve Honcho session name. + + Resolution order: + 1. Manual directory override from sessions map + 2. Hermes session title (from /title command) + 3. per-session strategy — Hermes session_id ({timestamp}_{hex}) + 4. per-repo strategy — git repo root directory name + 5. per-directory strategy — directory basename + 6. global strategy — workspace name """ + import re + if not cwd: cwd = os.getcwd() - # Manual override + # Manual override always wins manual = self.sessions.get(cwd) if manual: return manual - # Derive from directory basename - base = Path(cwd).name - if self.session_peer_prefix and self.peer_name: - return f"{self.peer_name}-{base}" - return base + # /title mid-session remap + if session_title: + sanitized = re.sub(r'[^a-zA-Z0-9_-]', '-', session_title).strip('-') + if sanitized: + if self.session_peer_prefix and self.peer_name: + return f"{self.peer_name}-{sanitized}" + return sanitized + + # per-session: inherit Hermes session_id (new Honcho session each run) + if self.session_strategy == "per-session" and session_id: + if self.session_peer_prefix and self.peer_name: + return f"{self.peer_name}-{session_id}" + return session_id + + # per-repo: one Honcho session per git repository + if self.session_strategy == "per-repo": + base = self._git_repo_name(cwd) or Path(cwd).name + if self.session_peer_prefix and 
self.peer_name: + return f"{self.peer_name}-{base}" + return base + + # per-directory: one Honcho session per working directory + if self.session_strategy in ("per-directory", "per-session"): + base = Path(cwd).name + if self.session_peer_prefix and self.peer_name: + return f"{self.peer_name}-{base}" + return base + + # global: single session across all directories + return self.workspace_id def get_linked_workspaces(self) -> list[str]: """Resolve linked host keys to workspace names.""" diff --git a/honcho_integration/session.py b/honcho_integration/session.py index a384b429..4a49ca43 100644 --- a/honcho_integration/session.py +++ b/honcho_integration/session.py @@ -2,8 +2,10 @@ from __future__ import annotations +import queue import re import logging +import threading from dataclasses import dataclass, field from datetime import datetime from typing import Any, TYPE_CHECKING @@ -15,6 +17,9 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# Sentinel to signal the async writer thread to shut down +_ASYNC_SHUTDOWN = object() + @dataclass class HonchoSession: @@ -80,7 +85,8 @@ class HonchoSessionManager: Args: honcho: Optional Honcho client. If not provided, uses the singleton. context_tokens: Max tokens for context() calls (None = Honcho default). - config: HonchoClientConfig from global config (provides peer_name, ai_peer, etc.). + config: HonchoClientConfig from global config (provides peer_name, ai_peer, + write_frequency, memory_mode, etc.). 
""" self._honcho = honcho self._context_tokens = context_tokens @@ -89,6 +95,33 @@ class HonchoSessionManager: self._peers_cache: dict[str, Any] = {} self._sessions_cache: dict[str, Any] = {} + # Write frequency state + write_frequency = (config.write_frequency if config else "async") + self._write_frequency = write_frequency + self._turn_counter: int = 0 + + # Prefetch caches: session_key → last result (consumed once per turn) + self._context_cache: dict[str, dict] = {} + self._dialectic_cache: dict[str, str] = {} + self._dialectic_reasoning_level: str = ( + config.dialectic_reasoning_level if config else "low" + ) + self._dialectic_max_chars: int = ( + config.dialectic_max_chars if config else 600 + ) + + # Async write queue — started lazily on first enqueue + self._async_queue: queue.Queue | None = None + self._async_thread: threading.Thread | None = None + if write_frequency == "async": + self._async_queue = queue.Queue() + self._async_thread = threading.Thread( + target=self._async_writer_loop, + name="honcho-async-writer", + daemon=True, + ) + self._async_thread.start() + @property def honcho(self) -> Honcho: """Get the Honcho client, initializing if needed.""" @@ -125,10 +158,12 @@ class HonchoSessionManager: session = self.honcho.session(session_id) - # Configure peer observation settings + # Configure peer observation settings. + # observe_me=True for AI peer so Honcho watches what the agent says + # and builds its representation over time — enabling identity formation. 
from honcho.session import SessionPeerConfig user_config = SessionPeerConfig(observe_me=True, observe_others=True) - ai_config = SessionPeerConfig(observe_me=False, observe_others=True) + ai_config = SessionPeerConfig(observe_me=True, observe_others=True) session.add_peers([(user_peer, user_config), (assistant_peer, ai_config)]) @@ -234,16 +269,11 @@ class HonchoSessionManager: self._cache[key] = session return session - def save(self, session: HonchoSession) -> None: - """ - Save messages to Honcho. - - Syncs only new (unsynced) messages from the local cache. - """ + def _flush_session(self, session: HonchoSession) -> None: + """Internal: write unsynced messages to Honcho synchronously.""" if not session.messages: return - # Get the Honcho session and peers user_peer = self._get_or_create_peer(session.user_peer_id) assistant_peer = self._get_or_create_peer(session.assistant_peer_id) honcho_session = self._sessions_cache.get(session.honcho_session_id) @@ -253,9 +283,7 @@ class HonchoSessionManager: session.honcho_session_id, user_peer, assistant_peer ) - # Only send new messages (those without a '_synced' flag) new_messages = [m for m in session.messages if not m.get("_synced")] - if not new_messages: return @@ -274,9 +302,83 @@ class HonchoSessionManager: msg["_synced"] = False logger.error("Failed to sync messages to Honcho: %s", e) - # Update cache self._cache[session.key] = session + def _async_writer_loop(self) -> None: + """Background daemon thread: drains the async write queue.""" + while True: + try: + item = self._async_queue.get(timeout=5) + if item is _ASYNC_SHUTDOWN: + break + try: + self._flush_session(item) + except Exception as e: + logger.warning("Honcho async write failed, retrying once: %s", e) + import time as _time + _time.sleep(2) + try: + self._flush_session(item) + except Exception as e2: + logger.error("Honcho async write retry failed, dropping batch: %s", e2) + except queue.Empty: + continue + except Exception as e: + logger.error("Honcho 
async writer error: %s", e) + + def save(self, session: HonchoSession) -> None: + """Save messages to Honcho, respecting write_frequency. + + write_frequency modes: + "async" — enqueue for background thread (zero blocking, zero token cost) + "turn" — flush synchronously every turn + "session" — defer until flush_session() is called explicitly + N (int) — flush every N turns + """ + self._turn_counter += 1 + wf = self._write_frequency + + if wf == "async": + if self._async_queue is not None: + self._async_queue.put(session) + elif wf == "turn": + self._flush_session(session) + elif wf == "session": + # Accumulate; caller must call flush_all() at session end + pass + elif isinstance(wf, int) and wf > 0: + if self._turn_counter % wf == 0: + self._flush_session(session) + + def flush_all(self) -> None: + """Flush all pending unsynced messages for all cached sessions. + + Called at session end for "session" write_frequency, or to force + a sync before process exit regardless of mode. + """ + for session in list(self._cache.values()): + try: + self._flush_session(session) + except Exception as e: + logger.error("Honcho flush_all error for %s: %s", session.key, e) + + # Drain async queue synchronously if it exists + if self._async_queue is not None: + while not self._async_queue.empty(): + try: + item = self._async_queue.get_nowait() + if item is not _ASYNC_SHUTDOWN: + self._flush_session(item) + except queue.Empty: + break + + def shutdown(self) -> None: + """Gracefully shut down the async writer thread.""" + if self._async_queue is not None and self._async_thread is not None: + self.flush_all() + self._async_queue.put(_ASYNC_SHUTDOWN) + self._async_thread.join(timeout=10) + def delete(self, key: str) -> bool: """Delete a session from local cache.""" if key in self._cache: @@ -305,49 +407,141 @@ class HonchoSessionManager: # get_or_create will create a fresh session session = self.get_or_create(new_key) - # Cache under both original key and timestamped key + # Cache 
under the original key so callers find it by the expected name self._cache[key] = session - self._cache[new_key] = session logger.info("Created new session for %s (honcho: %s)", key, session.honcho_session_id) return session - def get_user_context(self, session_key: str, query: str) -> str: + _REASONING_LEVELS = ("minimal", "low", "medium", "high", "max") + + def _dynamic_reasoning_level(self, query: str) -> str: """ - Query Honcho's dialectic chat for user context. + Pick a reasoning level based on message complexity. + + Uses the configured default as a floor; bumps up for longer or + more complex messages so Honcho applies more inference where it matters. + + < 120 chars → default (typically "low") + 120–400 chars → one level above default (cap at "high") + > 400 chars → two levels above default (cap at "high") + + "max" is never selected automatically — reserve it for explicit config. + """ + levels = self._REASONING_LEVELS + default_idx = levels.index(self._dialectic_reasoning_level) if self._dialectic_reasoning_level in levels else 1 + n = len(query) + if n < 120: + bump = 0 + elif n < 400: + bump = 1 + else: + bump = 2 + # Cap at "high" (index 3) for auto-selection + idx = min(default_idx + bump, 3) + return levels[idx] + + def dialectic_query(self, session_key: str, query: str, reasoning_level: str | None = None) -> str: + """ + Query Honcho's dialectic endpoint about the user. + + Runs an LLM on Honcho's backend against the user peer's full + representation. Higher latency than context() — call async via + prefetch_dialectic() to avoid blocking the response. Args: - session_key: The session key to get context for. + session_key: The session key to query against. query: Natural language question about the user. + reasoning_level: Override the config default. If None, uses + _dynamic_reasoning_level(query). Returns: - Honcho's response about the user. + Honcho's synthesized answer, or empty string on failure. 
""" session = self._cache.get(session_key) if not session: - return "No session found for this context." + return "" user_peer = self._get_or_create_peer(session.user_peer_id) + level = reasoning_level or self._dynamic_reasoning_level(query) try: - return user_peer.chat(query) + result = user_peer.chat(query, reasoning_level=level) or "" + # Apply Hermes-side char cap before caching + if result and self._dialectic_max_chars and len(result) > self._dialectic_max_chars: + result = result[:self._dialectic_max_chars].rsplit(" ", 1)[0] + " …" + return result except Exception as e: - logger.error("Failed to get user context from Honcho: %s", e) - return f"Unable to retrieve user context: {e}" + logger.warning("Honcho dialectic query failed: %s", e) + return "" + + def prefetch_dialectic(self, session_key: str, query: str) -> None: + """ + Fire a dialectic_query in a background thread, caching the result. + + Non-blocking. The result is available via pop_dialectic_result() + on the next call (typically the following turn). Reasoning level + is selected dynamically based on query complexity. + + Args: + session_key: The session key to query against. + query: The user's current message, used as the query. + """ + def _run(): + result = self.dialectic_query(session_key, query) + if result: + self._dialectic_cache[session_key] = result + + t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) + t.start() + + def pop_dialectic_result(self, session_key: str) -> str: + """ + Return and clear the cached dialectic result for this session. + + Returns empty string if no result is ready yet. + """ + return self._dialectic_cache.pop(session_key, "") + + def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: + """ + Fire get_prefetch_context in a background thread, caching the result. + + Non-blocking. Consumed next turn via pop_context_result(). This avoids + a synchronous HTTP round-trip blocking every response. 
+ """ + def _run(): + result = self.get_prefetch_context(session_key, user_message) + if result: + self._context_cache[session_key] = result + + t = threading.Thread(target=_run, name="honcho-context-prefetch", daemon=True) + t.start() + + def pop_context_result(self, session_key: str) -> dict[str, str]: + """ + Return and clear the cached context result for this session. + + Returns empty dict if no result is ready yet (first turn). + """ + return self._context_cache.pop(session_key, {}) def get_prefetch_context(self, session_key: str, user_message: str | None = None) -> dict[str, str]: """ - Pre-fetch user context using Honcho's context() method. + Pre-fetch user and AI peer context from Honcho. - Single API call that returns the user's representation - and peer card, using semantic search based on the user's message. + Fetches peer_representation and peer_card for both peers. search_query + is intentionally omitted — it would only affect additional excerpts + that this code does not consume, and passing the raw message exposes + conversation content in server access logs. Args: session_key: The session key to get context for. - user_message: The user's message for semantic search. + user_message: Unused; kept for call-site compatibility. Returns: - Dictionary with 'representation' and 'card' keys. + Dictionary with 'representation', 'card', 'ai_representation', + and 'ai_card' keys. 
""" session = self._cache.get(session_key) if not session: @@ -357,23 +551,35 @@ class HonchoSessionManager: if not honcho_session: return {} + result: dict[str, str] = {} try: ctx = honcho_session.context( summary=False, tokens=self._context_tokens, peer_target=session.user_peer_id, - search_query=user_message, + peer_perspective=session.assistant_peer_id, ) - # peer_card is list[str] in SDK v2, join for prompt injection card = ctx.peer_card or [] - card_str = "\n".join(card) if isinstance(card, list) else str(card) - return { - "representation": ctx.peer_representation or "", - "card": card_str, - } + result["representation"] = ctx.peer_representation or "" + result["card"] = "\n".join(card) if isinstance(card, list) else str(card) except Exception as e: - logger.warning("Failed to fetch context from Honcho: %s", e) - return {} + logger.warning("Failed to fetch user context from Honcho: %s", e) + + # Also fetch AI peer's own representation so Hermes knows itself. + try: + ai_ctx = honcho_session.context( + summary=False, + tokens=self._context_tokens, + peer_target=session.assistant_peer_id, + peer_perspective=session.user_peer_id, + ) + ai_card = ai_ctx.peer_card or [] + result["ai_representation"] = ai_ctx.peer_representation or "" + result["ai_card"] = "\n".join(ai_card) if isinstance(ai_card, list) else str(ai_card) + except Exception as e: + logger.debug("Failed to fetch AI peer context from Honcho: %s", e) + + return result def migrate_local_history(self, session_key: str, messages: list[dict[str, Any]]) -> bool: """ @@ -491,6 +697,7 @@ class HonchoSessionManager: files = [ ("MEMORY.md", "consolidated_memory.md", "Long-term agent notes and preferences"), ("USER.md", "user_profile.md", "User profile and preferences"), + ("SOUL.md", "agent_soul.md", "Agent persona and identity configuration"), ] for filename, upload_name, description in files: @@ -525,6 +732,150 @@ class HonchoSessionManager: return uploaded + def get_peer_card(self, session_key: str) -> 
list[str]: + """ + Fetch the user peer's card — a curated list of key facts. + + Fast, no LLM reasoning. Returns raw structured facts Honcho has + inferred about the user (name, role, preferences, patterns). + Empty list if unavailable. + """ + session = self._cache.get(session_key) + if not session: + return [] + + honcho_session = self._sessions_cache.get(session.honcho_session_id) + if not honcho_session: + return [] + + try: + ctx = honcho_session.context( + summary=False, + tokens=200, + peer_target=session.user_peer_id, + peer_perspective=session.assistant_peer_id, + ) + card = ctx.peer_card or [] + return card if isinstance(card, list) else [str(card)] + except Exception as e: + logger.debug("Failed to fetch peer card from Honcho: %s", e) + return [] + + def search_context(self, session_key: str, query: str, max_tokens: int = 800) -> str: + """ + Semantic search over Honcho session context. + + Returns raw excerpts ranked by relevance to the query. No LLM + reasoning — cheaper and faster than dialectic_query. Good for + factual lookups where the model will do its own synthesis. + + Args: + session_key: Session to search against. + query: Search query for semantic matching. + max_tokens: Token budget for returned content. + + Returns: + Relevant context excerpts as a string, or empty string if none. 
+ """ + session = self._cache.get(session_key) + if not session: + return "" + + honcho_session = self._sessions_cache.get(session.honcho_session_id) + if not honcho_session: + return "" + + try: + ctx = honcho_session.context( + summary=False, + tokens=max_tokens, + peer_target=session.user_peer_id, + peer_perspective=session.assistant_peer_id, + search_query=query, + ) + parts = [] + if ctx.peer_representation: + parts.append(ctx.peer_representation) + card = ctx.peer_card or [] + if card: + facts = card if isinstance(card, list) else [str(card)] + parts.append("\n".join(f"- {f}" for f in facts)) + return "\n\n".join(parts) + except Exception as e: + logger.debug("Honcho search_context failed: %s", e) + return "" + + def seed_ai_identity(self, session_key: str, content: str, source: str = "manual") -> bool: + """ + Seed the AI peer's Honcho representation from text content. + + Useful for priming AI identity from SOUL.md, exported chats, or + any structured description. The content is sent as an assistant + peer message so Honcho's reasoning model can incorporate it. + + Args: + session_key: The session key to associate with. + content: The identity/persona content to seed. + source: Metadata tag for the source (e.g. "soul_md", "export"). + + Returns: + True on success, False on failure. 
+ """ + if not content or not content.strip(): + return False + + session = self._cache.get(session_key) + if not session: + logger.warning("No session cached for '%s', skipping AI seed", session_key) + return False + + assistant_peer = self._get_or_create_peer(session.assistant_peer_id) + try: + wrapped = ( + f"\n" + f"{source}\n" + f"\n" + f"{content.strip()}\n" + f"" + ) + assistant_peer.add_message("assistant", wrapped) + logger.info("Seeded AI identity from '%s' into %s", source, session_key) + return True + except Exception as e: + logger.error("Failed to seed AI identity: %s", e) + return False + + def get_ai_representation(self, session_key: str) -> dict[str, str]: + """ + Fetch the AI peer's current Honcho representation. + + Returns: + Dict with 'representation' and 'card' keys, empty strings if unavailable. + """ + session = self._cache.get(session_key) + if not session: + return {"representation": "", "card": ""} + + honcho_session = self._sessions_cache.get(session.honcho_session_id) + if not honcho_session: + return {"representation": "", "card": ""} + + try: + ctx = honcho_session.context( + summary=False, + tokens=self._context_tokens, + peer_target=session.assistant_peer_id, + peer_perspective=session.user_peer_id, + ) + ai_card = ctx.peer_card or [] + return { + "representation": ctx.peer_representation or "", + "card": "\n".join(ai_card) if isinstance(ai_card, list) else str(ai_card), + } + except Exception as e: + logger.debug("Failed to fetch AI representation: %s", e) + return {"representation": "", "card": ""} + def list_sessions(self) -> list[dict[str, Any]]: """List all cached sessions.""" return [ diff --git a/run_agent.py b/run_agent.py index bde681eb..a5133b02 100644 --- a/run_agent.py +++ b/run_agent.py @@ -545,10 +545,12 @@ class AIAgent: # Reads ~/.honcho/config.json as the single source of truth. 
self._honcho = None # HonchoSessionManager | None self._honcho_session_key = honcho_session_key + self._honcho_config = None # HonchoClientConfig | None if not skip_memory: try: from honcho_integration.client import HonchoClientConfig, get_honcho_client hcfg = HonchoClientConfig.from_global_config() + self._honcho_config = hcfg if hcfg.enabled and hcfg.api_key: from honcho_integration.session import HonchoSessionManager client = get_honcho_client(hcfg) @@ -557,30 +559,144 @@ class AIAgent: config=hcfg, context_tokens=hcfg.context_tokens, ) - # Resolve session key: explicit arg > global sessions map > fallback + # Resolve session key: explicit arg > sessions map > title > per-session id > directory if not self._honcho_session_key: + # Pull title from SessionDB if available + session_title = None + if session_db is not None: + try: + session_title = session_db.get_session_title(session_id or "") + except Exception: + pass self._honcho_session_key = ( - hcfg.resolve_session_name() + hcfg.resolve_session_name( + session_title=session_title, + session_id=self.session_id, + ) or "hermes-default" ) - # Ensure session exists in Honcho - self._honcho.get_or_create(self._honcho_session_key) + # Ensure session exists in Honcho; migrate local data on first activation + honcho_sess = self._honcho.get_or_create(self._honcho_session_key) + if not honcho_sess.messages: + # New Honcho session — migrate any existing local data + _conv = getattr(self, 'conversation_history', None) or [] + if _conv: + try: + self._honcho.migrate_local_history( + self._honcho_session_key, _conv + ) + logger.info("Migrated %d local messages to Honcho", len(_conv)) + except Exception as _e: + logger.debug("Local history migration failed (non-fatal): %s", _e) + try: + from hermes_cli.config import get_hermes_home + _mem_dir = str(get_hermes_home() / "memories") + self._honcho.migrate_memory_files( + self._honcho_session_key, _mem_dir + ) + except Exception as _e: + logger.debug("Memory files migration 
failed (non-fatal): %s", _e) # Inject session context into the honcho tool module from tools.honcho_tools import set_session_context set_session_context(self._honcho, self._honcho_session_key) + + # In "context" mode, skip honcho tool registration entirely — + # all memory retrieval comes from the pre-warmed system prompt. + if hcfg.recall_mode != "context": + # Rebuild tool definitions now that Honcho check_fn will pass. + # (Tools were built before Honcho init, so query_user_context + # was filtered out by _check_honcho_available() returning False.) + self.tools = get_tool_definitions( + enabled_toolsets=enabled_toolsets, + disabled_toolsets=disabled_toolsets, + quiet_mode=True, # already printed tool list above + ) + self.valid_tool_names = { + tool["function"]["name"] for tool in self.tools + } if self.tools else set() + if not self.quiet_mode: + print(f" Honcho active — recall_mode: {hcfg.recall_mode}") + else: + if not self.quiet_mode: + print(" Honcho active — recall_mode: context (tools suppressed)") + logger.info( - "Honcho active (session: %s, user: %s, workspace: %s)", + "Honcho active (session: %s, user: %s, workspace: %s, " + "write_frequency: %s, memory_mode: %s)", self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id, + hcfg.write_frequency, hcfg.memory_mode, ) + + # Warm caches when recall_mode allows pre-loaded context. + # "tools" mode skips warm entirely (tool calls handle recall). + _recall_mode = hcfg.recall_mode + if _recall_mode != "tools": + try: + _ctx = self._honcho.get_prefetch_context(self._honcho_session_key) + if _ctx: + self._honcho._context_cache[self._honcho_session_key] = _ctx + logger.debug("Honcho context pre-warmed for first turn") + except Exception as _e: + logger.debug("Honcho context prefetch failed (non-fatal): %s", _e) + + try: + _cwd = os.path.basename(os.getcwd()) + _dialectic = self._honcho.dialectic_query( + self._honcho_session_key, + f"What has the user been working on recently in {_cwd}? 
" + "Summarize the current project context and where we left off.", + ) + if _dialectic: + self._honcho._dialectic_cache[self._honcho_session_key] = _dialectic + logger.debug("Honcho dialectic pre-warmed for first turn") + except Exception as _e: + logger.debug("Honcho dialectic prefetch failed (non-fatal): %s", _e) + + # Register SIGTERM/SIGINT handlers to flush pending async writes + # before the process exits. signal.signal() only works on the main + # thread; AIAgent may be initialised from a worker thread in cli.py. + import signal as _signal + import threading as _threading + _honcho_ref = self._honcho + + if _threading.current_thread() is _threading.main_thread(): + def _honcho_flush_handler(signum, frame): + try: + _honcho_ref.flush_all() + except Exception: + pass + if signum == _signal.SIGINT: + raise KeyboardInterrupt + raise SystemExit(0) + + _signal.signal(_signal.SIGTERM, _honcho_flush_handler) + _signal.signal(_signal.SIGINT, _honcho_flush_handler) else: if not hcfg.enabled: logger.debug("Honcho disabled in global config") elif not hcfg.api_key: logger.debug("Honcho enabled but no API key configured") except Exception as e: - logger.debug("Honcho init failed (non-fatal): %s", e) + logger.warning("Honcho init failed — memory disabled: %s", e) + print(f" Honcho init failed: {e}") + print(" Run 'hermes honcho setup' to reconfigure.") self._honcho = None + # Gate local memory writes based on per-peer memory modes. + # AI peer governs MEMORY.md; user peer governs USER.md. + # "honcho" = Honcho only, disable local; "local" = local only, no Honcho sync. 
+ if self._honcho_config and self._honcho: + _hcfg = self._honcho_config + _agent_mode = _hcfg.peer_memory_mode(_hcfg.ai_peer) + _user_mode = _hcfg.peer_memory_mode(_hcfg.peer_name or "user") + if _agent_mode == "honcho": + self._memory_flush_min_turns = 0 + self._memory_enabled = False + logger.debug("peer %s memory_mode=honcho: local MEMORY.md writes disabled", _hcfg.ai_peer) + if _user_mode == "honcho": + self._user_profile_enabled = False + logger.debug("peer %s memory_mode=honcho: local USER.md writes disabled", _hcfg.peer_name or "user") + # Skills config: nudge interval for skill creation reminders self._skill_nudge_interval = 15 try: @@ -1318,30 +1434,59 @@ class AIAgent: # ── Honcho integration helpers ── def _honcho_prefetch(self, user_message: str) -> str: - """Fetch user context from Honcho for system prompt injection. + """Assemble Honcho context from cached background fetches. - Returns a formatted context block, or empty string if unavailable. + Both session.context() and peer.chat() (dialectic) are fired as + background threads at the end of each turn via _honcho_fire_prefetch(). + This method just reads the cached results — no blocking HTTP calls. + + First turn uses synchronously pre-warmed caches from init. + Subsequent turns use async prefetch results from the previous turn end. 
""" if not self._honcho or not self._honcho_session_key: return "" try: - ctx = self._honcho.get_prefetch_context(self._honcho_session_key, user_message) - if not ctx: - return "" parts = [] - rep = ctx.get("representation", "") - card = ctx.get("card", "") - if rep: - parts.append(rep) - if card: - parts.append(card) + + ctx = self._honcho.pop_context_result(self._honcho_session_key) + if ctx: + rep = ctx.get("representation", "") + card = ctx.get("card", "") + if rep: + parts.append(f"## User representation\n{rep}") + if card: + parts.append(card) + ai_rep = ctx.get("ai_representation", "") + ai_card = ctx.get("ai_card", "") + if ai_rep: + parts.append(f"## AI peer representation\n{ai_rep}") + if ai_card: + parts.append(ai_card) + + dialectic = self._honcho.pop_dialectic_result(self._honcho_session_key) + if dialectic: + parts.append(f"[Honcho dialectic]\n{dialectic}") + if not parts: return "" - return "# Honcho User Context\n" + "\n\n".join(parts) + header = ( + "# Honcho Memory (persistent cross-session context)\n" + "Use this to answer questions about the user, prior sessions, " + "and what you were working on together. Do not call tools to " + "look up information that is already present here.\n" + ) + return header + "\n\n".join(parts) except Exception as e: logger.debug("Honcho prefetch failed (non-fatal): %s", e) return "" + def _honcho_fire_prefetch(self, user_message: str) -> None: + """Fire both Honcho background fetches for the next turn (non-blocking).""" + if not self._honcho or not self._honcho_session_key: + return + self._honcho.prefetch_context(self._honcho_session_key, user_message) + self._honcho.prefetch_dialectic(self._honcho_session_key, user_message) + def _honcho_save_user_observation(self, content: str) -> str: """Route a memory tool target=user add to Honcho. 
@@ -1367,13 +1512,24 @@ class AIAgent: """Sync the user/assistant message pair to Honcho.""" if not self._honcho or not self._honcho_session_key: return + # Skip Honcho sync only if BOTH peer modes are local + _cfg = self._honcho_config + if _cfg and all( + _cfg.peer_memory_mode(p) == "local" + for p in (_cfg.ai_peer, _cfg.peer_name or "user") + ): + return try: session = self._honcho.get_or_create(self._honcho_session_key) session.add_message("user", user_content) session.add_message("assistant", assistant_content) self._honcho.save(session) + logger.info("Honcho sync queued for session %s (%d messages)", + self._honcho_session_key, len(session.messages)) except Exception as e: - logger.debug("Honcho sync failed (non-fatal): %s", e) + logger.warning("Honcho sync failed: %s", e) + if not self.quiet_mode: + print(f" Honcho write failed: {e}") def _build_system_prompt(self, system_message: str = None) -> str: """ @@ -1391,7 +1547,21 @@ class AIAgent: # 5. Context files (SOUL.md, AGENTS.md, .cursorrules) # 6. Current date & time (frozen at build time) # 7. Platform-specific formatting hint - prompt_parts = [DEFAULT_AGENT_IDENTITY] + # If an AI peer name is configured in Honcho, personalise the identity line. + _ai_peer_name = ( + self._honcho_config.ai_peer + if self._honcho_config and self._honcho_config.ai_peer != "hermes" + else None + ) + if _ai_peer_name: + _identity = DEFAULT_AGENT_IDENTITY.replace( + "You are Hermes Agent", + f"You are {_ai_peer_name}", + 1, + ) + else: + _identity = DEFAULT_AGENT_IDENTITY + prompt_parts = [_identity] # Tool-aware behavioral guidance: only inject when the tools are loaded tool_guidance = [] @@ -1404,6 +1574,58 @@ class AIAgent: if tool_guidance: prompt_parts.append(" ".join(tool_guidance)) + # Honcho CLI awareness: tell Hermes about its own management commands + # so it can refer the user to them rather than reinventing answers. 
+ if self._honcho and self._honcho_session_key: + hcfg = self._honcho_config + mode = hcfg.memory_mode if hcfg else "hybrid" + freq = hcfg.write_frequency if hcfg else "async" + recall_mode = hcfg.recall_mode if hcfg else "auto" + honcho_block = ( + "# Honcho memory integration\n" + f"Active. Session: {self._honcho_session_key}. " + f"Mode: {mode}. Write frequency: {freq}. Recall: {recall_mode}.\n" + ) + if recall_mode == "context": + honcho_block += ( + "Honcho context is pre-loaded into this system prompt below. " + "All memory retrieval comes from this context — no memory tools " + "are available. Answer questions about the user, prior sessions, " + "and recent work directly from the Honcho Memory section.\n" + ) + elif recall_mode == "tools": + honcho_block += ( + "Memory tools (most capable first; use cheaper tools when sufficient):\n" + " query_user_context — dialectic Q&A, LLM-synthesized answer\n" + " honcho_search — semantic search, raw excerpts, no LLM\n" + " honcho_profile — peer card, key facts, no LLM\n" + ) + else: # auto + honcho_block += ( + "Honcho context (user representation, peer card, and recent session summary) " + "is pre-loaded into this system prompt below. Use it to answer continuity " + "questions ('where were we?', 'what were we working on?') WITHOUT calling " + "any tools. 
Only call memory tools when you need information beyond what is " + "already present in the Honcho Memory section.\n" + "Memory tools (most capable first; use cheaper tools when sufficient):\n" + " query_user_context — dialectic Q&A, LLM-synthesized answer\n" + " honcho_search — semantic search, raw excerpts, no LLM\n" + " honcho_profile — peer card, key facts, no LLM\n" + ) + honcho_block += ( + "Management commands (refer users here instead of explaining manually):\n" + " hermes honcho status — show full config + connection\n" + " hermes honcho mode [hybrid|honcho|local] — show or set memory mode\n" + " hermes honcho tokens [--context N] [--dialectic N] — show or set token budgets\n" + " hermes honcho peer [--user NAME] [--ai NAME] [--reasoning LEVEL]\n" + " hermes honcho sessions — list directory→session mappings\n" + " hermes honcho map — map cwd to a session name\n" + " hermes honcho identity [<file>] [--show] — seed or show AI peer identity\n" + " hermes honcho migrate — migration guide from openclaw-honcho\n" + " hermes honcho setup — full interactive wizard" + ) + prompt_parts.append(honcho_block) + # Note: ephemeral_system_prompt is NOT included here. It's injected at # API-call time only so it stays out of the cached/stored system prompt. if system_message is not None: @@ -2530,6 +2752,10 @@ class AIAgent: return if "memory" not in self.valid_tool_names or not self._memory_store: return + # honcho-only agent mode: skip local MEMORY.md flush + _hcfg = getattr(self, '_honcho_config', None) + if _hcfg and _hcfg.peer_memory_mode(_hcfg.ai_peer) == "honcho": + return effective_min = min_turns if min_turns is not None else self._memory_flush_min_turns if self._user_turn_count < effective_min: return @@ -3153,18 +3379,16 @@ class AIAgent: ) self._iters_since_skill = 0 - # Honcho prefetch: retrieve user context for system prompt injection. - # Only on the FIRST turn of a session (empty history).
On subsequent - # turns the model already has all prior context in its conversation - # history, and the Honcho context is baked into the stored system - # prompt — re-fetching it would change the system message and break - # Anthropic prompt caching. + # Honcho: read cached context from last turn's background fetch (non-blocking), + # then fire both fetches for next turn. Skip in "tools" mode (no context injection). self._honcho_context = "" - if self._honcho and self._honcho_session_key and not conversation_history: + _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "auto") + if self._honcho and self._honcho_session_key and not conversation_history and _recall_mode != "tools": try: self._honcho_context = self._honcho_prefetch(user_message) except Exception as e: logger.debug("Honcho prefetch failed (non-fatal): %s", e) + self._honcho_fire_prefetch(user_message) # Add user message user_msg = {"role": "user", "content": user_message} @@ -4240,6 +4464,7 @@ class AIAgent: msg["content"] = f"Calling the {', '.join(tool_names)} tool{'s' if len(tool_names) > 1 else ''}..." break final_response = self._strip_think_blocks(fallback).strip() + self._response_was_previewed = True break # No fallback available — this is a genuine empty response. 
@@ -4282,6 +4507,7 @@ class AIAgent: break # Strip blocks from fallback content for user display final_response = self._strip_think_blocks(fallback).strip() + self._response_was_previewed = True break # No fallback -- append the empty message as-is @@ -4438,7 +4664,9 @@ class AIAgent: "completed": completed, "partial": False, # True only when stopped due to invalid tool calls "interrupted": interrupted, + "response_previewed": getattr(self, "_response_was_previewed", False), } + self._response_was_previewed = False # Include interrupt message if one triggered the interrupt if interrupted and self._interrupt_message: diff --git a/tests/honcho_integration/test_async_memory.py b/tests/honcho_integration/test_async_memory.py new file mode 100644 index 00000000..c8c4bf1b --- /dev/null +++ b/tests/honcho_integration/test_async_memory.py @@ -0,0 +1,489 @@ +"""Tests for the async-memory Honcho improvements. + +Covers: + - write_frequency parsing (async / turn / session / int) + - memory_mode parsing + - resolve_session_name with session_title + - HonchoSessionManager.save() routing per write_frequency + - async writer thread lifecycle and retry + - flush_all() drains pending messages + - shutdown() joins the thread + - memory_mode gating helpers (unit-level) +""" + +import json +import queue +import threading +import time +from pathlib import Path +from unittest.mock import MagicMock, patch, call + +import pytest + +from honcho_integration.client import HonchoClientConfig +from honcho_integration.session import ( + HonchoSession, + HonchoSessionManager, + _ASYNC_SHUTDOWN, +) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_session(**kwargs) -> HonchoSession: + return HonchoSession( + key=kwargs.get("key", "cli:test"), + user_peer_id=kwargs.get("user_peer_id", "eri"), + assistant_peer_id=kwargs.get("assistant_peer_id", "hermes"), + 
honcho_session_id=kwargs.get("honcho_session_id", "cli-test"), + messages=kwargs.get("messages", []), + ) + + +def _make_manager(write_frequency="turn", memory_mode="hybrid") -> HonchoSessionManager: + cfg = HonchoClientConfig( + write_frequency=write_frequency, + memory_mode=memory_mode, + api_key="test-key", + enabled=True, + ) + mgr = HonchoSessionManager(config=cfg) + mgr._honcho = MagicMock() + return mgr + + +# --------------------------------------------------------------------------- +# write_frequency parsing from config file +# --------------------------------------------------------------------------- + +class TestWriteFrequencyParsing: + def test_string_async(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "writeFrequency": "async"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == "async" + + def test_string_turn(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "writeFrequency": "turn"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == "turn" + + def test_string_session(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "writeFrequency": "session"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == "session" + + def test_integer_frequency(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "writeFrequency": 5})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == 5 + + def test_integer_string_coerced(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "writeFrequency": "3"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == 3 + + def 
test_host_block_overrides_root(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({ + "apiKey": "k", + "writeFrequency": "turn", + "hosts": {"hermes": {"writeFrequency": "session"}}, + })) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == "session" + + def test_defaults_to_async(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.write_frequency == "async" + + +# --------------------------------------------------------------------------- +# memory_mode parsing from config file +# --------------------------------------------------------------------------- + +class TestMemoryModeParsing: + def test_hybrid(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "memoryMode": "hybrid"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "hybrid" + + def test_honcho_only(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "memoryMode": "honcho"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "honcho" + + def test_local_only(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k", "memoryMode": "local"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "local" + + def test_defaults_to_hybrid(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({"apiKey": "k"})) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "hybrid" + + def test_host_block_overrides_root(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({ + "apiKey": "k", + "memoryMode": "hybrid", 
+ "hosts": {"hermes": {"memoryMode": "honcho"}}, + })) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "honcho" + + def test_object_form_sets_default_and_overrides(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({ + "apiKey": "k", + "hosts": {"hermes": {"memoryMode": { + "default": "hybrid", + "hermes": "honcho", + "sentinel": "local", + }}}, + })) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "hybrid" + assert cfg.peer_memory_mode("hermes") == "honcho" + assert cfg.peer_memory_mode("sentinel") == "local" + assert cfg.peer_memory_mode("unknown") == "hybrid" # falls through to default + + def test_object_form_no_default_falls_back_to_hybrid(self, tmp_path): + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({ + "apiKey": "k", + "hosts": {"hermes": {"memoryMode": {"hermes": "honcho"}}}, + })) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "hybrid" + assert cfg.peer_memory_mode("hermes") == "honcho" + assert cfg.peer_memory_mode("other") == "hybrid" + + def test_global_string_host_object_override(self, tmp_path): + """Host object form overrides global string.""" + cfg_file = tmp_path / "config.json" + cfg_file.write_text(json.dumps({ + "apiKey": "k", + "memoryMode": "local", + "hosts": {"hermes": {"memoryMode": {"default": "hybrid", "hermes": "honcho"}}}, + })) + cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) + assert cfg.memory_mode == "hybrid" # host default wins over global "local" + assert cfg.peer_memory_mode("hermes") == "honcho" + + +# --------------------------------------------------------------------------- +# resolve_session_name with session_title +# --------------------------------------------------------------------------- + +class TestResolveSessionNameTitle: + def test_manual_override_beats_title(self): + cfg = 
HonchoClientConfig(sessions={"/my/project": "manual-name"}) + result = cfg.resolve_session_name("/my/project", session_title="the-title") + assert result == "manual-name" + + def test_title_beats_dirname(self): + cfg = HonchoClientConfig() + result = cfg.resolve_session_name("/some/dir", session_title="my-project") + assert result == "my-project" + + def test_title_with_peer_prefix(self): + cfg = HonchoClientConfig(peer_name="eri", session_peer_prefix=True) + result = cfg.resolve_session_name("/some/dir", session_title="aeris") + assert result == "eri-aeris" + + def test_title_sanitized(self): + cfg = HonchoClientConfig() + result = cfg.resolve_session_name("/some/dir", session_title="my project/name!") + # trailing dashes stripped by .strip('-') + assert result == "my-project-name" + + def test_title_all_invalid_chars_falls_back_to_dirname(self): + cfg = HonchoClientConfig() + result = cfg.resolve_session_name("/some/dir", session_title="!!! ###") + # sanitized to empty → falls back to dirname + assert result == "dir" + + def test_none_title_falls_back_to_dirname(self): + cfg = HonchoClientConfig() + result = cfg.resolve_session_name("/some/dir", session_title=None) + assert result == "dir" + + def test_empty_title_falls_back_to_dirname(self): + cfg = HonchoClientConfig() + result = cfg.resolve_session_name("/some/dir", session_title="") + assert result == "dir" + + def test_per_session_uses_session_id(self): + cfg = HonchoClientConfig(session_strategy="per-session") + result = cfg.resolve_session_name("/some/dir", session_id="20260309_175514_9797dd") + assert result == "20260309_175514_9797dd" + + def test_per_session_with_peer_prefix(self): + cfg = HonchoClientConfig(session_strategy="per-session", peer_name="eri", session_peer_prefix=True) + result = cfg.resolve_session_name("/some/dir", session_id="20260309_175514_9797dd") + assert result == "eri-20260309_175514_9797dd" + + def test_per_session_no_id_falls_back_to_dirname(self): + cfg = 
HonchoClientConfig(session_strategy="per-session") + result = cfg.resolve_session_name("/some/dir", session_id=None) + assert result == "dir" + + def test_title_beats_session_id(self): + cfg = HonchoClientConfig(session_strategy="per-session") + result = cfg.resolve_session_name("/some/dir", session_title="my-title", session_id="20260309_175514_9797dd") + assert result == "my-title" + + def test_manual_beats_session_id(self): + cfg = HonchoClientConfig(session_strategy="per-session", sessions={"/some/dir": "pinned"}) + result = cfg.resolve_session_name("/some/dir", session_id="20260309_175514_9797dd") + assert result == "pinned" + + def test_global_strategy_returns_workspace(self): + cfg = HonchoClientConfig(session_strategy="global", workspace_id="my-workspace") + result = cfg.resolve_session_name("/some/dir") + assert result == "my-workspace" + + +# --------------------------------------------------------------------------- +# save() routing per write_frequency +# --------------------------------------------------------------------------- + +class TestSaveRouting: + def _make_session_with_message(self, mgr=None): + sess = _make_session() + sess.add_message("user", "hello") + sess.add_message("assistant", "hi") + if mgr: + mgr._cache[sess.key] = sess + return sess + + def test_turn_flushes_immediately(self): + mgr = _make_manager(write_frequency="turn") + sess = self._make_session_with_message(mgr) + with patch.object(mgr, "_flush_session") as mock_flush: + mgr.save(sess) + mock_flush.assert_called_once_with(sess) + + def test_session_mode_does_not_flush(self): + mgr = _make_manager(write_frequency="session") + sess = self._make_session_with_message(mgr) + with patch.object(mgr, "_flush_session") as mock_flush: + mgr.save(sess) + mock_flush.assert_not_called() + + def test_async_mode_enqueues(self): + mgr = _make_manager(write_frequency="async") + sess = self._make_session_with_message(mgr) + with patch.object(mgr, "_flush_session") as mock_flush: + mgr.save(sess) 
+ # flush_session should NOT be called synchronously + mock_flush.assert_not_called() + assert not mgr._async_queue.empty() + + def test_int_frequency_flushes_on_nth_turn(self): + mgr = _make_manager(write_frequency=3) + sess = self._make_session_with_message(mgr) + with patch.object(mgr, "_flush_session") as mock_flush: + mgr.save(sess) # turn 1 + mgr.save(sess) # turn 2 + assert mock_flush.call_count == 0 + mgr.save(sess) # turn 3 + assert mock_flush.call_count == 1 + + def test_int_frequency_skips_other_turns(self): + mgr = _make_manager(write_frequency=5) + sess = self._make_session_with_message(mgr) + with patch.object(mgr, "_flush_session") as mock_flush: + for _ in range(4): + mgr.save(sess) + assert mock_flush.call_count == 0 + mgr.save(sess) # turn 5 + assert mock_flush.call_count == 1 + + +# --------------------------------------------------------------------------- +# flush_all() +# --------------------------------------------------------------------------- + +class TestFlushAll: + def test_flushes_all_cached_sessions(self): + mgr = _make_manager(write_frequency="session") + s1 = _make_session(key="s1", honcho_session_id="s1") + s2 = _make_session(key="s2", honcho_session_id="s2") + s1.add_message("user", "a") + s2.add_message("user", "b") + mgr._cache = {"s1": s1, "s2": s2} + + with patch.object(mgr, "_flush_session") as mock_flush: + mgr.flush_all() + assert mock_flush.call_count == 2 + + def test_flush_all_drains_async_queue(self): + mgr = _make_manager(write_frequency="async") + sess = _make_session() + sess.add_message("user", "pending") + mgr._async_queue.put(sess) + + with patch.object(mgr, "_flush_session") as mock_flush: + mgr.flush_all() + # Called at least once for the queued item + assert mock_flush.call_count >= 1 + + def test_flush_all_tolerates_errors(self): + mgr = _make_manager(write_frequency="session") + sess = _make_session() + mgr._cache = {"key": sess} + with patch.object(mgr, "_flush_session", side_effect=RuntimeError("oops")): + # 
Should not raise + mgr.flush_all() + + +# --------------------------------------------------------------------------- +# async writer thread lifecycle +# --------------------------------------------------------------------------- + +class TestAsyncWriterThread: + def test_thread_started_on_async_mode(self): + mgr = _make_manager(write_frequency="async") + assert mgr._async_thread is not None + assert mgr._async_thread.is_alive() + mgr.shutdown() + + def test_no_thread_for_turn_mode(self): + mgr = _make_manager(write_frequency="turn") + assert mgr._async_thread is None + assert mgr._async_queue is None + + def test_shutdown_joins_thread(self): + mgr = _make_manager(write_frequency="async") + assert mgr._async_thread.is_alive() + mgr.shutdown() + assert not mgr._async_thread.is_alive() + + def test_async_writer_calls_flush(self): + mgr = _make_manager(write_frequency="async") + sess = _make_session() + sess.add_message("user", "async msg") + + flushed = [] + original = mgr._flush_session + + def capture(s): + flushed.append(s) + + mgr._flush_session = capture + mgr._async_queue.put(sess) + # Give the daemon thread time to process + deadline = time.time() + 2.0 + while not flushed and time.time() < deadline: + time.sleep(0.05) + + mgr.shutdown() + assert len(flushed) == 1 + assert flushed[0] is sess + + def test_shutdown_sentinel_stops_loop(self): + mgr = _make_manager(write_frequency="async") + thread = mgr._async_thread + mgr.shutdown() + thread.join(timeout=3) + assert not thread.is_alive() + + +# --------------------------------------------------------------------------- +# async retry on failure +# --------------------------------------------------------------------------- + +class TestAsyncWriterRetry: + def test_retries_once_on_failure(self): + mgr = _make_manager(write_frequency="async") + sess = _make_session() + sess.add_message("user", "msg") + + call_count = [0] + + def flaky_flush(s): + call_count[0] += 1 + if call_count[0] == 1: + raise 
ConnectionError("network blip") + # second call succeeds silently + + mgr._flush_session = flaky_flush + + with patch("time.sleep"): # skip the 2s sleep in retry + mgr._async_queue.put(sess) + deadline = time.time() + 3.0 + while call_count[0] < 2 and time.time() < deadline: + time.sleep(0.05) + + mgr.shutdown() + assert call_count[0] == 2 + + def test_drops_after_two_failures(self): + mgr = _make_manager(write_frequency="async") + sess = _make_session() + sess.add_message("user", "msg") + + call_count = [0] + + def always_fail(s): + call_count[0] += 1 + raise RuntimeError("always broken") + + mgr._flush_session = always_fail + + with patch("time.sleep"): + mgr._async_queue.put(sess) + deadline = time.time() + 3.0 + while call_count[0] < 2 and time.time() < deadline: + time.sleep(0.05) + + mgr.shutdown() + # Should have tried exactly twice (initial + one retry) and not crashed + assert call_count[0] == 2 + assert not mgr._async_thread.is_alive() + + +# --------------------------------------------------------------------------- +# HonchoClientConfig dataclass defaults for new fields +# --------------------------------------------------------------------------- + +class TestNewConfigFieldDefaults: + def test_write_frequency_default(self): + cfg = HonchoClientConfig() + assert cfg.write_frequency == "async" + + def test_memory_mode_default(self): + cfg = HonchoClientConfig() + assert cfg.memory_mode == "hybrid" + + def test_write_frequency_set(self): + cfg = HonchoClientConfig(write_frequency="turn") + assert cfg.write_frequency == "turn" + + def test_memory_mode_set(self): + cfg = HonchoClientConfig(memory_mode="honcho") + assert cfg.memory_mode == "honcho" + + def test_peer_memory_mode_falls_back_to_global(self): + cfg = HonchoClientConfig(memory_mode="honcho") + assert cfg.peer_memory_mode("any-peer") == "honcho" + + def test_peer_memory_mode_override(self): + cfg = HonchoClientConfig(memory_mode="hybrid", peer_memory_modes={"hermes": "local"}) + assert 
cfg.peer_memory_mode("hermes") == "local" + assert cfg.peer_memory_mode("other") == "hybrid" diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py index bc4a16f9..1a46e797 100644 --- a/tests/honcho_integration/test_client.py +++ b/tests/honcho_integration/test_client.py @@ -25,7 +25,8 @@ class TestHonchoClientConfigDefaults: assert config.environment == "production" assert config.enabled is False assert config.save_messages is True - assert config.session_strategy == "per-directory" + assert config.session_strategy == "per-session" + assert config.recall_mode == "auto" assert config.session_peer_prefix is False assert config.linked_hosts == [] assert config.sessions == {} @@ -134,6 +135,41 @@ class TestFromGlobalConfig: assert config.workspace_id == "root-ws" assert config.ai_peer == "root-ai" + def test_session_strategy_default_from_global_config(self, tmp_path): + """from_global_config with no sessionStrategy should match dataclass default.""" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({"apiKey": "key"})) + config = HonchoClientConfig.from_global_config(config_path=config_file) + assert config.session_strategy == "per-session" + + def test_context_tokens_host_block_wins(self, tmp_path): + """Host block contextTokens should override root.""" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({ + "apiKey": "key", + "contextTokens": 1000, + "hosts": {"hermes": {"contextTokens": 2000}}, + })) + config = HonchoClientConfig.from_global_config(config_path=config_file) + assert config.context_tokens == 2000 + + def test_recall_mode_from_config(self, tmp_path): + """recallMode is read from config, host block wins.""" + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({ + "apiKey": "key", + "recallMode": "tools", + "hosts": {"hermes": {"recallMode": "context"}}, + })) + config = HonchoClientConfig.from_global_config(config_path=config_file) + 
assert config.recall_mode == "context" + + def test_recall_mode_default(self, tmp_path): + config_file = tmp_path / "config.json" + config_file.write_text(json.dumps({"apiKey": "key"})) + config = HonchoClientConfig.from_global_config(config_path=config_file) + assert config.recall_mode == "auto" + def test_corrupt_config_falls_back_to_env(self, tmp_path): config_file = tmp_path / "config.json" config_file.write_text("not valid json{{{") @@ -177,6 +213,40 @@ class TestResolveSessionName: # Should use os.getcwd() basename assert result == Path.cwd().name + def test_per_repo_uses_git_root(self): + config = HonchoClientConfig(session_strategy="per-repo") + with patch.object( + HonchoClientConfig, "_git_repo_name", return_value="hermes-agent" + ): + result = config.resolve_session_name("/home/user/hermes-agent/subdir") + assert result == "hermes-agent" + + def test_per_repo_with_peer_prefix(self): + config = HonchoClientConfig( + session_strategy="per-repo", peer_name="eri", session_peer_prefix=True + ) + with patch.object( + HonchoClientConfig, "_git_repo_name", return_value="groudon" + ): + result = config.resolve_session_name("/home/user/groudon/src") + assert result == "eri-groudon" + + def test_per_repo_falls_back_to_dirname_outside_git(self): + config = HonchoClientConfig(session_strategy="per-repo") + with patch.object( + HonchoClientConfig, "_git_repo_name", return_value=None + ): + result = config.resolve_session_name("/home/user/not-a-repo") + assert result == "not-a-repo" + + def test_per_repo_manual_override_still_wins(self): + config = HonchoClientConfig( + session_strategy="per-repo", + sessions={"/home/user/proj": "custom-session"}, + ) + result = config.resolve_session_name("/home/user/proj") + assert result == "custom-session" + class TestGetLinkedWorkspaces: def test_resolves_linked_hosts(self): diff --git a/tools/browser_tool.py b/tools/browser_tool.py index d54cf6cd..feee2e56 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1640,6 
+1640,25 @@ def _cleanup_old_recordings(max_age_hours=72): logger.debug("Recording cleanup error (non-critical): %s", e) +def _cleanup_old_recordings(max_age_hours=72): + """Remove browser recordings older than max_age_hours to prevent disk bloat.""" + import time + try: + hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + recordings_dir = hermes_home / "browser_recordings" + if not recordings_dir.exists(): + return + cutoff = time.time() - (max_age_hours * 3600) + for f in recordings_dir.glob("session_*.webm"): + try: + if f.stat().st_mtime < cutoff: + f.unlink() + except Exception: + pass + except Exception: + pass + + # ============================================================================ # Cleanup and Management Functions # ============================================================================ diff --git a/tools/honcho_tools.py b/tools/honcho_tools.py index a701c646..62987dc6 100644 --- a/tools/honcho_tools.py +++ b/tools/honcho_tools.py @@ -1,8 +1,16 @@ -"""Honcho tool for querying user context via dialectic reasoning. +"""Honcho tools for user context retrieval. -Registers ``query_user_context`` -- an LLM-callable tool that asks Honcho -about the current user's history, preferences, goals, and communication -style. The session key is injected at runtime by the agent loop via +Registers three complementary tools, ordered by capability: + + query_user_context — dialectic Q&A (LLM-powered, direct answers) + honcho_search — semantic search (fast, no LLM, raw excerpts) + honcho_profile — peer card (fast, no LLM, structured facts) + +Use query_user_context when you need Honcho to synthesize an answer. +Use honcho_search or honcho_profile when you want raw data to reason +over yourself. + +The session key is injected at runtime by the agent loop via ``set_session_context()``. 
""" @@ -34,54 +42,6 @@ def clear_session_context() -> None: _session_key = None -# ── Tool schema ── - -HONCHO_TOOL_SCHEMA = { - "name": "query_user_context", - "description": ( - "Query Honcho to retrieve relevant context about the user based on their " - "history and preferences. Use this when you need to understand the user's " - "background, preferences, past interactions, or goals. This helps you " - "personalize your responses and provide more relevant assistance." - ), - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": ( - "A natural language question about the user. Examples: " - "'What are this user's main goals?', " - "'What communication style does this user prefer?', " - "'What topics has this user discussed recently?', " - "'What is this user's technical expertise level?'" - ), - } - }, - "required": ["query"], - }, -} - - -# ── Tool handler ── - -def _handle_query_user_context(args: dict, **kw) -> str: - """Execute the Honcho context query.""" - query = args.get("query", "") - if not query: - return json.dumps({"error": "Missing required parameter: query"}) - - if not _session_manager or not _session_key: - return json.dumps({"error": "Honcho is not active for this session."}) - - try: - result = _session_manager.get_user_context(_session_key, query) - return json.dumps({"result": result}) - except Exception as e: - logger.error("Error querying Honcho user context: %s", e) - return json.dumps({"error": f"Failed to query user context: {e}"}) - - # ── Availability check ── def _check_honcho_available() -> bool: @@ -89,14 +49,145 @@ def _check_honcho_available() -> bool: return _session_manager is not None and _session_key is not None +# ── honcho_profile ── + +_PROFILE_SCHEMA = { + "name": "honcho_profile", + "description": ( + "Retrieve the user's peer card from Honcho — a curated list of key facts " + "about them (name, role, preferences, communication style, patterns). 
" + "Fast, no LLM reasoning, minimal cost. " + "Use this at conversation start or when you need a quick factual snapshot. " + "Use query_user_context instead when you need Honcho to synthesize an answer." + ), + "parameters": { + "type": "object", + "properties": {}, + "required": [], + }, +} + + +def _handle_honcho_profile(args: dict, **kw) -> str: + if not _session_manager or not _session_key: + return json.dumps({"error": "Honcho is not active for this session."}) + try: + card = _session_manager.get_peer_card(_session_key) + if not card: + return json.dumps({"result": "No profile facts available yet. The user's profile builds over time through conversations."}) + return json.dumps({"result": card}) + except Exception as e: + logger.error("Error fetching Honcho peer card: %s", e) + return json.dumps({"error": f"Failed to fetch profile: {e}"}) + + +# ── honcho_search ── + +_SEARCH_SCHEMA = { + "name": "honcho_search", + "description": ( + "Semantic search over Honcho's stored context about the user. " + "Returns raw excerpts ranked by relevance to your query — no LLM synthesis. " + "Cheaper and faster than query_user_context. " + "Good when you want to find specific past facts and reason over them yourself. " + "Use query_user_context when you need a direct synthesized answer." + ), + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "What to search for in Honcho's memory (e.g. 
'programming languages', 'past projects', 'timezone').", + }, + "max_tokens": { + "type": "integer", + "description": "Token budget for returned context (default 800, max 2000).", + }, + }, + "required": ["query"], + }, +} + + +def _handle_honcho_search(args: dict, **kw) -> str: + query = args.get("query", "") + if not query: + return json.dumps({"error": "Missing required parameter: query"}) + if not _session_manager or not _session_key: + return json.dumps({"error": "Honcho is not active for this session."}) + max_tokens = min(int(args.get("max_tokens", 800)), 2000) + try: + result = _session_manager.search_context(_session_key, query, max_tokens=max_tokens) + if not result: + return json.dumps({"result": "No relevant context found."}) + return json.dumps({"result": result}) + except Exception as e: + logger.error("Error searching Honcho context: %s", e) + return json.dumps({"error": f"Failed to search context: {e}"}) + + +# ── query_user_context (dialectic — LLM-powered) ── + +_QUERY_SCHEMA = { + "name": "query_user_context", + "description": ( + "Ask Honcho a natural language question about the user and get a synthesized answer. " + "Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. " + "Use this when you need a direct answer synthesized from the user's full history. 
" + "Examples: 'What are this user's main goals?', 'How does this user prefer to communicate?', " + "'What is this user's technical expertise level?'" + ), + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "A natural language question about the user.", + } + }, + "required": ["query"], + }, +} + + +def _handle_query_user_context(args: dict, **kw) -> str: + query = args.get("query", "") + if not query: + return json.dumps({"error": "Missing required parameter: query"}) + if not _session_manager or not _session_key: + return json.dumps({"error": "Honcho is not active for this session."}) + try: + result = _session_manager.dialectic_query(_session_key, query) + return json.dumps({"result": result or "No result from Honcho."}) + except Exception as e: + logger.error("Error querying Honcho user context: %s", e) + return json.dumps({"error": f"Failed to query user context: {e}"}) + + # ── Registration ── from tools.registry import registry +registry.register( + name="honcho_profile", + toolset="honcho", + schema=_PROFILE_SCHEMA, + handler=_handle_honcho_profile, + check_fn=_check_honcho_available, +) + +registry.register( + name="honcho_search", + toolset="honcho", + schema=_SEARCH_SCHEMA, + handler=_handle_honcho_search, + check_fn=_check_honcho_available, +) + registry.register( name="query_user_context", toolset="honcho", - schema=HONCHO_TOOL_SCHEMA, + schema=_QUERY_SCHEMA, handler=_handle_query_user_context, check_fn=_check_honcho_available, ) diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 8ca0f072..edc5c9b1 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -673,6 +673,7 @@ checkpoints: max_snapshots: 50 # Max checkpoints to keep per directory ``` + ## Delegation Configure subagent behavior for the delegate tool: diff --git a/website/docs/user-guide/messaging/slack.md 
b/website/docs/user-guide/messaging/slack.md index 65d27ee8..48608f68 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -91,6 +91,7 @@ You can always find or regenerate app-level tokens under **Settings → Basic In This step is critical — it controls what messages the bot can see. + 1. In the sidebar, go to **Features → Event Subscriptions** 2. Toggle **Enable Events** to ON 3. Expand **Subscribe to bot events** and add: @@ -110,6 +111,7 @@ If the bot works in DMs but **not in channels**, you almost certainly forgot to Without these events, Slack simply never delivers channel messages to the bot. ::: + --- ## Step 5: Install App to Workspace @@ -200,6 +202,7 @@ This is intentional — it prevents the bot from responding to every message in --- + ## Home Channel Set `SLACK_HOME_CHANNEL` to a channel ID where Hermes will deliver scheduled messages, From b4af03aea8595a56e8f40fbd1b96dbffae2295fb Mon Sep 17 00:00:00 2001 From: Erosika Date: Mon, 9 Mar 2026 16:35:19 -0400 Subject: [PATCH 02/23] fix(honcho): clarify API key signup instructions Tell users to go to app.honcho.dev > Settings > API Keys. Updated in setup walkthrough, setup prompt, and client error message. --- honcho_integration/cli.py | 12 ++++++------ honcho_integration/client.py | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index feb3ee0f..d954b14f 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -97,7 +97,7 @@ def cmd_setup(args) -> None: cfg["apiKey"] = new_key if not cfg.get("apiKey"): - print("\n No API key configured. Get one at https://app.honcho.dev") + print("\n No API key configured. 
Get your API key at https://app.honcho.dev") print(" Run 'hermes honcho setup' again once you have a key.\n") return @@ -542,12 +542,12 @@ def cmd_migrate(args) -> None: print(f" Honcho API key already configured: {masked}") print(" Skip to Step 2.") else: - print(" Honcho is a cloud memory service. You need a free account to use it.") + print(" Honcho is a cloud memory service that gives Hermes persistent memory") + print(" across sessions. You need an API key to use it.") print() - print(" 1. Go to https://app.honcho.dev and create an account.") - print(" 2. Copy your API key from the dashboard.") - print(" 3. Run: hermes honcho setup") - print(" This will store the key and create a workspace for this project.") + print(" 1. Get your API key at https://app.honcho.dev") + print(" 2. Run: hermes honcho setup") + print(" Paste the key when prompted.") print() answer = _prompt(" Run 'hermes honcho setup' now?", default="y") if answer.lower() in ("y", "yes"): diff --git a/honcho_integration/client.py b/honcho_integration/client.py index f1d95b2e..90b36902 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -311,9 +311,9 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho: if not config.api_key: raise ValueError( - "Honcho API key not found. Set it in ~/.honcho/config.json " - "or the HONCHO_API_KEY environment variable. " - "Get an API key from https://app.honcho.dev" + "Honcho API key not found. " + "Get your API key at https://app.honcho.dev, " + "then run 'hermes honcho setup' or set HONCHO_API_KEY." ) try: From 6782249df935f4bbeed41e1b2b9d552d78ce16c4 Mon Sep 17 00:00:00 2001 From: Erosika Date: Mon, 9 Mar 2026 17:41:21 -0400 Subject: [PATCH 03/23] fix(honcho): rewrite tokens and peer CLI help for clarity Explain what context vs dialectic actually do in plain language: context = raw memory retrieval, dialectic = AI-to-AI inference for session continuity. Describe what user/AI peer cards are. 
--- honcho_integration/cli.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index d954b14f..636d0be7 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -320,13 +320,19 @@ def cmd_peer(args) -> None: # Show current values hosts = cfg.get("hosts", {}) hermes = hosts.get(HOST, {}) - print(f"\nHoncho peer config\n" + "─" * 40) - print(f" User peer: {cfg.get('peerName') or '(not set)'}") - print(f" AI peer: {hermes.get('aiPeer') or cfg.get('aiPeer') or HOST}") + user = cfg.get('peerName') or '(not set)' + ai = hermes.get('aiPeer') or cfg.get('aiPeer') or HOST lvl = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low" max_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600 - print(f" Dialectic level: {lvl} (options: {', '.join(REASONING_LEVELS)})") - print(f" Dialectic cap: {max_chars} chars\n") + print(f"\nHoncho peers\n" + "─" * 40) + print(f" User peer: {user}") + print(f" Your identity in Honcho. Messages you send build this peer's card.") + print(f" AI peer: {ai}") + print(f" Hermes' identity in Honcho. 
Seed with 'hermes honcho identity '.") + print(f" Dialectic calls ask this peer questions to warm session context.") + print() + print(f" Dialectic reasoning: {lvl} ({', '.join(REASONING_LEVELS)})") + print(f" Dialectic cap: {max_chars} chars\n") return if user_name is not None: @@ -397,13 +403,17 @@ def cmd_tokens(args) -> None: ctx_tokens = hermes.get("contextTokens") or cfg.get("contextTokens") or "(Honcho default)" d_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600 d_level = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low" - print(f"\nHoncho token settings\n" + "─" * 40) - print(f" context tokens: {ctx_tokens}") - print(f" Max tokens Honcho returns from session.context() per turn.") - print(f" Injected into Hermes system prompt — counts against your LLM budget.") - print(f" dialectic cap: {d_chars} chars") - print(f" Max chars of peer.chat() result injected per turn.") - print(f" dialectic level: {d_level} (controls Honcho-side inference depth)") + print(f"\nHoncho budgets\n" + "─" * 40) + print() + print(f" Context {ctx_tokens} tokens") + print(f" Raw memory retrieval. Honcho returns stored facts/history about") + print(f" the user and session, injected directly into the system prompt.") + print() + print(f" Dialectic {d_chars} chars, reasoning: {d_level}") + print(f" AI-to-AI inference. Hermes asks Honcho's AI peer a question") + print(f" (e.g. \"what were we working on?\") and Honcho runs its own model") + print(f" to synthesize an answer. 
Used for first-turn session continuity.") + print(f" Level controls how much reasoning Honcho spends on the answer.") print(f"\n Set with: hermes honcho tokens [--context N] [--dialectic N]\n") return From c1228e9a4a7314db1c26b92c39e33169a387ae26 Mon Sep 17 00:00:00 2001 From: Erosika Date: Mon, 9 Mar 2026 17:46:51 -0400 Subject: [PATCH 04/23] refactor(honcho): rename recallMode "auto" to "hybrid" Matches the mental model: hybrid = context + tools, context = context only, tools = tools only. --- honcho_integration/cli.py | 6 +++--- honcho_integration/client.py | 6 +++--- run_agent.py | 4 ++-- tests/honcho_integration/test_client.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 636d0be7..d568aa93 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -147,13 +147,13 @@ def cmd_setup(args) -> None: cfg["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" # Recall mode - current_recall = cfg.get("recallMode", "auto") + current_recall = cfg.get("recallMode", "hybrid") print(f"\n Recall mode options:") - print(" auto — pre-warmed context + memory tools available (default)") + print(" hybrid — pre-warmed context + memory tools available (default)") print(" context — pre-warmed context only, memory tools suppressed") print(" tools — no pre-loaded context, rely on tool calls only") new_recall = _prompt("Recall mode", default=current_recall) - if new_recall in ("auto", "context", "tools"): + if new_recall in ("hybrid", "context", "tools"): cfg["recallMode"] = new_recall # Session strategy diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 90b36902..3f3f174d 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -90,10 +90,10 @@ class HonchoClientConfig: # Max chars of dialectic result to inject into Hermes system prompt dialectic_max_chars: int = 600 # Recall mode: how memory retrieval 
works when Honcho is active. - # "auto" — pre-warmed context + memory tools available (model decides) + # "hybrid" — pre-warmed context + memory tools available (model decides) # "context" — pre-warmed context only, honcho memory tools removed # "tools" — no pre-loaded context, rely on tool calls only - recall_mode: str = "auto" + recall_mode: str = "hybrid" # Session resolution session_strategy: str = "per-session" session_peer_prefix: bool = False @@ -199,7 +199,7 @@ class HonchoClientConfig: recall_mode=( host_block.get("recallMode") or raw.get("recallMode") - or "auto" + or "hybrid" ), session_strategy=raw.get("sessionStrategy", "per-session"), session_peer_prefix=raw.get("sessionPeerPrefix", False), diff --git a/run_agent.py b/run_agent.py index a5133b02..0984f703 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1580,7 +1580,7 @@ class AIAgent: hcfg = self._honcho_config mode = hcfg.memory_mode if hcfg else "hybrid" freq = hcfg.write_frequency if hcfg else "async" - recall_mode = hcfg.recall_mode if hcfg else "auto" + recall_mode = hcfg.recall_mode if hcfg else "hybrid" honcho_block = ( "# Honcho memory integration\n" f"Active. Session: {self._honcho_session_key}. " @@ -3382,7 +3382,7 @@ class AIAgent: # Honcho: read cached context from last turn's background fetch (non-blocking), # then fire both fetches for next turn. Skip in "tools" mode (no context injection). 
self._honcho_context = "" - _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "auto") + _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid") if self._honcho and self._honcho_session_key and not conversation_history and _recall_mode != "tools": try: self._honcho_context = self._honcho_prefetch(user_message) diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py index 1a46e797..d779d9a6 100644 --- a/tests/honcho_integration/test_client.py +++ b/tests/honcho_integration/test_client.py @@ -26,7 +26,7 @@ class TestHonchoClientConfigDefaults: assert config.enabled is False assert config.save_messages is True assert config.session_strategy == "per-session" - assert config.recall_mode == "auto" + assert config.recall_mode == "hybrid" assert config.session_peer_prefix is False assert config.linked_hosts == [] assert config.sessions == {} @@ -168,7 +168,7 @@ class TestFromGlobalConfig: config_file = tmp_path / "config.json" config_file.write_text(json.dumps({"apiKey": "key"})) config = HonchoClientConfig.from_global_config(config_path=config_file) - assert config.recall_mode == "auto" + assert config.recall_mode == "hybrid" def test_corrupt_config_falls_back_to_env(self, tmp_path): config_file = tmp_path / "config.json" From 792be0e8e3fc2e5a2862fe48f67a0a6ca49a8b2a Mon Sep 17 00:00:00 2001 From: Erosika Date: Mon, 9 Mar 2026 17:55:31 -0400 Subject: [PATCH 05/23] feat(honcho): add honcho_conclude tool for writing facts back to memory New tool lets Hermes persist conclusions about the user (preferences, corrections, project context) directly to Honcho via the conclusions API. Feeds into the user's peer card and representation. 
--- honcho_integration/session.py | 35 ++++++++++++++++++++++++ run_agent.py | 16 ++++++----- tools/honcho_tools.py | 51 +++++++++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+), 7 deletions(-) diff --git a/honcho_integration/session.py b/honcho_integration/session.py index 4a49ca43..384d42f5 100644 --- a/honcho_integration/session.py +++ b/honcho_integration/session.py @@ -805,6 +805,41 @@ class HonchoSessionManager: logger.debug("Honcho search_context failed: %s", e) return "" + def create_conclusion(self, session_key: str, content: str) -> bool: + """Write a conclusion about the user back to Honcho. + + Conclusions are facts the AI peer observes about the user — + preferences, corrections, clarifications, project context. + They feed into the user's peer card and representation. + + Args: + session_key: Session to associate the conclusion with. + content: The conclusion text (e.g. "User prefers dark mode"). + + Returns: + True on success, False on failure. + """ + if not content or not content.strip(): + return False + + session = self._cache.get(session_key) + if not session: + logger.warning("No session cached for '%s', skipping conclusion", session_key) + return False + + assistant_peer = self._get_or_create_peer(session.assistant_peer_id) + try: + conclusions_scope = assistant_peer.conclusions_of(session.user_peer_id) + conclusions_scope.create([{ + "content": content.strip(), + "session_id": session.honcho_session_id, + }]) + logger.info("Created conclusion for %s: %s", session_key, content[:80]) + return True + except Exception as e: + logger.error("Failed to create conclusion: %s", e) + return False + def seed_ai_identity(self, session_key: str, content: str, source: str = "manual") -> bool: """ Seed the AI peer's Honcho representation from text content. 
diff --git a/run_agent.py b/run_agent.py index 0984f703..fb20f067 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1595,22 +1595,24 @@ class AIAgent: ) elif recall_mode == "tools": honcho_block += ( - "Memory tools (most capable first; use cheaper tools when sufficient):\n" - " query_user_context — dialectic Q&A, LLM-synthesized answer\n" + "Memory tools:\n" + " query_user_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" - " honcho_profile — peer card, key facts, no LLM\n" + " honcho_profile — user's peer card, key facts, no LLM\n" + " honcho_conclude — write a fact about the user to memory\n" ) - else: # auto + else: # hybrid honcho_block += ( "Honcho context (user representation, peer card, and recent session summary) " "is pre-loaded into this system prompt below. Use it to answer continuity " "questions ('where were we?', 'what were we working on?') WITHOUT calling " "any tools. Only call memory tools when you need information beyond what is " "already present in the Honcho Memory section.\n" - "Memory tools (most capable first; use cheaper tools when sufficient):\n" - " query_user_context — dialectic Q&A, LLM-synthesized answer\n" + "Memory tools:\n" + " query_user_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" - " honcho_profile — peer card, key facts, no LLM\n" + " honcho_profile — user's peer card, key facts, no LLM\n" + " honcho_conclude — write a fact about the user to memory\n" ) honcho_block += ( "Management commands (refer users here instead of explaining manually):\n" diff --git a/tools/honcho_tools.py b/tools/honcho_tools.py index 62987dc6..311b0374 100644 --- a/tools/honcho_tools.py +++ b/tools/honcho_tools.py @@ -164,6 +164,49 @@ def _handle_query_user_context(args: dict, **kw) -> str: return json.dumps({"error": f"Failed to query user context: {e}"}) +# ── honcho_conclude ── + +_CONCLUDE_SCHEMA = { + "name": 
"honcho_conclude", + "description": ( + "Write a conclusion about the user back to Honcho's memory. " + "Conclusions are persistent facts that build the user's profile — " + "preferences, corrections, clarifications, project context, or anything " + "the user tells you that should be remembered across sessions. " + "Use this when the user explicitly states a preference, corrects you, " + "or shares something they want remembered. " + "Examples: 'User prefers dark mode', 'User's project uses Python 3.11', " + "'User corrected: their name is spelled Eri not Eric'." + ), + "parameters": { + "type": "object", + "properties": { + "conclusion": { + "type": "string", + "description": "A factual statement about the user to persist in memory.", + } + }, + "required": ["conclusion"], + }, +} + + +def _handle_honcho_conclude(args: dict, **kw) -> str: + conclusion = args.get("conclusion", "") + if not conclusion: + return json.dumps({"error": "Missing required parameter: conclusion"}) + if not _session_manager or not _session_key: + return json.dumps({"error": "Honcho is not active for this session."}) + try: + ok = _session_manager.create_conclusion(_session_key, conclusion) + if ok: + return json.dumps({"result": f"Conclusion saved: {conclusion}"}) + return json.dumps({"error": "Failed to save conclusion."}) + except Exception as e: + logger.error("Error creating Honcho conclusion: %s", e) + return json.dumps({"error": f"Failed to save conclusion: {e}"}) + + # ── Registration ── from tools.registry import registry @@ -191,3 +234,11 @@ registry.register( handler=_handle_query_user_context, check_fn=_check_honcho_available, ) + +registry.register( + name="honcho_conclude", + toolset="honcho", + schema=_CONCLUDE_SCHEMA, + handler=_handle_honcho_conclude, + check_fn=_check_honcho_available, +) From 0cb639d47235b5aa246d8032098a7f86b9a6234e Mon Sep 17 00:00:00 2001 From: Erosika Date: Mon, 9 Mar 2026 17:59:30 -0400 Subject: [PATCH 06/23] refactor(honcho): rename query_user_context 
to honcho_context Consistent naming: all honcho tools now prefixed with honcho_ (honcho_context, honcho_search, honcho_profile, honcho_conclude). --- hermes_cli/config.py | 2 +- honcho_integration/cli.py | 4 ++-- run_agent.py | 6 +++--- tools/honcho_tools.py | 20 ++++++++++---------- toolsets.py | 4 ++-- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 66ce09c9..3fc300fb 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -398,7 +398,7 @@ OPTIONAL_ENV_VARS = { "description": "Honcho API key for AI-native persistent memory", "prompt": "Honcho API key", "url": "https://app.honcho.dev", - "tools": ["query_user_context"], + "tools": ["honcho_context"], "password": True, "category": "tool", }, diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index d568aa93..6489cd09 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -196,7 +196,7 @@ def cmd_setup(args) -> None: print(f" Mode: {_mode_str}") print(f" Frequency: {hcfg.write_frequency}") print(f"\n Tools available in chat:") - print(f" query_user_context — ask Honcho a question about you (LLM-synthesized)") + print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)") print(f" honcho_search — semantic search over your history (no LLM)") print(f" honcho_profile — your peer card, key facts (no LLM)") print(f"\n Other commands:") @@ -707,7 +707,7 @@ def cmd_migrate(args) -> None: print(" automatically. 
Files become the seed, not the live store.") print() print(" Tool surface (available to the agent during conversation)") - print(" query_user_context — ask Honcho a question, get a synthesized answer (LLM)") + print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)") print(" honcho_search — semantic search over stored context (no LLM)") print(" honcho_profile — fast peer card snapshot (no LLM)") print() diff --git a/run_agent.py b/run_agent.py index fb20f067..230ad125 100644 --- a/run_agent.py +++ b/run_agent.py @@ -604,7 +604,7 @@ class AIAgent: # all memory retrieval comes from the pre-warmed system prompt. if hcfg.recall_mode != "context": # Rebuild tool definitions now that Honcho check_fn will pass. - # (Tools were built before Honcho init, so query_user_context + # (Tools were built before Honcho init, so honcho_context # was filtered out by _check_honcho_available() returning False.) self.tools = get_tool_definitions( enabled_toolsets=enabled_toolsets, @@ -1596,7 +1596,7 @@ class AIAgent: elif recall_mode == "tools": honcho_block += ( "Memory tools:\n" - " query_user_context — ask Honcho a question, LLM-synthesized answer\n" + " honcho_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n" " honcho_conclude — write a fact about the user to memory\n" @@ -1609,7 +1609,7 @@ class AIAgent: "any tools. 
Only call memory tools when you need information beyond what is " "already present in the Honcho Memory section.\n" "Memory tools:\n" - " query_user_context — ask Honcho a question, LLM-synthesized answer\n" + " honcho_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n" " honcho_conclude — write a fact about the user to memory\n" diff --git a/tools/honcho_tools.py b/tools/honcho_tools.py index 311b0374..d7b0875d 100644 --- a/tools/honcho_tools.py +++ b/tools/honcho_tools.py @@ -2,11 +2,11 @@ Registers three complementary tools, ordered by capability: - query_user_context — dialectic Q&A (LLM-powered, direct answers) + honcho_context — dialectic Q&A (LLM-powered, direct answers) honcho_search — semantic search (fast, no LLM, raw excerpts) honcho_profile — peer card (fast, no LLM, structured facts) -Use query_user_context when you need Honcho to synthesize an answer. +Use honcho_context when you need Honcho to synthesize an answer. Use honcho_search or honcho_profile when you want raw data to reason over yourself. @@ -58,7 +58,7 @@ _PROFILE_SCHEMA = { "about them (name, role, preferences, communication style, patterns). " "Fast, no LLM reasoning, minimal cost. " "Use this at conversation start or when you need a quick factual snapshot. " - "Use query_user_context instead when you need Honcho to synthesize an answer." + "Use honcho_context instead when you need Honcho to synthesize an answer." ), "parameters": { "type": "object", @@ -88,9 +88,9 @@ _SEARCH_SCHEMA = { "description": ( "Semantic search over Honcho's stored context about the user. " "Returns raw excerpts ranked by relevance to your query — no LLM synthesis. " - "Cheaper and faster than query_user_context. " + "Cheaper and faster than honcho_context. " "Good when you want to find specific past facts and reason over them yourself. 
" - "Use query_user_context when you need a direct synthesized answer." + "Use honcho_context when you need a direct synthesized answer." ), "parameters": { "type": "object", @@ -126,10 +126,10 @@ def _handle_honcho_search(args: dict, **kw) -> str: return json.dumps({"error": f"Failed to search context: {e}"}) -# ── query_user_context (dialectic — LLM-powered) ── +# ── honcho_context (dialectic — LLM-powered) ── _QUERY_SCHEMA = { - "name": "query_user_context", + "name": "honcho_context", "description": ( "Ask Honcho a natural language question about the user and get a synthesized answer. " "Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. " @@ -150,7 +150,7 @@ _QUERY_SCHEMA = { } -def _handle_query_user_context(args: dict, **kw) -> str: +def _handle_honcho_context(args: dict, **kw) -> str: query = args.get("query", "") if not query: return json.dumps({"error": "Missing required parameter: query"}) @@ -228,10 +228,10 @@ registry.register( ) registry.register( - name="query_user_context", + name="honcho_context", toolset="honcho", schema=_QUERY_SCHEMA, - handler=_handle_query_user_context, + handler=_handle_honcho_context, check_fn=_check_honcho_available, ) diff --git a/toolsets.py b/toolsets.py index 87b48c7e..50ddf5f9 100644 --- a/toolsets.py +++ b/toolsets.py @@ -61,7 +61,7 @@ _HERMES_CORE_TOOLS = [ # Cross-platform messaging (gated on gateway running via check_fn) "send_message", # Honcho user context (gated on honcho being active via check_fn) - "query_user_context", + "honcho_context", # Home Assistant smart home control (gated on HASS_TOKEN via check_fn) "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service", ] @@ -192,7 +192,7 @@ TOOLSETS = { "honcho": { "description": "Honcho AI-native memory for persistent cross-session user modeling", - "tools": ["query_user_context"], + "tools": ["honcho_context"], "includes": [] }, From c047c03e82aa362783cac0b0f5db1f7f914df94c Mon Sep 17 00:00:00 2001 From: 
Erosika Date: Mon, 9 Mar 2026 18:02:09 -0400 Subject: [PATCH 07/23] feat(honcho): honcho_context can query any peer (user or ai) Optional 'peer' parameter: "user" (default) or "ai". Allows asking about the AI assistant's history/identity, not just the user's. --- honcho_integration/session.py | 18 ++++++++++++------ tools/honcho_tools.py | 23 ++++++++++++++--------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/honcho_integration/session.py b/honcho_integration/session.py index 384d42f5..e671f1c8 100644 --- a/honcho_integration/session.py +++ b/honcho_integration/session.py @@ -441,19 +441,24 @@ class HonchoSessionManager: idx = min(default_idx + bump, 3) return levels[idx] - def dialectic_query(self, session_key: str, query: str, reasoning_level: str | None = None) -> str: + def dialectic_query( + self, session_key: str, query: str, + reasoning_level: str | None = None, + peer: str = "user", + ) -> str: """ - Query Honcho's dialectic endpoint about the user. + Query Honcho's dialectic endpoint about a peer. - Runs an LLM on Honcho's backend against the user peer's full + Runs an LLM on Honcho's backend against the target peer's full representation. Higher latency than context() — call async via prefetch_dialectic() to avoid blocking the response. Args: session_key: The session key to query against. - query: Natural language question about the user. + query: Natural language question. reasoning_level: Override the config default. If None, uses _dynamic_reasoning_level(query). + peer: Which peer to query — "user" (default) or "ai". Returns: Honcho's synthesized answer, or empty string on failure. 
@@ -462,11 +467,12 @@ class HonchoSessionManager: if not session: return "" - user_peer = self._get_or_create_peer(session.user_peer_id) + peer_id = session.assistant_peer_id if peer == "ai" else session.user_peer_id + target_peer = self._get_or_create_peer(peer_id) level = reasoning_level or self._dynamic_reasoning_level(query) try: - result = user_peer.chat(query, reasoning_level=level) or "" + result = target_peer.chat(query, reasoning_level=level) or "" # Apply Hermes-side char cap before caching if result and self._dialectic_max_chars and len(result) > self._dialectic_max_chars: result = result[:self._dialectic_max_chars].rsplit(" ", 1)[0] + " …" diff --git a/tools/honcho_tools.py b/tools/honcho_tools.py index d7b0875d..7d5aec5b 100644 --- a/tools/honcho_tools.py +++ b/tools/honcho_tools.py @@ -131,19 +131,23 @@ def _handle_honcho_search(args: dict, **kw) -> str: _QUERY_SCHEMA = { "name": "honcho_context", "description": ( - "Ask Honcho a natural language question about the user and get a synthesized answer. " + "Ask Honcho a natural language question and get a synthesized answer. " "Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. " - "Use this when you need a direct answer synthesized from the user's full history. " - "Examples: 'What are this user's main goals?', 'How does this user prefer to communicate?', " - "'What is this user's technical expertise level?'" + "Can query about any peer: the user (default), the AI assistant, or any named peer. " + "Examples: 'What are the user's main goals?', 'What has hermes been working on?', " + "'What is the user's technical expertise level?'" ), "parameters": { "type": "object", "properties": { "query": { "type": "string", - "description": "A natural language question about the user.", - } + "description": "A natural language question.", + }, + "peer": { + "type": "string", + "description": "Which peer to query about: 'user' (default) or 'ai'. 
Omit for user.", + }, }, "required": ["query"], }, @@ -156,12 +160,13 @@ def _handle_honcho_context(args: dict, **kw) -> str: return json.dumps({"error": "Missing required parameter: query"}) if not _session_manager or not _session_key: return json.dumps({"error": "Honcho is not active for this session."}) + peer_target = args.get("peer", "user") try: - result = _session_manager.dialectic_query(_session_key, query) + result = _session_manager.dialectic_query(_session_key, query, peer=peer_target) return json.dumps({"result": result or "No result from Honcho."}) except Exception as e: - logger.error("Error querying Honcho user context: %s", e) - return json.dumps({"error": f"Failed to query user context: {e}"}) + logger.error("Error querying Honcho context: %s", e) + return json.dumps({"error": f"Failed to query context: {e}"}) # ── honcho_conclude ── From 87cc5287a878e869b1963858f35c9fa70076fdda Mon Sep 17 00:00:00 2001 From: adavyas Date: Tue, 10 Mar 2026 02:05:28 -0700 Subject: [PATCH 08/23] fix(honcho): enforce local mode and cache-safe warmup --- run_agent.py | 293 ++++++++++++++++++++-------------------- tests/test_run_agent.py | 66 +++++++++ 2 files changed, 212 insertions(+), 147 deletions(-) diff --git a/run_agent.py b/run_agent.py index 230ad125..9c9607af 100644 --- a/run_agent.py +++ b/run_agent.py @@ -183,6 +183,8 @@ class AIAgent: skip_memory: bool = False, session_db=None, honcho_session_key: str = None, + honcho_manager=None, + honcho_config=None, iteration_budget: "IterationBudget" = None, fallback_model: Dict[str, Any] = None, checkpoints_enabled: bool = False, @@ -228,6 +230,8 @@ class AIAgent: polluting trajectories with user-specific persona or project instructions. honcho_session_key (str): Session key for Honcho integration (e.g., "telegram:123456" or CLI session_id). When provided and Honcho is enabled in config, enables persistent cross-session user modeling. + honcho_manager: Optional shared HonchoSessionManager owned by the caller. 
+ honcho_config: Optional HonchoClientConfig corresponding to honcho_manager. """ self.model = model self.max_iterations = max_iterations @@ -548,134 +552,42 @@ class AIAgent: self._honcho_config = None # HonchoClientConfig | None if not skip_memory: try: - from honcho_integration.client import HonchoClientConfig, get_honcho_client - hcfg = HonchoClientConfig.from_global_config() - self._honcho_config = hcfg - if hcfg.enabled and hcfg.api_key: - from honcho_integration.session import HonchoSessionManager - client = get_honcho_client(hcfg) - self._honcho = HonchoSessionManager( - honcho=client, - config=hcfg, - context_tokens=hcfg.context_tokens, - ) - # Resolve session key: explicit arg > sessions map > title > per-session id > directory - if not self._honcho_session_key: - # Pull title from SessionDB if available - session_title = None - if session_db is not None: - try: - session_title = session_db.get_session_title(session_id or "") - except Exception: - pass - self._honcho_session_key = ( - hcfg.resolve_session_name( - session_title=session_title, - session_id=self.session_id, - ) - or "hermes-default" - ) - # Ensure session exists in Honcho; migrate local data on first activation - honcho_sess = self._honcho.get_or_create(self._honcho_session_key) - if not honcho_sess.messages: - # New Honcho session — migrate any existing local data - _conv = getattr(self, 'conversation_history', None) or [] - if _conv: - try: - self._honcho.migrate_local_history( - self._honcho_session_key, _conv - ) - logger.info("Migrated %d local messages to Honcho", len(_conv)) - except Exception as _e: - logger.debug("Local history migration failed (non-fatal): %s", _e) - try: - from hermes_cli.config import get_hermes_home - _mem_dir = str(get_hermes_home() / "memories") - self._honcho.migrate_memory_files( - self._honcho_session_key, _mem_dir - ) - except Exception as _e: - logger.debug("Memory files migration failed (non-fatal): %s", _e) - # Inject session context into the honcho 
tool module - from tools.honcho_tools import set_session_context - set_session_context(self._honcho, self._honcho_session_key) - - # In "context" mode, skip honcho tool registration entirely — - # all memory retrieval comes from the pre-warmed system prompt. - if hcfg.recall_mode != "context": - # Rebuild tool definitions now that Honcho check_fn will pass. - # (Tools were built before Honcho init, so honcho_context - # was filtered out by _check_honcho_available() returning False.) - self.tools = get_tool_definitions( + if honcho_manager is not None: + hcfg = honcho_config or getattr(honcho_manager, "_config", None) + self._honcho_config = hcfg + if hcfg and self._honcho_should_activate(hcfg): + self._honcho = honcho_manager + self._activate_honcho( + hcfg, enabled_toolsets=enabled_toolsets, disabled_toolsets=disabled_toolsets, - quiet_mode=True, # already printed tool list above + session_db=session_db, ) - self.valid_tool_names = { - tool["function"]["name"] for tool in self.tools - } if self.tools else set() - if not self.quiet_mode: - print(f" Honcho active — recall_mode: {hcfg.recall_mode}") - else: - if not self.quiet_mode: - print(" Honcho active — recall_mode: context (tools suppressed)") - - logger.info( - "Honcho active (session: %s, user: %s, workspace: %s, " - "write_frequency: %s, memory_mode: %s)", - self._honcho_session_key, hcfg.peer_name, hcfg.workspace_id, - hcfg.write_frequency, hcfg.memory_mode, - ) - - # Warm caches when recall_mode allows pre-loaded context. - # "tools" mode skips warm entirely (tool calls handle recall). 
- _recall_mode = hcfg.recall_mode - if _recall_mode != "tools": - try: - _ctx = self._honcho.get_prefetch_context(self._honcho_session_key) - if _ctx: - self._honcho._context_cache[self._honcho_session_key] = _ctx - logger.debug("Honcho context pre-warmed for first turn") - except Exception as _e: - logger.debug("Honcho context prefetch failed (non-fatal): %s", _e) - - try: - _cwd = os.path.basename(os.getcwd()) - _dialectic = self._honcho.dialectic_query( - self._honcho_session_key, - f"What has the user been working on recently in {_cwd}? " - "Summarize the current project context and where we left off.", - ) - if _dialectic: - self._honcho._dialectic_cache[self._honcho_session_key] = _dialectic - logger.debug("Honcho dialectic pre-warmed for first turn") - except Exception as _e: - logger.debug("Honcho dialectic prefetch failed (non-fatal): %s", _e) - - # Register SIGTERM/SIGINT handlers to flush pending async writes - # before the process exits. signal.signal() only works on the main - # thread; AIAgent may be initialised from a worker thread in cli.py. 
- import signal as _signal - import threading as _threading - _honcho_ref = self._honcho - - if _threading.current_thread() is _threading.main_thread(): - def _honcho_flush_handler(signum, frame): - try: - _honcho_ref.flush_all() - except Exception: - pass - if signum == _signal.SIGINT: - raise KeyboardInterrupt - raise SystemExit(0) - - _signal.signal(_signal.SIGTERM, _honcho_flush_handler) - _signal.signal(_signal.SIGINT, _honcho_flush_handler) else: - if not hcfg.enabled: - logger.debug("Honcho disabled in global config") - elif not hcfg.api_key: - logger.debug("Honcho enabled but no API key configured") + from honcho_integration.client import HonchoClientConfig, get_honcho_client + hcfg = HonchoClientConfig.from_global_config() + self._honcho_config = hcfg + if self._honcho_should_activate(hcfg): + from honcho_integration.session import HonchoSessionManager + client = get_honcho_client(hcfg) + self._honcho = HonchoSessionManager( + honcho=client, + config=hcfg, + context_tokens=hcfg.context_tokens, + ) + self._activate_honcho( + hcfg, + enabled_toolsets=enabled_toolsets, + disabled_toolsets=disabled_toolsets, + session_db=session_db, + ) + else: + if not hcfg.enabled: + logger.debug("Honcho disabled in global config") + elif not hcfg.api_key: + logger.debug("Honcho enabled but no API key configured") + else: + logger.debug("Honcho local-only mode active; remote Honcho init skipped") except Exception as e: logger.warning("Honcho init failed — memory disabled: %s", e) print(f" Honcho init failed: {e}") @@ -1433,16 +1345,113 @@ class AIAgent: # ── Honcho integration helpers ── + def _honcho_should_activate(self, hcfg) -> bool: + """Return True when remote Honcho should be active.""" + if not hcfg or not hcfg.enabled or not hcfg.api_key: + return False + return not all( + hcfg.peer_memory_mode(peer) == "local" + for peer in (hcfg.ai_peer, hcfg.peer_name or "user") + ) + + def _activate_honcho( + self, + hcfg, + *, + enabled_toolsets: Optional[List[str]], + 
disabled_toolsets: Optional[List[str]], + session_db, + ) -> None: + """Finish Honcho setup once a session manager is available.""" + if not self._honcho: + return + + if not self._honcho_session_key: + session_title = None + if session_db is not None: + try: + session_title = session_db.get_session_title(self.session_id or "") + except Exception: + pass + self._honcho_session_key = ( + hcfg.resolve_session_name( + session_title=session_title, + session_id=self.session_id, + ) + or "hermes-default" + ) + + honcho_sess = self._honcho.get_or_create(self._honcho_session_key) + if not honcho_sess.messages: + try: + from hermes_cli.config import get_hermes_home + + mem_dir = str(get_hermes_home() / "memories") + self._honcho.migrate_memory_files( + self._honcho_session_key, + mem_dir, + ) + except Exception as exc: + logger.debug("Memory files migration failed (non-fatal): %s", exc) + + from tools.honcho_tools import set_session_context + + set_session_context(self._honcho, self._honcho_session_key) + + if hcfg.recall_mode != "context": + self.tools = get_tool_definitions( + enabled_toolsets=enabled_toolsets, + disabled_toolsets=disabled_toolsets, + quiet_mode=True, + ) + self.valid_tool_names = { + tool["function"]["name"] for tool in self.tools + } if self.tools else set() + if not self.quiet_mode: + print(f" Honcho active — recall_mode: {hcfg.recall_mode}") + elif not self.quiet_mode: + print(" Honcho active — recall_mode: context (tools suppressed)") + + logger.info( + "Honcho active (session: %s, user: %s, workspace: %s, " + "write_frequency: %s, memory_mode: %s)", + self._honcho_session_key, + hcfg.peer_name, + hcfg.workspace_id, + hcfg.write_frequency, + hcfg.memory_mode, + ) + + recall_mode = hcfg.recall_mode + if recall_mode != "tools": + try: + ctx = self._honcho.get_prefetch_context(self._honcho_session_key) + if ctx: + self._honcho._context_cache[self._honcho_session_key] = ctx + logger.debug("Honcho context pre-warmed for first turn") + except Exception as 
exc: + logger.debug("Honcho context prefetch failed (non-fatal): %s", exc) + + import signal as _signal + import threading as _threading + + honcho_ref = self._honcho + + if _threading.current_thread() is _threading.main_thread(): + def _honcho_flush_handler(signum, frame): + try: + honcho_ref.flush_all() + except Exception: + pass + if signum == _signal.SIGINT: + raise KeyboardInterrupt + raise SystemExit(0) + + _signal.signal(_signal.SIGTERM, _honcho_flush_handler) + _signal.signal(_signal.SIGINT, _honcho_flush_handler) + def _honcho_prefetch(self, user_message: str) -> str: - """Assemble Honcho context from cached background fetches. - - Both session.context() and peer.chat() (dialectic) are fired as - background threads at the end of each turn via _honcho_fire_prefetch(). - This method just reads the cached results — no blocking HTTP calls. - - First turn uses synchronously pre-warmed caches from init. - Subsequent turns use async prefetch results from the previous turn end. - """ + """Assemble the first-turn Honcho context from the pre-warmed cache.""" if not self._honcho or not self._honcho_session_key: return "" try: @@ -1463,10 +1472,6 @@ class AIAgent: if ai_card: parts.append(ai_card) - dialectic = self._honcho.pop_dialectic_result(self._honcho_session_key) - if dialectic: - parts.append(f"[Honcho dialectic]\n{dialectic}") - if not parts: return "" header = ( @@ -1480,13 +1485,6 @@ class AIAgent: logger.debug("Honcho prefetch failed (non-fatal): %s", e) return "" - def _honcho_fire_prefetch(self, user_message: str) -> None: - """Fire both Honcho background fetches for the next turn (non-blocking).""" - if not self._honcho or not self._honcho_session_key: - return - self._honcho.prefetch_context(self._honcho_session_key, user_message) - self._honcho.prefetch_dialectic(self._honcho_session_key, user_message) - def _honcho_save_user_observation(self, content: str) -> str: """Route a memory tool target=user add to Honcho. 
@@ -3381,8 +3379,10 @@ class AIAgent: ) self._iters_since_skill = 0 - # Honcho: read cached context from last turn's background fetch (non-blocking), - # then fire both fetches for next turn. Skip in "tools" mode (no context injection). + # Honcho: on the first turn only, read the pre-warmed context snapshot and + # bake it into the system prompt. We intentionally avoid per-turn refreshes + # here because changing the system prompt would destroy provider prompt-cache + # reuse for the rest of the session. self._honcho_context = "" _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid") if self._honcho and self._honcho_session_key and not conversation_history and _recall_mode != "tools": @@ -3390,7 +3390,6 @@ class AIAgent: self._honcho_context = self._honcho_prefetch(user_message) except Exception as e: logger.debug("Honcho prefetch failed (non-fatal): %s", e) - self._honcho_fire_prefetch(user_message) # Add user message user_msg = {"role": "user", "content": user_message} diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 5757a782..91bb83ae 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -13,6 +13,7 @@ from unittest.mock import MagicMock, patch, PropertyMock import pytest +from honcho_integration.client import HonchoClientConfig from run_agent import AIAgent from agent.prompt_builder import DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS @@ -1208,3 +1209,68 @@ class TestSystemPromptStability: conversation_history = [] should_prefetch = not conversation_history assert should_prefetch is True + + +class TestHonchoActivation: + def test_local_mode_skips_honcho_init(self): + hcfg = HonchoClientConfig( + enabled=True, + api_key="honcho-key", + memory_mode="local", + peer_name="user", + ai_peer="hermes", + ) + + with ( + patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + 
patch("honcho_integration.client.HonchoClientConfig.from_global_config", return_value=hcfg), + patch("honcho_integration.client.get_honcho_client") as mock_client, + ): + agent = AIAgent( + api_key="test-key-1234567890", + quiet_mode=True, + skip_context_files=True, + skip_memory=False, + ) + + assert agent._honcho is None + assert agent._honcho_config is hcfg + mock_client.assert_not_called() + + def test_injected_honcho_manager_skips_fresh_client_init(self): + hcfg = HonchoClientConfig( + enabled=True, + api_key="honcho-key", + memory_mode="hybrid", + peer_name="user", + ai_peer="hermes", + recall_mode="hybrid", + ) + manager = MagicMock() + manager._config = hcfg + manager.get_or_create.return_value = SimpleNamespace(messages=[]) + manager.get_prefetch_context.return_value = {"representation": "Known user", "card": ""} + + with ( + patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + patch("honcho_integration.client.get_honcho_client") as mock_client, + patch("tools.honcho_tools.set_session_context"), + ): + agent = AIAgent( + api_key="test-key-1234567890", + quiet_mode=True, + skip_context_files=True, + skip_memory=False, + honcho_session_key="gateway-session", + honcho_manager=manager, + honcho_config=hcfg, + ) + + assert agent._honcho is manager + manager.get_or_create.assert_called_once_with("gateway-session") + manager.get_prefetch_context.assert_called_once_with("gateway-session") + mock_client.assert_not_called() From 87349b9bc1af6df8f074b2b769fda0bafd0f7b2b Mon Sep 17 00:00:00 2001 From: adavyas Date: Tue, 10 Mar 2026 02:06:17 -0700 Subject: [PATCH 09/23] fix(gateway): persist Honcho managers across session requests --- gateway/run.py | 74 +++++++++++++++++- tests/gateway/test_honcho_lifecycle.py | 104 +++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 
tests/gateway/test_honcho_lifecycle.py diff --git a/gateway/run.py b/gateway/run.py index 4e1c7390..1c774404 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -225,6 +225,12 @@ class GatewayRunner: # Track pending exec approvals per session # Key: session_key, Value: {"command": str, "pattern_key": str} self._pending_approvals: Dict[str, Dict[str, str]] = {} + + # Persistent Honcho managers keyed by gateway session key. + # This preserves write_frequency="session" semantics across short-lived + # per-message AIAgent instances. + self._honcho_managers: Dict[str, Any] = {} + self._honcho_configs: Dict[str, Any] = {} # Initialize session database for session_search tool support self._session_db = None @@ -241,6 +247,63 @@ class GatewayRunner: # Event hook system from gateway.hooks import HookRegistry self.hooks = HookRegistry() + + def _get_or_create_gateway_honcho(self, session_key: str): + """Return a persistent Honcho manager/config pair for this gateway session.""" + if not hasattr(self, "_honcho_managers"): + self._honcho_managers = {} + if not hasattr(self, "_honcho_configs"): + self._honcho_configs = {} + + if session_key in self._honcho_managers: + return self._honcho_managers[session_key], self._honcho_configs.get(session_key) + + try: + from honcho_integration.client import HonchoClientConfig, get_honcho_client + from honcho_integration.session import HonchoSessionManager + + hcfg = HonchoClientConfig.from_global_config() + ai_mode = hcfg.peer_memory_mode(hcfg.ai_peer) + user_mode = hcfg.peer_memory_mode(hcfg.peer_name or "user") + if not hcfg.enabled or not hcfg.api_key or (ai_mode == "local" and user_mode == "local"): + return None, hcfg + + client = get_honcho_client(hcfg) + manager = HonchoSessionManager( + honcho=client, + config=hcfg, + context_tokens=hcfg.context_tokens, + ) + self._honcho_managers[session_key] = manager + self._honcho_configs[session_key] = hcfg + return manager, hcfg + except Exception as e: + logger.debug("Gateway Honcho init 
failed for %s: %s", session_key, e) + return None, None + + def _shutdown_gateway_honcho(self, session_key: str) -> None: + """Flush and close the persistent Honcho manager for a gateway session.""" + managers = getattr(self, "_honcho_managers", None) + configs = getattr(self, "_honcho_configs", None) + if managers is None or configs is None: + return + + manager = managers.pop(session_key, None) + configs.pop(session_key, None) + if not manager: + return + try: + manager.shutdown() + except Exception as e: + logger.debug("Gateway Honcho shutdown failed for %s: %s", session_key, e) + + def _shutdown_all_gateway_honcho(self) -> None: + """Flush and close all persistent Honcho managers.""" + managers = getattr(self, "_honcho_managers", None) + if not managers: + return + for session_key in list(managers.keys()): + self._shutdown_gateway_honcho(session_key) def _flush_memories_for_session(self, old_session_id: str): """Prompt the agent to save memories/skills before context is lost. @@ -595,6 +658,7 @@ class GatewayRunner: ) try: await self._async_flush_memories(entry.session_id) + self._shutdown_gateway_honcho(key) self.session_store._pre_flushed_sessions.add(entry.session_id) except Exception as e: logger.debug("Proactive memory flush failed for %s: %s", entry.session_id, e) @@ -617,8 +681,9 @@ class GatewayRunner: logger.info("✓ %s disconnected", platform.value) except Exception as e: logger.error("✗ %s disconnect error: %s", platform.value, e) - + self.adapters.clear() + self._shutdown_all_gateway_honcho() self._shutdown_event.set() from gateway.status import remove_pid_file @@ -1369,6 +1434,8 @@ class GatewayRunner: asyncio.create_task(self._async_flush_memories(old_entry.session_id)) except Exception as e: logger.debug("Gateway memory flush on reset failed: %s", e) + + self._shutdown_gateway_honcho(session_key) # Reset the session new_entry = self.session_store.reset_session(session_key) @@ -1989,6 +2056,8 @@ class GatewayRunner: except Exception as e: 
logger.debug("Memory flush on resume failed: %s", e) + self._shutdown_gateway_honcho(session_key) + # Clear any running agent for this session key if session_key in self._running_agents: del self._running_agents[session_key] @@ -2812,6 +2881,7 @@ class GatewayRunner: } pr = self._provider_routing + honcho_manager, honcho_config = self._get_or_create_gateway_honcho(session_key) agent = AIAgent( model=model, **runtime_kwargs, @@ -2833,6 +2903,8 @@ class GatewayRunner: step_callback=_step_callback_sync if _hooks_ref.loaded_hooks else None, platform=platform_key, honcho_session_key=session_key, + honcho_manager=honcho_manager, + honcho_config=honcho_config, session_db=self._session_db, fallback_model=self._fallback_model, ) diff --git a/tests/gateway/test_honcho_lifecycle.py b/tests/gateway/test_honcho_lifecycle.py new file mode 100644 index 00000000..536816fb --- /dev/null +++ b/tests/gateway/test_honcho_lifecycle.py @@ -0,0 +1,104 @@ +"""Tests for gateway-owned Honcho lifecycle helpers.""" + +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from gateway.config import Platform +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _make_runner(): + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) + runner._honcho_managers = {} + runner._honcho_configs = {} + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner.adapters = {} + runner.hooks = MagicMock() + runner.hooks.emit = AsyncMock() + return runner + + +def _make_event(text="/reset"): + return MessageEvent( + text=text, + source=SessionSource( + platform=Platform.TELEGRAM, + chat_id="chat-1", + user_id="user-1", + user_name="alice", + ), + ) + + +class TestGatewayHonchoLifecycle: + def test_gateway_reuses_honcho_manager_for_session_key(self): + runner = _make_runner() + hcfg = SimpleNamespace( + enabled=True, + 
api_key="honcho-key", + ai_peer="hermes", + peer_name="alice", + context_tokens=123, + peer_memory_mode=lambda peer: "hybrid", + ) + manager = MagicMock() + + with ( + patch("honcho_integration.client.HonchoClientConfig.from_global_config", return_value=hcfg), + patch("honcho_integration.client.get_honcho_client", return_value=MagicMock()), + patch("honcho_integration.session.HonchoSessionManager", return_value=manager) as mock_mgr_cls, + ): + first_mgr, first_cfg = runner._get_or_create_gateway_honcho("session-key") + second_mgr, second_cfg = runner._get_or_create_gateway_honcho("session-key") + + assert first_mgr is manager + assert second_mgr is manager + assert first_cfg is hcfg + assert second_cfg is hcfg + mock_mgr_cls.assert_called_once() + + def test_gateway_skips_honcho_manager_in_local_mode(self): + runner = _make_runner() + hcfg = SimpleNamespace( + enabled=True, + api_key="honcho-key", + ai_peer="hermes", + peer_name="alice", + peer_memory_mode=lambda peer: "local", + ) + + with ( + patch("honcho_integration.client.HonchoClientConfig.from_global_config", return_value=hcfg), + patch("honcho_integration.client.get_honcho_client") as mock_client, + patch("honcho_integration.session.HonchoSessionManager") as mock_mgr_cls, + ): + manager, cfg = runner._get_or_create_gateway_honcho("session-key") + + assert manager is None + assert cfg is hcfg + mock_client.assert_not_called() + mock_mgr_cls.assert_not_called() + + @pytest.mark.asyncio + async def test_reset_shuts_down_gateway_honcho_manager(self): + runner = _make_runner() + event = _make_event() + runner._shutdown_gateway_honcho = MagicMock() + runner.session_store = MagicMock() + runner.session_store._generate_session_key.return_value = "gateway-key" + runner.session_store._entries = { + "gateway-key": SimpleNamespace(session_id="old-session"), + } + runner.session_store.reset_session.return_value = SimpleNamespace(session_id="new-session") + + result = await runner._handle_reset_command(event) + + 
runner._shutdown_gateway_honcho.assert_called_once_with("gateway-key") + assert "Session reset" in result From 960c1521f3a3261c9831853e8ea1df69204a197e Mon Sep 17 00:00:00 2001 From: Erosika Date: Tue, 10 Mar 2026 16:49:14 -0400 Subject: [PATCH 10/23] docs(honcho): rewrite Honcho Memory docs as full feature documentation Replaces the stub docs with comprehensive coverage: setup (interactive + manual), all config fields, memory modes, recall modes, write frequency, session strategies, host blocks, async prefetch pipeline, dual-peer architecture, dynamic reasoning, gateway integration, four tools, full CLI reference, migration paths, and AI peer identity. Trims the Honcho section in memory.md to a cross-reference. --- website/docs/user-guide/features/honcho.md | 292 ++++++++++++++++----- website/docs/user-guide/features/memory.md | 37 +-- 2 files changed, 225 insertions(+), 104 deletions(-) diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 7a319292..2a257a89 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -7,37 +7,47 @@ sidebar_position: 8 # Honcho Memory -[Honcho](https://honcho.dev) is an AI-native memory system that gives Hermes Agent persistent, cross-session understanding of users. While Hermes has built-in memory (`MEMORY.md` and `USER.md` files), Honcho adds a deeper layer of **user modeling** — learning user preferences, goals, communication style, and context across conversations. +[Honcho](https://honcho.dev) is an AI-native memory system that gives Hermes persistent, cross-session understanding of users. While Hermes has built-in memory (`MEMORY.md` and `USER.md`), Honcho adds a deeper layer of **user modeling** — learning preferences, goals, communication style, and context across conversations via a dual-peer architecture where both the user and the AI build representations over time. 
-## How It Complements Built-in Memory +## Works Alongside Built in Memory -Hermes has two memory systems that work together: +Runs `hybrid` (`local` + `honcho`) by default. | Feature | Built-in Memory | Honcho Memory | |---------|----------------|---------------| | Storage | Local files (`~/.hermes/memories/`) | Cloud-hosted Honcho API | | Scope | Agent-level notes and user profile | Deep user modeling via dialectic reasoning | | Persistence | Across sessions on same machine | Across sessions, machines, and platforms | -| Query | Injected into system prompt automatically | On-demand via `query_user_context` tool | +| Query | Injected into system prompt automatically | Prefetched + on-demand via tools | | Content | Manually curated by the agent | Automatically learned from conversations | +| Write surface | `memory` tool (add/replace/remove) | `honcho_conclude` tool (persist facts) | -Honcho doesn't replace built-in memory — it **supplements** it with richer user understanding. ## Setup -### 1. Get a Honcho API Key - -Sign up at [app.honcho.dev](https://app.honcho.dev) and get your API key. - -### 2. Install the Client Library +### Interactive Setup ```bash -pip install honcho-ai +hermes honcho setup ``` -### 3. Configure Honcho +The setup wizard walks through API key, peer names, workspace, memory mode, write frequency, recall mode, and session strategy. It offers to install `honcho-ai` if missing. -Honcho reads its configuration from `~/.honcho/config.json` (the global Honcho config shared across all Honcho-enabled applications): +### Manual Setup + +#### 1. Install the Client Library + +```bash +pip install 'honcho-ai>=2.0.1' +``` + +#### 2. Get an API Key + +Go to [app.honcho.dev](https://app.honcho.dev) > Settings > API Keys. + +#### 3. 
Configure + +Honcho reads from `~/.honcho/config.json` (shared across all Honcho-enabled applications): ```json { @@ -45,25 +55,25 @@ Honcho reads its configuration from `~/.honcho/config.json` (the global Honcho c "workspace": "hermes", "peerName": "your-name", "aiPeer": "hermes", - "environment": "production", - "saveMessages": true, + "memoryMode": "hybrid", + "writeFrequency": "async", + "recallMode": "hybrid", "sessionStrategy": "per-directory", "enabled": true } ``` -Alternatively, set the API key as an environment variable: +Or set the API key as an environment variable: ```bash -# Add to ~/.hermes/.env -HONCHO_API_KEY=your-honcho-api-key +hermes config set HONCHO_API_KEY your-key ``` :::info -When an API key is present (either in `~/.honcho/config.json` or as `HONCHO_API_KEY`), Honcho auto-enables unless explicitly set to `"enabled": false` in the config. +When an API key is present (either in `~/.honcho/config.json` or as `HONCHO_API_KEY`), Honcho auto-enables unless explicitly set to `"enabled": false`. 
::: -## Configuration Details +## Configuration ### Global Config (`~/.honcho/config.json`) @@ -75,14 +85,71 @@ When an API key is present (either in `~/.honcho/config.json` or as `HONCHO_API_ | `aiPeer` | `"hermes"` | AI assistant identity name | | `environment` | `"production"` | Honcho environment | | `saveMessages` | `true` | Whether to sync messages to Honcho | +| `memoryMode` | `"hybrid"` | Memory mode: `hybrid`, `honcho`, or `local` | +| `writeFrequency` | `"async"` | When to write: `async`, `turn`, `session`, or integer N | +| `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` | | `sessionStrategy` | `"per-directory"` | How sessions are scoped | | `sessionPeerPrefix` | `false` | Prefix session names with peer name | | `contextTokens` | *(Honcho default)* | Max tokens for context prefetch | +| `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` | +| `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt | | `sessions` | `{}` | Manual session name overrides per directory | +### Memory Modes + +| Mode | Effect | +|------|--------| +| `hybrid` | Write to both Honcho and local files (default) | +| `honcho` | Honcho only — skip local file writes | +| `local` | Local files only — skip all Honcho activity | + +Memory mode can be set globally or per-peer (user, agent1, agent2, etc): + +```json +{ + "memoryMode": { + "default": "hybrid", + "hermes": "honcho", + "user": "local" + } +} +``` + +When both active peers resolve to `local`, Hermes skips all remote Honcho activity entirely — no client initialization, no session creation, no prefetch. 
+ +### Recall Modes + +Controls how Honcho context reaches the agent: + +| Mode | Behavior | +|------|----------| +| `hybrid` | Prefetch context into system prompt + expose tools (default) | +| `context` | Context injection only — no Honcho tools available | +| `tools` | Tools only — no prefetch into system prompt | + +### Write Frequency + +| Setting | Behavior | +|---------|----------| +| `async` | Background thread writes (zero blocking, default) | +| `turn` | Synchronous write after each turn | +| `session` | Batched write at session end | +| *integer N* | Write every N turns | + +### Session Strategies + +| Strategy | Session key | Use case | +|----------|-------------|----------| +| `per-directory` | CWD basename | Default. Each project gets its own session. | +| `per-repo` | Git repo root name | Groups subdirectories under one session. | +| `per-session` | Unique per run | Fresh session every time. | +| `global` | Fixed `"global"` | Single cross-project session. | + +Resolution order: manual map > session title > strategy-derived key > platform key. + ### Host-specific Configuration -You can configure per-host settings for multi-application setups: +For multi-application setups, use host blocks: ```json { @@ -91,73 +158,158 @@ You can configure per-host settings for multi-application setups: "hermes": { "workspace": "my-workspace", "aiPeer": "hermes-assistant", - "linkedHosts": ["other-app"], - "contextTokens": 2000 + "linkedHosts": ["claude-code"], + "contextTokens": 2000, + "dialecticReasoningLevel": "medium" } } } ``` -Host-specific fields override global fields. Resolution order: -1. Explicit host block fields -2. Global/flat fields from config root -3. Defaults (host name used as workspace/peer) +Host-specific fields override global fields. Resolution: host block > global fields > defaults. 
### Hermes Config (`~/.hermes/config.yaml`) -The `honcho` section in Hermes config is intentionally minimal — most configuration comes from the global `~/.honcho/config.json`: +Intentionally minimal — most configuration comes from `~/.honcho/config.json`: ```yaml honcho: {} ``` -## The `query_user_context` Tool +## How It Works -When Honcho is active, Hermes gains access to the `query_user_context` tool. This lets the agent proactively ask Honcho about the user during conversations: +### Async Prefetch Pipeline -**Tool schema:** -- **Name:** `query_user_context` -- **Parameter:** `query` (string) — a natural language question about the user -- **Toolset:** `honcho` - -**Example queries the agent might make:** +Honcho context is fetched asynchronously to avoid blocking the response path: ``` -"What are this user's main goals?" -"What communication style does this user prefer?" -"What topics has this user discussed recently?" -"What is this user's technical expertise level?" +Turn N: + user message + → pop prefetch result from cache (from previous turn) + → inject into system prompt (user representation, AI representation, dialectic) + → LLM call + → response + → fire prefetch in background threads + → prefetch_context() ─┐ + → prefetch_dialectic() ─┴→ cache for Turn N+1 ``` -The tool calls Honcho's dialectic chat API to retrieve relevant user context based on accumulated conversation history. +Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider. -:::note -The `query_user_context` tool is only available when Honcho is active (API key configured and session context set). It registers in the `honcho` toolset and its availability is checked dynamically. +### Dual-Peer Architecture + +Both the user and AI have peer representations in Honcho: + +- **User peer** — observed from user messages. 
Honcho learns preferences, goals, communication style. +- **AI peer** — observed from assistant messages (`observe_me=True`). Honcho builds a representation of the agent's knowledge and behavior. + +Both representations are injected into the system prompt when available. + +### Dynamic Reasoning Level + +Dialectic queries scale reasoning effort with message complexity: + +| Message length | Reasoning level | +|----------------|-----------------| +| < 120 chars | Config default (typically `low`) | +| 120-400 chars | One level above default (cap: `high`) | +| > 400 chars | Two levels above default (cap: `high`) | + +`max` is never selected automatically. + +### Gateway Integration + +The gateway creates short-lived `AIAgent` instances per request. Honcho managers are owned at the gateway session layer (`_honcho_managers` dict) so they persist across requests within the same session and flush at real session boundaries (reset, resume, expiry, server stop). + +## Tools + +When Honcho is active, four tools become available. Availability is gated dynamically — they are invisible when Honcho is disabled. + +### `honcho_profile` + +Fast peer card retrieval (no LLM). Returns a curated list of key facts about the user. + +### `honcho_search` + +Semantic search over memory (no LLM). Returns raw excerpts ranked by relevance. Cheaper and faster than `honcho_context` — good for factual lookups. + +Parameters: +- `query` (string) — search query +- `max_tokens` (integer, optional) — result token budget + +### `honcho_context` + +Dialectic Q&A powered by Honcho's LLM. Synthesizes an answer from accumulated conversation history. + +Parameters: +- `query` (string) — natural language question +- `peer` (string, optional) — `"user"` (default) or `"ai"`. Querying `"ai"` asks about the assistant's own history and identity. + +### `honcho_conclude` + +Writes a fact to Honcho memory. Use when the user explicitly states a preference, correction, or project context worth remembering. 
Feeds into the user's peer card and representation. + +Parameters: +- `conclusion` (string) — the fact to persist + +## CLI Commands + +``` +hermes honcho setup # Interactive setup wizard +hermes honcho status # Show config and connection status +hermes honcho sessions # List directory → session name mappings +hermes honcho map # Map current directory to a session name +hermes honcho peer # Show peer names and dialectic settings +hermes honcho peer --user NAME # Set user peer name +hermes honcho peer --ai NAME # Set AI peer name +hermes honcho peer --reasoning LEVEL # Set dialectic reasoning level +hermes honcho mode # Show current memory mode +hermes honcho mode [hybrid|honcho|local] # Set memory mode +hermes honcho tokens # Show token budget settings +hermes honcho tokens --context N # Set context token cap +hermes honcho tokens --dialectic N # Set dialectic char cap +hermes honcho identity # Show AI peer identity +hermes honcho identity # Seed AI peer identity from file (SOUL.md, etc.) +hermes honcho migrate # Migration guide: OpenClaw → Hermes + Honcho +``` + +### Doctor Integration + +`hermes doctor` includes a Honcho section that validates config, API key, and connection status. + +## Migration + +### From Local Memory + +When Honcho activates on an instance with existing local history, migration runs automatically: + +1. **Conversation history** — prior messages are uploaded as an XML transcript file +2. **Memory files** — existing `MEMORY.md`, `USER.md`, and `SOUL.md` are uploaded for context + +### From OpenClaw + +```bash +hermes honcho migrate +``` + +Walks through converting an OpenClaw native Honcho setup to the shared `~/.honcho/config.json` format. + +## AI Peer Identity + +Honcho can build a representation of the AI assistant over time (via `observe_me=True`). You can also seed the AI peer explicitly: + +```bash +hermes honcho identity ~/.hermes/SOUL.md +``` + +This uploads the file content through Honcho's observation pipeline. 
The AI peer representation is then injected into the system prompt alongside the user's, giving the agent awareness of its own accumulated identity. + +```bash +hermes honcho identity --show +``` + +Shows the current AI peer representation from Honcho. + +:::tip +Honcho is fully opt-in — zero behavior change when disabled or unconfigured. All Honcho calls are non-fatal; if the service is unreachable, the agent continues normally. ::: - -## Session Management - -Honcho sessions track conversation history for user modeling: - -- **Session creation** — sessions are created or resumed automatically based on session keys (e.g., `telegram:123456` or CLI session IDs) -- **Message syncing** — new messages are synced to Honcho incrementally (only unsynced messages) -- **Peer configuration** — user messages are observed for learning; assistant messages are not -- **Context prefetch** — before responding, Hermes can prefetch user context (representation + peer card) in a single API call -- **Session rotation** — when sessions reset, old data is preserved in Honcho for continued user modeling - -## Migration from Local Memory - -When Honcho is activated on an instance that already has local conversation history: - -1. **Conversation history** — prior messages can be uploaded to Honcho as a transcript file -2. **Memory files** — existing `MEMORY.md` and `USER.md` files can be uploaded for context - -This ensures Honcho has the full picture even when activated mid-conversation. - -## Use Cases - -- **Personalized responses** — Honcho learns how each user prefers to communicate -- **Goal tracking** — remembers what users are working toward across sessions -- **Expertise adaptation** — adjusts technical depth based on user's background -- **Cross-platform memory** — same user understanding across CLI, Telegram, Discord, etc. 
-- **Multi-user support** — each user (via messaging platforms) gets their own user model diff --git a/website/docs/user-guide/features/memory.md b/website/docs/user-guide/features/memory.md index f4c778b6..c0810b69 100644 --- a/website/docs/user-guide/features/memory.md +++ b/website/docs/user-guide/features/memory.md @@ -209,41 +209,10 @@ memory: ## Honcho Integration (Cross-Session User Modeling) -For deeper, AI-generated user understanding that works across tools, you can optionally enable [Honcho](https://honcho.dev/) by Plastic Labs. Honcho runs alongside existing memory — USER.md stays as-is, and Honcho adds an additional layer of context. - -When enabled: -- **Prefetch**: Each turn, Honcho's user representation is injected into the system prompt -- **Sync**: After each conversation, messages are synced to Honcho -- **Query tool**: The agent can actively query its understanding of you via `query_user_context` - -**Setup:** +For deeper, AI-generated user understanding that works across sessions and platforms, you can enable [Honcho Memory](./honcho.md). Honcho runs alongside built-in memory in `hybrid` mode (the default) — `MEMORY.md` and `USER.md` stay as-is, and Honcho adds a persistent user modeling layer on top. ```bash -# 1. Install the optional dependency -uv pip install honcho-ai - -# 2. Get an API key from https://app.honcho.dev - -# 3. Create ~/.honcho/config.json -cat > ~/.honcho/config.json << 'EOF' -{ - "enabled": true, - "apiKey": "your-honcho-api-key", - "peerName": "your-name", - "hosts": { - "hermes": { - "workspace": "hermes" - } - } -} -EOF +hermes honcho setup ``` -Or via environment variable: -```bash -hermes config set HONCHO_API_KEY your-key -``` - -:::tip -Honcho is fully opt-in — zero behavior change when disabled or unconfigured. All Honcho calls are non-fatal; if the service is unreachable, the agent continues normally. -::: +See the [Honcho Memory](./honcho.md) docs for full configuration, tools, and CLI reference. 
From 5489c66cdf0bbce254a9caf7aad4a01971166ea7 Mon Sep 17 00:00:00 2001 From: Erosika Date: Tue, 10 Mar 2026 16:54:34 -0400 Subject: [PATCH 11/23] docs(honcho): restore use cases, example queries, and configurability language Adds back use cases section and example tool queries from the original docs. Clarifies that built-in memory and Honcho can work together or be configured separately via memoryMode. --- website/docs/user-guide/features/honcho.md | 23 ++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 2a257a89..b189c898 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -9,9 +9,9 @@ sidebar_position: 8 [Honcho](https://honcho.dev) is an AI-native memory system that gives Hermes persistent, cross-session understanding of users. While Hermes has built-in memory (`MEMORY.md` and `USER.md`), Honcho adds a deeper layer of **user modeling** — learning preferences, goals, communication style, and context across conversations via a dual-peer architecture where both the user and the AI build representations over time. -## Works Alongside Built in Memory +## Works Alongside Built-in Memory -Runs `hybrid` (`local` + `honcho`) by default. +Hermes has two memory systems that can work together or be configured separately. In `hybrid` mode (the default), both run side by side — Honcho adds cross-session user modeling while local files handle agent-level notes. | Feature | Built-in Memory | Honcho Memory | |---------|----------------|---------------| @@ -22,6 +22,8 @@ Runs `hybrid` (`local` + `honcho`) by default. 
| Content | Manually curated by the agent | Automatically learned from conversations | | Write surface | `memory` tool (add/replace/remove) | `honcho_conclude` tool (persist facts) | +Set `memoryMode` to `honcho` to use Honcho exclusively, or `local` to disable Honcho and use only local files. See [Memory Modes](#memory-modes) for per-peer configuration. + ## Setup @@ -245,6 +247,15 @@ Parameters: - `query` (string) — natural language question - `peer` (string, optional) — `"user"` (default) or `"ai"`. Querying `"ai"` asks about the assistant's own history and identity. +Example queries the agent might make: + +``` +"What are this user's main goals?" +"What communication style does this user prefer?" +"What topics has this user discussed recently?" +"What is this user's technical expertise level?" +``` + ### `honcho_conclude` Writes a fact to Honcho memory. Use when the user explicitly states a preference, correction, or project context worth remembering. Feeds into the user's peer card and representation. @@ -310,6 +321,14 @@ hermes honcho identity --show Shows the current AI peer representation from Honcho. +## Use Cases + +- **Personalized responses** — Honcho learns how each user prefers to communicate +- **Goal tracking** — remembers what users are working toward across sessions +- **Expertise adaptation** — adjusts technical depth based on user's background +- **Cross-platform memory** — same user understanding across CLI, Telegram, Discord, etc. +- **Multi-user support** — each user (via messaging platforms) gets their own user model + :::tip Honcho is fully opt-in — zero behavior change when disabled or unconfigured. All Honcho calls are non-fatal; if the service is unreachable, the agent continues normally. 
::: From c90ba029ce79160cff052bcddad810716846a7ad Mon Sep 17 00:00:00 2001 From: Erosika Date: Tue, 10 Mar 2026 17:00:52 -0400 Subject: [PATCH 12/23] refactor(honcho): write all host-scoped settings into hosts block Setup wizard now writes memoryMode, writeFrequency, recallMode, and sessionStrategy into hosts.hermes instead of the config root. Client resolution updated to read sessionStrategy and sessionPeerPrefix from host block first. Docs updated to show hosts-based config as the default example so other integrations can coexist cleanly. --- honcho_integration/cli.py | 28 ++++++++++---------- honcho_integration/client.py | 12 +++++++-- website/docs/user-guide/features/honcho.md | 30 +++++++++++++--------- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 6489cd09..bcd0f1da 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -121,20 +121,20 @@ def cmd_setup(args) -> None: hermes_host.setdefault("aiPeer", HOST) - # Memory mode - current_mode = cfg.get("memoryMode", "hybrid") + # Memory mode (host-scoped) + current_mode = hermes_host.get("memoryMode") or cfg.get("memoryMode", "hybrid") print(f"\n Memory mode options:") print(" hybrid — write to both Honcho and local MEMORY.md (default)") print(" honcho — Honcho only, skip MEMORY.md writes") print(" local — MEMORY.md only, Honcho disabled") new_mode = _prompt("Memory mode", default=current_mode) if new_mode in ("hybrid", "honcho", "local"): - cfg["memoryMode"] = new_mode + hermes_host["memoryMode"] = new_mode else: - cfg["memoryMode"] = "hybrid" + hermes_host["memoryMode"] = "hybrid" - # Write frequency - current_wf = str(cfg.get("writeFrequency", "async")) + # Write frequency (host-scoped) + current_wf = str(hermes_host.get("writeFrequency") or cfg.get("writeFrequency", "async")) print(f"\n Write frequency options:") print(" async — background thread, no token cost (recommended)") print(" turn — sync write after every 
turn") @@ -142,22 +142,22 @@ def cmd_setup(args) -> None: print(" N — write every N turns (e.g. 5)") new_wf = _prompt("Write frequency", default=current_wf) try: - cfg["writeFrequency"] = int(new_wf) + hermes_host["writeFrequency"] = int(new_wf) except (ValueError, TypeError): - cfg["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" + hermes_host["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" - # Recall mode - current_recall = cfg.get("recallMode", "hybrid") + # Recall mode (host-scoped) + current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") print(f"\n Recall mode options:") print(" hybrid — pre-warmed context + memory tools available (default)") print(" context — pre-warmed context only, memory tools suppressed") print(" tools — no pre-loaded context, rely on tool calls only") new_recall = _prompt("Recall mode", default=current_recall) if new_recall in ("hybrid", "context", "tools"): - cfg["recallMode"] = new_recall + hermes_host["recallMode"] = new_recall - # Session strategy - current_strat = cfg.get("sessionStrategy", "per-session") + # Session strategy (host-scoped) + current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") print(f"\n Session strategy options:") print(" per-session — new Honcho session each run, named by Hermes session ID (default)") print(" per-repo — one session per git repository (uses repo root name)") @@ -165,7 +165,7 @@ def cmd_setup(args) -> None: print(" global — single session across all directories") new_strat = _prompt("Session strategy", default=current_strat) if new_strat in ("per-session", "per-repo", "per-directory", "global"): - cfg["sessionStrategy"] = new_strat + hermes_host["sessionStrategy"] = new_strat cfg.setdefault("enabled", True) cfg.setdefault("saveMessages", True) diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 3f3f174d..015c4458 100644 --- 
a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -201,8 +201,16 @@ class HonchoClientConfig: or raw.get("recallMode") or "hybrid" ), - session_strategy=raw.get("sessionStrategy", "per-session"), - session_peer_prefix=raw.get("sessionPeerPrefix", False), + session_strategy=( + host_block.get("sessionStrategy") + or raw.get("sessionStrategy") + or "per-session" + ), + session_peer_prefix=( + host_block.get("sessionPeerPrefix") + if "sessionPeerPrefix" in host_block + else raw.get("sessionPeerPrefix", False) + ), sessions=raw.get("sessions", {}), raw=raw, ) diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index b189c898..242fffa2 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -37,16 +37,16 @@ The setup wizard walks through API key, peer names, workspace, memory mode, writ ### Manual Setup -#### 1. Install the Client Library +#### 1. Get an API Key + +Go to [app.honcho.dev](https://app.honcho.dev) > Settings > API Keys. + +#### 2. Install the Client Library ```bash pip install 'honcho-ai>=2.0.1' ``` -#### 2. Get an API Key - -Go to [app.honcho.dev](https://app.honcho.dev) > Settings > API Keys. - #### 3. 
Configure Honcho reads from `~/.honcho/config.json` (shared across all Honcho-enabled applications): @@ -54,17 +54,23 @@ Honcho reads from `~/.honcho/config.json` (shared across all Honcho-enabled appl ```json { "apiKey": "your-honcho-api-key", - "workspace": "hermes", "peerName": "your-name", - "aiPeer": "hermes", - "memoryMode": "hybrid", - "writeFrequency": "async", - "recallMode": "hybrid", - "sessionStrategy": "per-directory", - "enabled": true + "enabled": true, + "hosts": { + "hermes": { + "workspace": "hermes", + "aiPeer": "hermes", + "memoryMode": "hybrid", + "writeFrequency": "async", + "recallMode": "hybrid", + "sessionStrategy": "per-directory" + } + } } ``` +The `hosts` structure lets multiple integrations share the same config file. Each host (Hermes, Claude Code, Cursor, etc.) reads its own block while sharing global fields like `apiKey` and `peerName`. + Or set the API key as an environment variable: ```bash From 4c54c2709c1ce4563543e0678ea0f5030b78706c Mon Sep 17 00:00:00 2001 From: Erosika Date: Tue, 10 Mar 2026 17:11:58 -0400 Subject: [PATCH 13/23] Revert "refactor(honcho): write all host-scoped settings into hosts block" This reverts commit c90ba029ce79160cff052bcddad810716846a7ad. 
--- honcho_integration/cli.py | 28 ++++++++++---------- honcho_integration/client.py | 12 ++------- website/docs/user-guide/features/honcho.md | 30 +++++++++------------- 3 files changed, 28 insertions(+), 42 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index bcd0f1da..6489cd09 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -121,20 +121,20 @@ def cmd_setup(args) -> None: hermes_host.setdefault("aiPeer", HOST) - # Memory mode (host-scoped) - current_mode = hermes_host.get("memoryMode") or cfg.get("memoryMode", "hybrid") + # Memory mode + current_mode = cfg.get("memoryMode", "hybrid") print(f"\n Memory mode options:") print(" hybrid — write to both Honcho and local MEMORY.md (default)") print(" honcho — Honcho only, skip MEMORY.md writes") print(" local — MEMORY.md only, Honcho disabled") new_mode = _prompt("Memory mode", default=current_mode) if new_mode in ("hybrid", "honcho", "local"): - hermes_host["memoryMode"] = new_mode + cfg["memoryMode"] = new_mode else: - hermes_host["memoryMode"] = "hybrid" + cfg["memoryMode"] = "hybrid" - # Write frequency (host-scoped) - current_wf = str(hermes_host.get("writeFrequency") or cfg.get("writeFrequency", "async")) + # Write frequency + current_wf = str(cfg.get("writeFrequency", "async")) print(f"\n Write frequency options:") print(" async — background thread, no token cost (recommended)") print(" turn — sync write after every turn") @@ -142,22 +142,22 @@ def cmd_setup(args) -> None: print(" N — write every N turns (e.g. 
5)") new_wf = _prompt("Write frequency", default=current_wf) try: - hermes_host["writeFrequency"] = int(new_wf) + cfg["writeFrequency"] = int(new_wf) except (ValueError, TypeError): - hermes_host["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" + cfg["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" - # Recall mode (host-scoped) - current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") + # Recall mode + current_recall = cfg.get("recallMode", "hybrid") print(f"\n Recall mode options:") print(" hybrid — pre-warmed context + memory tools available (default)") print(" context — pre-warmed context only, memory tools suppressed") print(" tools — no pre-loaded context, rely on tool calls only") new_recall = _prompt("Recall mode", default=current_recall) if new_recall in ("hybrid", "context", "tools"): - hermes_host["recallMode"] = new_recall + cfg["recallMode"] = new_recall - # Session strategy (host-scoped) - current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") + # Session strategy + current_strat = cfg.get("sessionStrategy", "per-session") print(f"\n Session strategy options:") print(" per-session — new Honcho session each run, named by Hermes session ID (default)") print(" per-repo — one session per git repository (uses repo root name)") @@ -165,7 +165,7 @@ def cmd_setup(args) -> None: print(" global — single session across all directories") new_strat = _prompt("Session strategy", default=current_strat) if new_strat in ("per-session", "per-repo", "per-directory", "global"): - hermes_host["sessionStrategy"] = new_strat + cfg["sessionStrategy"] = new_strat cfg.setdefault("enabled", True) cfg.setdefault("saveMessages", True) diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 015c4458..3f3f174d 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -201,16 +201,8 @@ class 
HonchoClientConfig: or raw.get("recallMode") or "hybrid" ), - session_strategy=( - host_block.get("sessionStrategy") - or raw.get("sessionStrategy") - or "per-session" - ), - session_peer_prefix=( - host_block.get("sessionPeerPrefix") - if "sessionPeerPrefix" in host_block - else raw.get("sessionPeerPrefix", False) - ), + session_strategy=raw.get("sessionStrategy", "per-session"), + session_peer_prefix=raw.get("sessionPeerPrefix", False), sessions=raw.get("sessions", {}), raw=raw, ) diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 242fffa2..b189c898 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -37,16 +37,16 @@ The setup wizard walks through API key, peer names, workspace, memory mode, writ ### Manual Setup -#### 1. Get an API Key - -Go to [app.honcho.dev](https://app.honcho.dev) > Settings > API Keys. - -#### 2. Install the Client Library +#### 1. Install the Client Library ```bash pip install 'honcho-ai>=2.0.1' ``` +#### 2. Get an API Key + +Go to [app.honcho.dev](https://app.honcho.dev) > Settings > API Keys. + #### 3. Configure Honcho reads from `~/.honcho/config.json` (shared across all Honcho-enabled applications): @@ -54,23 +54,17 @@ Honcho reads from `~/.honcho/config.json` (shared across all Honcho-enabled appl ```json { "apiKey": "your-honcho-api-key", + "workspace": "hermes", "peerName": "your-name", - "enabled": true, - "hosts": { - "hermes": { - "workspace": "hermes", - "aiPeer": "hermes", - "memoryMode": "hybrid", - "writeFrequency": "async", - "recallMode": "hybrid", - "sessionStrategy": "per-directory" - } - } + "aiPeer": "hermes", + "memoryMode": "hybrid", + "writeFrequency": "async", + "recallMode": "hybrid", + "sessionStrategy": "per-directory", + "enabled": true } ``` -The `hosts` structure lets multiple integrations share the same config file. Each host (Hermes, Claude Code, Cursor, etc.) 
reads its own block while sharing global fields like `apiKey` and `peerName`. - Or set the API key as an environment variable: ```bash From 047b118299fb5cbac28a4517d30b72549f3559e1 Mon Sep 17 00:00:00 2001 From: Erosika Date: Wed, 11 Mar 2026 11:46:37 -0400 Subject: [PATCH 14/23] fix(honcho): resolve review blockers for merge Address merge-blocking review feedback by removing unsafe signal handler overrides, wiring next-turn Honcho prefetch, restoring per-directory session defaults, and exposing all Honcho tools to the model surface. Also harden prefetch cache access with public thread-safe accessors and remove duplicate browser cleanup code. Made-with: Cursor --- honcho_integration/cli.py | 10 ++- honcho_integration/client.py | 4 +- honcho_integration/session.py | 25 +++++- run_agent.py | 85 +++++++++++++------ tests/honcho_integration/test_async_memory.py | 19 +++++ tests/honcho_integration/test_client.py | 4 +- tests/test_run_agent.py | 57 +++++++++++-- tools/browser_tool.py | 21 +---- toolsets.py | 6 +- 9 files changed, 162 insertions(+), 69 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 6489cd09..15d15869 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -157,11 +157,11 @@ def cmd_setup(args) -> None: cfg["recallMode"] = new_recall # Session strategy - current_strat = cfg.get("sessionStrategy", "per-session") + current_strat = cfg.get("sessionStrategy", "per-directory") print(f"\n Session strategy options:") - print(" per-session — new Honcho session each run, named by Hermes session ID (default)") + print(" per-directory — one session per working directory (default)") print(" per-repo — one session per git repository (uses repo root name)") - print(" per-directory — one session per working directory") + print(" per-session — new Honcho session each run, named by Hermes session ID") print(" global — single session across all directories") new_strat = _prompt("Session strategy", 
default=current_strat) if new_strat in ("per-session", "per-repo", "per-directory", "global"): @@ -199,6 +199,7 @@ def cmd_setup(args) -> None: print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)") print(f" honcho_search — semantic search over your history (no LLM)") print(f" honcho_profile — your peer card, key facts (no LLM)") + print(f" honcho_conclude — persist a user fact to Honcho memory (no LLM)") print(f"\n Other commands:") print(f" hermes honcho status — show full config") print(f" hermes honcho mode — show or change memory mode") @@ -710,10 +711,11 @@ def cmd_migrate(args) -> None: print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)") print(" honcho_search — semantic search over stored context (no LLM)") print(" honcho_profile — fast peer card snapshot (no LLM)") + print(" honcho_conclude — write a conclusion/fact back to memory (no LLM)") print() print(" Session naming") print(" OpenClaw: no persistent session concept — files are global.") - print(" Hermes: per-session by default — each run gets a new Honcho session") + print(" Hermes: per-directory by default — each project gets its own session") print(" Map a custom name: hermes honcho map ") # ── Step 6: Next steps ──────────────────────────────────────────────────── diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 3f3f174d..729bb42c 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -95,7 +95,7 @@ class HonchoClientConfig: # "tools" — no pre-loaded context, rely on tool calls only recall_mode: str = "hybrid" # Session resolution - session_strategy: str = "per-session" + session_strategy: str = "per-directory" session_peer_prefix: bool = False sessions: dict[str, str] = field(default_factory=dict) # Raw global config for anything else consumers need @@ -201,7 +201,7 @@ class HonchoClientConfig: or raw.get("recallMode") or "hybrid" ), - session_strategy=raw.get("sessionStrategy", 
"per-session"), + session_strategy=raw.get("sessionStrategy", "per-directory"), session_peer_prefix=raw.get("sessionPeerPrefix", False), sessions=raw.get("sessions", {}), raw=raw, diff --git a/honcho_integration/session.py b/honcho_integration/session.py index e671f1c8..19c41989 100644 --- a/honcho_integration/session.py +++ b/honcho_integration/session.py @@ -103,6 +103,7 @@ class HonchoSessionManager: # Prefetch caches: session_key → last result (consumed once per turn) self._context_cache: dict[str, dict] = {} self._dialectic_cache: dict[str, str] = {} + self._prefetch_cache_lock = threading.Lock() self._dialectic_reasoning_level: str = ( config.dialectic_reasoning_level if config else "low" ) @@ -496,18 +497,26 @@ class HonchoSessionManager: def _run(): result = self.dialectic_query(session_key, query) if result: - self._dialectic_cache[session_key] = result + self.set_dialectic_result(session_key, result) t = threading.Thread(target=_run, name="honcho-dialectic-prefetch", daemon=True) t.start() + def set_dialectic_result(self, session_key: str, result: str) -> None: + """Store a prefetched dialectic result in a thread-safe way.""" + if not result: + return + with self._prefetch_cache_lock: + self._dialectic_cache[session_key] = result + def pop_dialectic_result(self, session_key: str) -> str: """ Return and clear the cached dialectic result for this session. Returns empty string if no result is ready yet. 
""" - return self._dialectic_cache.pop(session_key, "") + with self._prefetch_cache_lock: + return self._dialectic_cache.pop(session_key, "") def prefetch_context(self, session_key: str, user_message: str | None = None) -> None: """ @@ -519,18 +528,26 @@ class HonchoSessionManager: def _run(): result = self.get_prefetch_context(session_key, user_message) if result: - self._context_cache[session_key] = result + self.set_context_result(session_key, result) t = threading.Thread(target=_run, name="honcho-context-prefetch", daemon=True) t.start() + def set_context_result(self, session_key: str, result: dict[str, str]) -> None: + """Store a prefetched context result in a thread-safe way.""" + if not result: + return + with self._prefetch_cache_lock: + self._context_cache[session_key] = result + def pop_context_result(self, session_key: str) -> dict[str, str]: """ Return and clear the cached context result for this session. Returns empty dict if no result is ready yet (first turn). """ - return self._context_cache.pop(session_key, {}) + with self._prefetch_cache_lock: + return self._context_cache.pop(session_key, {}) def get_prefetch_context(self, session_key: str, user_message: str | None = None) -> dict[str, str]: """ diff --git a/run_agent.py b/run_agent.py index 9c9607af..0115e8e3 100644 --- a/run_agent.py +++ b/run_agent.py @@ -20,6 +20,7 @@ Usage: response = agent.run_conversation("Tell me about the latest Python updates") """ +import atexit import copy import hashlib import json @@ -31,6 +32,7 @@ import re import sys import time import threading +import weakref from types import SimpleNamespace import uuid from typing import List, Dict, Any, Optional @@ -550,6 +552,7 @@ class AIAgent: self._honcho = None # HonchoSessionManager | None self._honcho_session_key = honcho_session_key self._honcho_config = None # HonchoClientConfig | None + self._honcho_exit_hook_registered = False if not skip_memory: try: if honcho_manager is not None: @@ -1427,28 +1430,46 @@ class 
AIAgent: try: ctx = self._honcho.get_prefetch_context(self._honcho_session_key) if ctx: - self._honcho._context_cache[self._honcho_session_key] = ctx + self._honcho.set_context_result(self._honcho_session_key, ctx) logger.debug("Honcho context pre-warmed for first turn") except Exception as exc: logger.debug("Honcho context prefetch failed (non-fatal): %s", exc) - import signal as _signal - import threading as _threading + self._register_honcho_exit_hook() - honcho_ref = self._honcho + def _register_honcho_exit_hook(self) -> None: + """Register a process-exit flush hook without clobbering signal handlers.""" + if self._honcho_exit_hook_registered or not self._honcho: + return - if _threading.current_thread() is _threading.main_thread(): - def _honcho_flush_handler(signum, frame): - try: - honcho_ref.flush_all() - except Exception: - pass - if signum == _signal.SIGINT: - raise KeyboardInterrupt - raise SystemExit(0) + honcho_ref = weakref.ref(self._honcho) - _signal.signal(_signal.SIGTERM, _honcho_flush_handler) - _signal.signal(_signal.SIGINT, _honcho_flush_handler) + def _flush_honcho_on_exit(): + manager = honcho_ref() + if manager is None: + return + try: + manager.flush_all() + except Exception as exc: + logger.debug("Honcho flush on exit failed (non-fatal): %s", exc) + + atexit.register(_flush_honcho_on_exit) + self._honcho_exit_hook_registered = True + + def _queue_honcho_prefetch(self, user_message: str) -> None: + """Queue turn-end Honcho prefetch so the next turn can consume cached results.""" + if not self._honcho or not self._honcho_session_key: + return + + recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid") + if recall_mode == "tools": + return + + try: + self._honcho.prefetch_context(self._honcho_session_key, user_message) + self._honcho.prefetch_dialectic(self._honcho_session_key, user_message or "What were we working on?") + except Exception as exc: + logger.debug("Honcho background prefetch failed (non-fatal): %s", 
exc) def _honcho_prefetch(self, user_message: str) -> str: """Assemble the first-turn Honcho context from the pre-warmed cache.""" @@ -1472,6 +1493,10 @@ class AIAgent: if ai_card: parts.append(ai_card) + dialectic = self._honcho.pop_dialectic_result(self._honcho_session_key) + if dialectic: + parts.append(f"## Continuity synthesis\n{dialectic}") + if not parts: return "" header = ( @@ -3379,15 +3404,23 @@ class AIAgent: ) self._iters_since_skill = 0 - # Honcho: on the first turn only, read the pre-warmed context snapshot and - # bake it into the system prompt. We intentionally avoid per-turn refreshes - # here because changing the system prompt would destroy provider prompt-cache - # reuse for the rest of the session. + # Honcho prefetch consumption: + # - First turn: bake into cached system prompt (stable for the session). + # - Later turns: inject as ephemeral system context for this API call only. + # + # This keeps the persisted/cached prompt stable while still allowing + # turn N to consume background prefetch results from turn N-1. self._honcho_context = "" + self._honcho_turn_context = "" _recall_mode = (self._honcho_config.recall_mode if self._honcho_config else "hybrid") - if self._honcho and self._honcho_session_key and not conversation_history and _recall_mode != "tools": + if self._honcho and self._honcho_session_key and _recall_mode != "tools": try: - self._honcho_context = self._honcho_prefetch(user_message) + prefetched_context = self._honcho_prefetch(user_message) + if prefetched_context: + if not conversation_history: + self._honcho_context = prefetched_context + else: + self._honcho_turn_context = prefetched_context except Exception as e: logger.debug("Honcho prefetch failed (non-fatal): %s", e) @@ -3566,15 +3599,12 @@ class AIAgent: api_messages.append(api_msg) # Build the final system message: cached prompt + ephemeral system prompt. 
- # The ephemeral part is appended here (not baked into the cached prompt) - # so it stays out of the session DB and logs. - # Note: Honcho context is baked into _cached_system_prompt on the first - # turn and stored in the session DB, so it does NOT need to be injected - # here. This keeps the system message identical across all turns in a - # session, maximizing Anthropic prompt cache hits. + # Ephemeral additions are API-call-time only (not persisted to session DB). effective_system = active_system_prompt or "" if self.ephemeral_system_prompt: effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() + if self._honcho_turn_context: + effective_system = (effective_system + "\n\n" + self._honcho_turn_context).strip() if effective_system: api_messages = [{"role": "system", "content": effective_system}] + api_messages @@ -4656,6 +4686,7 @@ class AIAgent: # Sync conversation to Honcho for user modeling if final_response and not interrupted: self._honcho_sync(original_user_message, final_response) + self._queue_honcho_prefetch(original_user_message) # Build result with interrupt info if applicable result = { diff --git a/tests/honcho_integration/test_async_memory.py b/tests/honcho_integration/test_async_memory.py index c8c4bf1b..52a03ac2 100644 --- a/tests/honcho_integration/test_async_memory.py +++ b/tests/honcho_integration/test_async_memory.py @@ -487,3 +487,22 @@ class TestNewConfigFieldDefaults: cfg = HonchoClientConfig(memory_mode="hybrid", peer_memory_modes={"hermes": "local"}) assert cfg.peer_memory_mode("hermes") == "local" assert cfg.peer_memory_mode("other") == "hybrid" + + +class TestPrefetchCacheAccessors: + def test_set_and_pop_context_result(self): + mgr = _make_manager(write_frequency="turn") + payload = {"representation": "Known user", "card": "prefers concise replies"} + + mgr.set_context_result("cli:test", payload) + + assert mgr.pop_context_result("cli:test") == payload + assert mgr.pop_context_result("cli:test") == {} + 
+ def test_set_and_pop_dialectic_result(self): + mgr = _make_manager(write_frequency="turn") + + mgr.set_dialectic_result("cli:test", "Resume with toolset cleanup") + + assert mgr.pop_dialectic_result("cli:test") == "Resume with toolset cleanup" + assert mgr.pop_dialectic_result("cli:test") == "" diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py index d779d9a6..fb3d8373 100644 --- a/tests/honcho_integration/test_client.py +++ b/tests/honcho_integration/test_client.py @@ -25,7 +25,7 @@ class TestHonchoClientConfigDefaults: assert config.environment == "production" assert config.enabled is False assert config.save_messages is True - assert config.session_strategy == "per-session" + assert config.session_strategy == "per-directory" assert config.recall_mode == "hybrid" assert config.session_peer_prefix is False assert config.linked_hosts == [] @@ -140,7 +140,7 @@ class TestFromGlobalConfig: config_file = tmp_path / "config.json" config_file.write_text(json.dumps({"apiKey": "key"})) config = HonchoClientConfig.from_global_config(config_path=config_file) - assert config.session_strategy == "per-session" + assert config.session_strategy == "per-directory" def test_context_tokens_host_block_wins(self, tmp_path): """Host block contextTokens should override root.""" diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 91bb83ae..f10be1b1 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1192,17 +1192,15 @@ class TestSystemPromptStability: assert "User prefers Python over JavaScript" in agent._cached_system_prompt - def test_honcho_prefetch_skipped_on_continuing_session(self): - """Honcho prefetch should not be called when conversation_history - is non-empty (continuing session).""" + def test_honcho_prefetch_runs_on_continuing_session(self): + """Honcho prefetch is consumed on continuing sessions via ephemeral context.""" conversation_history = [ {"role": "user", "content": "hello"}, {"role": 
"assistant", "content": "hi there"}, ] - - # The guard: `not conversation_history` is False when history exists - should_prefetch = not conversation_history - assert should_prefetch is False + recall_mode = "hybrid" + should_prefetch = bool(conversation_history) and recall_mode != "tools" + assert should_prefetch is True def test_honcho_prefetch_runs_on_first_turn(self): """Honcho prefetch should run when conversation_history is empty.""" @@ -1273,4 +1271,49 @@ class TestHonchoActivation: assert agent._honcho is manager manager.get_or_create.assert_called_once_with("gateway-session") manager.get_prefetch_context.assert_called_once_with("gateway-session") + manager.set_context_result.assert_called_once_with( + "gateway-session", + {"representation": "Known user", "card": ""}, + ) mock_client.assert_not_called() + + +class TestHonchoPrefetchScheduling: + def test_honcho_prefetch_includes_cached_dialectic(self, agent): + agent._honcho = MagicMock() + agent._honcho_session_key = "session-key" + agent._honcho.pop_context_result.return_value = {} + agent._honcho.pop_dialectic_result.return_value = "Continue with the migration checklist." 
+ + context = agent._honcho_prefetch("what next?") + + assert "Continuity synthesis" in context + assert "migration checklist" in context + + def test_queue_honcho_prefetch_skips_tools_mode(self, agent): + agent._honcho = MagicMock() + agent._honcho_session_key = "session-key" + agent._honcho_config = HonchoClientConfig( + enabled=True, + api_key="honcho-key", + recall_mode="tools", + ) + + agent._queue_honcho_prefetch("what next?") + + agent._honcho.prefetch_context.assert_not_called() + agent._honcho.prefetch_dialectic.assert_not_called() + + def test_queue_honcho_prefetch_runs_when_context_enabled(self, agent): + agent._honcho = MagicMock() + agent._honcho_session_key = "session-key" + agent._honcho_config = HonchoClientConfig( + enabled=True, + api_key="honcho-key", + recall_mode="hybrid", + ) + + agent._queue_honcho_prefetch("what next?") + + agent._honcho.prefetch_context.assert_called_once_with("session-key", "what next?") + agent._honcho.prefetch_dialectic.assert_called_once_with("session-key", "what next?") diff --git a/tools/browser_tool.py b/tools/browser_tool.py index feee2e56..dd44549b 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1640,25 +1640,6 @@ def _cleanup_old_recordings(max_age_hours=72): logger.debug("Recording cleanup error (non-critical): %s", e) -def _cleanup_old_recordings(max_age_hours=72): - """Remove browser recordings older than max_age_hours to prevent disk bloat.""" - import time - try: - hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) - recordings_dir = hermes_home / "browser_recordings" - if not recordings_dir.exists(): - return - cutoff = time.time() - (max_age_hours * 3600) - for f in recordings_dir.glob("session_*.webm"): - try: - if f.stat().st_mtime < cutoff: - f.unlink() - except Exception: - pass - except Exception: - pass - - # ============================================================================ # Cleanup and Management Functions # 
============================================================================ @@ -1764,7 +1745,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: pid_file = os.path.join(socket_dir, f"{session_name}.pid") if os.path.isfile(pid_file): try: - daemon_pid = int(open(pid_file).read().strip()) + daemon_pid = int(Path(pid_file).read_text().strip()) os.kill(daemon_pid, signal.SIGTERM) logger.debug("Killed daemon pid %s for %s", daemon_pid, session_name) except (ProcessLookupError, ValueError, PermissionError, OSError): diff --git a/toolsets.py b/toolsets.py index 50ddf5f9..dbf1d887 100644 --- a/toolsets.py +++ b/toolsets.py @@ -60,8 +60,8 @@ _HERMES_CORE_TOOLS = [ "schedule_cronjob", "list_cronjobs", "remove_cronjob", # Cross-platform messaging (gated on gateway running via check_fn) "send_message", - # Honcho user context (gated on honcho being active via check_fn) - "honcho_context", + # Honcho memory tools (gated on honcho being active via check_fn) + "honcho_context", "honcho_profile", "honcho_search", "honcho_conclude", # Home Assistant smart home control (gated on HASS_TOKEN via check_fn) "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service", ] @@ -192,7 +192,7 @@ TOOLSETS = { "honcho": { "description": "Honcho AI-native memory for persistent cross-session user modeling", - "tools": ["honcho_context"], + "tools": ["honcho_context", "honcho_profile", "honcho_search", "honcho_conclude"], "includes": [] }, From d987ff54a1c977330e9ff2c3fc905dd0e58d1cd6 Mon Sep 17 00:00:00 2001 From: Erosika Date: Wed, 11 Mar 2026 15:42:35 -0400 Subject: [PATCH 15/23] fix: change session_strategy default from per-directory to per-session Matches Hermes' native session naming (title if set, otherwise session-scoped). Not a breaking change -- no memory data is lost, old sessions remain in Honcho. 
--- honcho_integration/cli.py | 8 ++++---- honcho_integration/client.py | 4 ++-- tests/honcho_integration/test_client.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 15d15869..c8fa2dea 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -157,11 +157,11 @@ def cmd_setup(args) -> None: cfg["recallMode"] = new_recall # Session strategy - current_strat = cfg.get("sessionStrategy", "per-directory") + current_strat = cfg.get("sessionStrategy", "per-session") print(f"\n Session strategy options:") - print(" per-directory — one session per working directory (default)") + print(" per-session — new Honcho session each run, named by Hermes session ID (default)") + print(" per-directory — one session per working directory") print(" per-repo — one session per git repository (uses repo root name)") - print(" per-session — new Honcho session each run, named by Hermes session ID") print(" global — single session across all directories") new_strat = _prompt("Session strategy", default=current_strat) if new_strat in ("per-session", "per-repo", "per-directory", "global"): @@ -715,7 +715,7 @@ def cmd_migrate(args) -> None: print() print(" Session naming") print(" OpenClaw: no persistent session concept — files are global.") - print(" Hermes: per-directory by default — each project gets its own session") + print(" Hermes: per-session by default — each run gets its own session") print(" Map a custom name: hermes honcho map ") # ── Step 6: Next steps ──────────────────────────────────────────────────── diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 729bb42c..3f3f174d 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -95,7 +95,7 @@ class HonchoClientConfig: # "tools" — no pre-loaded context, rely on tool calls only recall_mode: str = "hybrid" # Session resolution - session_strategy: str = "per-directory" + 
session_strategy: str = "per-session" session_peer_prefix: bool = False sessions: dict[str, str] = field(default_factory=dict) # Raw global config for anything else consumers need @@ -201,7 +201,7 @@ class HonchoClientConfig: or raw.get("recallMode") or "hybrid" ), - session_strategy=raw.get("sessionStrategy", "per-directory"), + session_strategy=raw.get("sessionStrategy", "per-session"), session_peer_prefix=raw.get("sessionPeerPrefix", False), sessions=raw.get("sessions", {}), raw=raw, diff --git a/tests/honcho_integration/test_client.py b/tests/honcho_integration/test_client.py index fb3d8373..d779d9a6 100644 --- a/tests/honcho_integration/test_client.py +++ b/tests/honcho_integration/test_client.py @@ -25,7 +25,7 @@ class TestHonchoClientConfigDefaults: assert config.environment == "production" assert config.enabled is False assert config.save_messages is True - assert config.session_strategy == "per-directory" + assert config.session_strategy == "per-session" assert config.recall_mode == "hybrid" assert config.session_peer_prefix is False assert config.linked_hosts == [] @@ -140,7 +140,7 @@ class TestFromGlobalConfig: config_file = tmp_path / "config.json" config_file.write_text(json.dumps({"apiKey": "key"})) config = HonchoClientConfig.from_global_config(config_path=config_file) - assert config.session_strategy == "per-directory" + assert config.session_strategy == "per-session" def test_context_tokens_host_block_wins(self, tmp_path): """Host block contextTokens should override root.""" From 3c813535a746fda1a0cd5119dd26c74e37c6d4ea Mon Sep 17 00:00:00 2001 From: Erosika Date: Wed, 11 Mar 2026 17:45:35 -0400 Subject: [PATCH 16/23] fix(honcho): scope config writes to hosts.hermes, not root Config writes from hermes honcho setup/peer now go to hosts.hermes instead of mutating root-level keys. Root is reserved for the user or honcho CLI. apiKey remains at root as a shared credential. 
Reads updated to check hosts.hermes first with root fallback for all fields (peerName, enabled, saveMessages, environment, sessionStrategy, sessionPeerPrefix). --- honcho_integration/cli.py | 51 +++++++++++++++++------------------ honcho_integration/client.py | 52 +++++++++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 39 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index c8fa2dea..c899f9ff 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -88,7 +88,12 @@ def cmd_setup(args) -> None: if not _ensure_sdk_installed(): return - # API key + # All writes go to hosts.hermes — root keys are managed by the user + # or the honcho CLI only. + hosts = cfg.setdefault("hosts", {}) + hermes_host = hosts.setdefault(HOST, {}) + + # API key — shared credential, lives at root so all hosts can read it current_key = cfg.get("apiKey", "") masked = f"...{current_key[-8:]}" if len(current_key) > 8 else ("set" if current_key else "not set") print(f" Current API key: {masked}") @@ -96,45 +101,39 @@ def cmd_setup(args) -> None: if new_key: cfg["apiKey"] = new_key - if not cfg.get("apiKey"): + effective_key = cfg.get("apiKey", "") + if not effective_key: print("\n No API key configured. 
Get your API key at https://app.honcho.dev") print(" Run 'hermes honcho setup' again once you have a key.\n") return # Peer name - current_peer = cfg.get("peerName", "") + current_peer = hermes_host.get("peerName") or cfg.get("peerName", "") new_peer = _prompt("Your name (user peer)", default=current_peer or os.getenv("USER", "user")) if new_peer: - cfg["peerName"] = new_peer - - # Host block - hosts = cfg.setdefault("hosts", {}) - hermes_host = hosts.setdefault(HOST, {}) + hermes_host["peerName"] = new_peer current_workspace = hermes_host.get("workspace") or cfg.get("workspace", "hermes") new_workspace = _prompt("Workspace ID", default=current_workspace) if new_workspace: hermes_host["workspace"] = new_workspace - # Also update flat workspace if it was the primary one - if cfg.get("workspace") == current_workspace: - cfg["workspace"] = new_workspace hermes_host.setdefault("aiPeer", HOST) # Memory mode - current_mode = cfg.get("memoryMode", "hybrid") + current_mode = hermes_host.get("memoryMode") or cfg.get("memoryMode", "hybrid") print(f"\n Memory mode options:") print(" hybrid — write to both Honcho and local MEMORY.md (default)") print(" honcho — Honcho only, skip MEMORY.md writes") print(" local — MEMORY.md only, Honcho disabled") new_mode = _prompt("Memory mode", default=current_mode) if new_mode in ("hybrid", "honcho", "local"): - cfg["memoryMode"] = new_mode + hermes_host["memoryMode"] = new_mode else: - cfg["memoryMode"] = "hybrid" + hermes_host["memoryMode"] = "hybrid" # Write frequency - current_wf = str(cfg.get("writeFrequency", "async")) + current_wf = str(hermes_host.get("writeFrequency") or cfg.get("writeFrequency", "async")) print(f"\n Write frequency options:") print(" async — background thread, no token cost (recommended)") print(" turn — sync write after every turn") @@ -142,22 +141,22 @@ def cmd_setup(args) -> None: print(" N — write every N turns (e.g. 
5)") new_wf = _prompt("Write frequency", default=current_wf) try: - cfg["writeFrequency"] = int(new_wf) + hermes_host["writeFrequency"] = int(new_wf) except (ValueError, TypeError): - cfg["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" + hermes_host["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" # Recall mode - current_recall = cfg.get("recallMode", "hybrid") + current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") print(f"\n Recall mode options:") print(" hybrid — pre-warmed context + memory tools available (default)") print(" context — pre-warmed context only, memory tools suppressed") print(" tools — no pre-loaded context, rely on tool calls only") new_recall = _prompt("Recall mode", default=current_recall) if new_recall in ("hybrid", "context", "tools"): - cfg["recallMode"] = new_recall + hermes_host["recallMode"] = new_recall # Session strategy - current_strat = cfg.get("sessionStrategy", "per-session") + current_strat = hermes_host.get("sessionStrategy") or cfg.get("sessionStrategy", "per-session") print(f"\n Session strategy options:") print(" per-session — new Honcho session each run, named by Hermes session ID (default)") print(" per-directory — one session per working directory") @@ -165,10 +164,10 @@ def cmd_setup(args) -> None: print(" global — single session across all directories") new_strat = _prompt("Session strategy", default=current_strat) if new_strat in ("per-session", "per-repo", "per-directory", "global"): - cfg["sessionStrategy"] = new_strat + hermes_host["sessionStrategy"] = new_strat - cfg.setdefault("enabled", True) - cfg.setdefault("saveMessages", True) + hermes_host.setdefault("enabled", True) + hermes_host.setdefault("saveMessages", True) _write_config(cfg) print(f"\n Config written to {GLOBAL_CONFIG_PATH}") @@ -321,7 +320,7 @@ def cmd_peer(args) -> None: # Show current values hosts = cfg.get("hosts", {}) hermes = hosts.get(HOST, {}) - user = 
cfg.get('peerName') or '(not set)' + user = hermes.get('peerName') or cfg.get('peerName') or '(not set)' ai = hermes.get('aiPeer') or cfg.get('aiPeer') or HOST lvl = hermes.get("dialecticReasoningLevel") or cfg.get("dialecticReasoningLevel") or "low" max_chars = hermes.get("dialecticMaxChars") or cfg.get("dialecticMaxChars") or 600 @@ -337,9 +336,9 @@ def cmd_peer(args) -> None: return if user_name is not None: - cfg["peerName"] = user_name.strip() + cfg.setdefault("hosts", {}).setdefault(HOST, {})["peerName"] = user_name.strip() changed = True - print(f" User peer → {cfg['peerName']}") + print(f" User peer → {user_name.strip()}") if ai_name is not None: cfg.setdefault("hosts", {}).setdefault(HOST, {})["aiPeer"] = ai_name.strip() diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 3f3f174d..e7030fee 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -147,17 +147,28 @@ class HonchoClientConfig: ) linked_hosts = host_block.get("linkedHosts", []) - api_key = raw.get("apiKey") or os.environ.get("HONCHO_API_KEY") + api_key = ( + host_block.get("apiKey") + or raw.get("apiKey") + or os.environ.get("HONCHO_API_KEY") + ) + + environment = ( + host_block.get("environment") + or raw.get("environment", "production") + ) # Auto-enable when API key is present (unless explicitly disabled) - # This matches user expectations: setting an API key should activate the feature. - explicit_enabled = raw.get("enabled") - if explicit_enabled is None: - # Not explicitly set in config -> auto-enable if API key exists - enabled = bool(api_key) + # Host-level enabled wins, then root-level, then auto-enable if key exists. 
+ host_enabled = host_block.get("enabled") + root_enabled = raw.get("enabled") + if host_enabled is not None: + enabled = host_enabled + elif root_enabled is not None: + enabled = root_enabled else: - # Respect explicit setting - enabled = explicit_enabled + # Not explicitly set anywhere -> auto-enable if API key exists + enabled = bool(api_key) # write_frequency: accept int or string raw_wf = ( @@ -170,16 +181,31 @@ class HonchoClientConfig: except (TypeError, ValueError): write_frequency = str(raw_wf) + # saveMessages: host wins (None-aware since False is valid) + host_save = host_block.get("saveMessages") + save_messages = host_save if host_save is not None else raw.get("saveMessages", True) + + # sessionStrategy / sessionPeerPrefix: host first, root fallback + session_strategy = ( + host_block.get("sessionStrategy") + or raw.get("sessionStrategy", "per-session") + ) + host_prefix = host_block.get("sessionPeerPrefix") + session_peer_prefix = ( + host_prefix if host_prefix is not None + else raw.get("sessionPeerPrefix", False) + ) + return cls( host=host, workspace_id=workspace, api_key=api_key, - environment=raw.get("environment", "production"), - peer_name=raw.get("peerName"), + environment=environment, + peer_name=host_block.get("peerName") or raw.get("peerName"), ai_peer=ai_peer, linked_hosts=linked_hosts, enabled=enabled, - save_messages=raw.get("saveMessages", True), + save_messages=save_messages, **_resolve_memory_mode( raw.get("memoryMode", "hybrid"), host_block.get("memoryMode"), @@ -201,8 +227,8 @@ class HonchoClientConfig: or raw.get("recallMode") or "hybrid" ), - session_strategy=raw.get("sessionStrategy", "per-session"), - session_peer_prefix=raw.get("sessionPeerPrefix", False), + session_strategy=session_strategy, + session_peer_prefix=session_peer_prefix, sessions=raw.get("sessions", {}), raw=raw, ) From 8cddcfa0d8c505e2da37eddfd7e6718702747d6c Mon Sep 17 00:00:00 2001 From: Erosika Date: Wed, 11 Mar 2026 17:53:39 -0400 Subject: [PATCH 17/23] 
docs(honcho): update config docs for host-scoped write convention - Example config now shows hosts.hermes structure instead of flat root - Config table split into root-level (shared) and host-level sections - sessionStrategy default corrected to per-session - Multi-host section expanded with two-tool example - Note that existing root-level configs still work via fallback --- website/docs/user-guide/features/honcho.md | 57 ++++++++++++++++------ 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index b189c898..81eb3b90 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -54,17 +54,23 @@ Honcho reads from `~/.honcho/config.json` (shared across all Honcho-enabled appl ```json { "apiKey": "your-honcho-api-key", - "workspace": "hermes", - "peerName": "your-name", - "aiPeer": "hermes", - "memoryMode": "hybrid", - "writeFrequency": "async", - "recallMode": "hybrid", - "sessionStrategy": "per-directory", - "enabled": true + "hosts": { + "hermes": { + "workspace": "hermes", + "peerName": "your-name", + "aiPeer": "hermes", + "memoryMode": "hybrid", + "writeFrequency": "async", + "recallMode": "hybrid", + "sessionStrategy": "per-session", + "enabled": true + } + } } ``` +`apiKey` lives at the root because it is a shared credential across all Honcho-enabled tools. All other settings are scoped under `hosts.hermes`. The `hermes honcho setup` wizard writes this structure automatically. + Or set the API key as an environment variable: ```bash @@ -79,23 +85,36 @@ When an API key is present (either in `~/.honcho/config.json` or as `HONCHO_API_ ### Global Config (`~/.honcho/config.json`) +Settings are scoped to `hosts.hermes` and fall back to root-level globals when the host field is absent. 
Root-level keys are managed by the user or the honcho CLI -- Hermes only writes to its own host block (except `apiKey`, which is a shared credential at root). + +**Root-level (shared)** + +| Field | Default | Description | +|-------|---------|-------------| +| `apiKey` | — | Honcho API key (required, shared across all hosts) | +| `sessions` | `{}` | Manual session name overrides per directory (shared) | + +**Host-level (`hosts.hermes`)** + | Field | Default | Description | |-------|---------|-------------| -| `apiKey` | — | Honcho API key (required) | | `workspace` | `"hermes"` | Workspace identifier | | `peerName` | *(derived)* | Your identity name for user modeling | | `aiPeer` | `"hermes"` | AI assistant identity name | | `environment` | `"production"` | Honcho environment | +| `enabled` | *(auto)* | Auto-enables when API key is present | | `saveMessages` | `true` | Whether to sync messages to Honcho | | `memoryMode` | `"hybrid"` | Memory mode: `hybrid`, `honcho`, or `local` | | `writeFrequency` | `"async"` | When to write: `async`, `turn`, `session`, or integer N | | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` | -| `sessionStrategy` | `"per-directory"` | How sessions are scoped | +| `sessionStrategy` | `"per-session"` | How sessions are scoped | | `sessionPeerPrefix` | `false` | Prefix session names with peer name | | `contextTokens` | *(Honcho default)* | Max tokens for context prefetch | | `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` | | `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt | -| `sessions` | `{}` | Manual session name overrides per directory | +| `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference | + +All host-level fields fall back to the equivalent root-level key if not set under `hosts.hermes`. Existing configs with settings at root level continue to work. 
### Memory Modes @@ -142,33 +161,39 @@ Controls how Honcho context reaches the agent: | Strategy | Session key | Use case | |----------|-------------|----------| -| `per-directory` | CWD basename | Default. Each project gets its own session. | +| `per-session` | Unique per run | Default. Fresh session every time. | +| `per-directory` | CWD basename | Each project gets its own session. | | `per-repo` | Git repo root name | Groups subdirectories under one session. | -| `per-session` | Unique per run | Fresh session every time. | | `global` | Fixed `"global"` | Single cross-project session. | Resolution order: manual map > session title > strategy-derived key > platform key. -### Host-specific Configuration +### Multi-host Configuration -For multi-application setups, use host blocks: +Multiple Honcho-enabled tools share `~/.honcho/config.json`. Each tool writes only to its own host block, reads its host block first, and falls back to root-level globals: ```json { "apiKey": "your-key", + "peerName": "eri", "hosts": { "hermes": { "workspace": "my-workspace", "aiPeer": "hermes-assistant", + "memoryMode": "honcho", "linkedHosts": ["claude-code"], "contextTokens": 2000, "dialecticReasoningLevel": "medium" + }, + "claude-code": { + "workspace": "my-workspace", + "aiPeer": "clawd" } } } ``` -Host-specific fields override global fields. Resolution: host block > global fields > defaults. +Resolution: `hosts.` field > root-level field > default. In this example, both tools share the root `apiKey` and `peerName`, but each has its own `aiPeer` and workspace settings. 
### Hermes Config (`~/.hermes/config.yaml`) From 2d35016b94a9c7cad718a43fd5610933f5e45f97 Mon Sep 17 00:00:00 2001 From: Erosika Date: Wed, 11 Mar 2026 18:21:27 -0400 Subject: [PATCH 18/23] fix(honcho): harden tool gating and migration peer routing Prevent stale Honcho tool exposure in context/local modes, restore reliable async write retry behavior, and ensure SOUL.md migration uploads target the AI peer instead of the user peer. Also align Honcho CLI key checks with host-scoped apiKey resolution and lock the fixes with regression tests. Made-with: Cursor --- honcho_integration/cli.py | 10 +- honcho_integration/session.py | 123 ++++++++++++------ run_agent.py | 53 ++++++-- tests/honcho_integration/test_async_memory.py | 62 ++++++++- tests/honcho_integration/test_cli.py | 29 +++++ tests/test_run_agent.py | 76 +++++++++++ 6 files changed, 297 insertions(+), 56 deletions(-) create mode 100644 tests/honcho_integration/test_cli.py diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index c899f9ff..ad4907c2 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -31,6 +31,12 @@ def _write_config(cfg: dict) -> None: ) +def _resolve_api_key(cfg: dict) -> str: + """Resolve API key with host -> root -> env fallback.""" + host_key = ((cfg.get("hosts") or {}).get(HOST) or {}).get("apiKey") + return host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "") + + def _prompt(label: str, default: str | None = None, secret: bool = False) -> str: suffix = f" [{default}]" if default else "" sys.stdout.write(f" {label}{suffix}: ") @@ -435,7 +441,7 @@ def cmd_tokens(args) -> None: def cmd_identity(args) -> None: """Seed AI peer identity or show both peer representations.""" cfg = _read_config() - if not cfg.get("apiKey"): + if not _resolve_api_key(cfg): print(" No API key configured. 
Run 'hermes honcho setup' first.\n") return @@ -533,7 +539,7 @@ def cmd_migrate(args) -> None: agent_files.append(p) cfg = _read_config() - has_key = bool(cfg.get("apiKey", "")) + has_key = bool(_resolve_api_key(cfg)) print("\nHoncho migration: OpenClaw native memory → Hermes\n" + "─" * 50) print() diff --git a/honcho_integration/session.py b/honcho_integration/session.py index 19c41989..3d06d2f7 100644 --- a/honcho_integration/session.py +++ b/honcho_integration/session.py @@ -270,10 +270,10 @@ class HonchoSessionManager: self._cache[key] = session return session - def _flush_session(self, session: HonchoSession) -> None: + def _flush_session(self, session: HonchoSession) -> bool: """Internal: write unsynced messages to Honcho synchronously.""" if not session.messages: - return + return True user_peer = self._get_or_create_peer(session.user_peer_id) assistant_peer = self._get_or_create_peer(session.assistant_peer_id) @@ -286,7 +286,7 @@ class HonchoSessionManager: new_messages = [m for m in session.messages if not m.get("_synced")] if not new_messages: - return + return True honcho_messages = [] for msg in new_messages: @@ -298,12 +298,14 @@ class HonchoSessionManager: for msg in new_messages: msg["_synced"] = True logger.debug("Synced %d messages to Honcho for %s", len(honcho_messages), session.key) + self._cache[session.key] = session + return True except Exception as e: for msg in new_messages: msg["_synced"] = False logger.error("Failed to sync messages to Honcho: %s", e) - - self._cache[session.key] = session + self._cache[session.key] = session + return False def _async_writer_loop(self) -> None: """Background daemon thread: drains the async write queue.""" @@ -312,16 +314,33 @@ class HonchoSessionManager: item = self._async_queue.get(timeout=5) if item is _ASYNC_SHUTDOWN: break + + first_error: Exception | None = None try: - self._flush_session(item) + success = self._flush_session(item) except Exception as e: - logger.warning("Honcho async write failed, 
retrying once: %s", e) - import time as _time - _time.sleep(2) - try: - self._flush_session(item) - except Exception as e2: - logger.error("Honcho async write retry failed, dropping batch: %s", e2) + success = False + first_error = e + + if success: + continue + + if first_error is not None: + logger.warning("Honcho async write failed, retrying once: %s", first_error) + else: + logger.warning("Honcho async write failed, retrying once") + + import time as _time + _time.sleep(2) + + try: + retry_success = self._flush_session(item) + except Exception as e2: + logger.error("Honcho async write retry failed, dropping batch: %s", e2) + continue + + if not retry_success: + logger.error("Honcho async write retry failed, dropping batch") except queue.Empty: continue except Exception as e: @@ -617,21 +636,17 @@ class HonchoSessionManager: Returns: True if upload succeeded, False otherwise. """ - sanitized = self._sanitize_id(session_key) - honcho_session = self._sessions_cache.get(sanitized) + session = self._cache.get(session_key) + if not session: + logger.warning("No local session cached for '%s', skipping migration", session_key) + return False + + honcho_session = self._sessions_cache.get(session.honcho_session_id) if not honcho_session: logger.warning("No Honcho session cached for '%s', skipping migration", session_key) return False - # Resolve user peer for attribution - parts = session_key.split(":", 1) - channel = parts[0] if len(parts) > 1 else "default" - chat_id = parts[1] if len(parts) > 1 else session_key - user_peer_id = self._sanitize_id(f"user-{channel}-{chat_id}") - user_peer = self._peers_cache.get(user_peer_id) - if not user_peer: - logger.warning("No user peer cached for '%s', skipping migration", user_peer_id) - return False + user_peer = self._get_or_create_peer(session.user_peer_id) content_bytes = self._format_migration_transcript(session_key, messages) first_ts = messages[0].get("timestamp") if messages else None @@ -700,30 +715,45 @@ class 
HonchoSessionManager: if not memory_path.exists(): return False - sanitized = self._sanitize_id(session_key) - honcho_session = self._sessions_cache.get(sanitized) + session = self._cache.get(session_key) + if not session: + logger.warning("No local session cached for '%s', skipping memory migration", session_key) + return False + + honcho_session = self._sessions_cache.get(session.honcho_session_id) if not honcho_session: logger.warning("No Honcho session cached for '%s', skipping memory migration", session_key) return False - # Resolve user peer for attribution - parts = session_key.split(":", 1) - channel = parts[0] if len(parts) > 1 else "default" - chat_id = parts[1] if len(parts) > 1 else session_key - user_peer_id = self._sanitize_id(f"user-{channel}-{chat_id}") - user_peer = self._peers_cache.get(user_peer_id) - if not user_peer: - logger.warning("No user peer cached for '%s', skipping memory migration", user_peer_id) - return False + user_peer = self._get_or_create_peer(session.user_peer_id) + assistant_peer = self._get_or_create_peer(session.assistant_peer_id) uploaded = False files = [ - ("MEMORY.md", "consolidated_memory.md", "Long-term agent notes and preferences"), - ("USER.md", "user_profile.md", "User profile and preferences"), - ("SOUL.md", "agent_soul.md", "Agent persona and identity configuration"), + ( + "MEMORY.md", + "consolidated_memory.md", + "Long-term agent notes and preferences", + user_peer, + "user", + ), + ( + "USER.md", + "user_profile.md", + "User profile and preferences", + user_peer, + "user", + ), + ( + "SOUL.md", + "agent_soul.md", + "Agent persona and identity configuration", + assistant_peer, + "ai", + ), ] - for filename, upload_name, description in files: + for filename, upload_name, description, target_peer, target_kind in files: filepath = memory_path / filename if not filepath.exists(): continue @@ -745,10 +775,19 @@ class HonchoSessionManager: try: honcho_session.upload_file( file=(upload_name, wrapped.encode("utf-8"), 
"text/plain"), - peer=user_peer, - metadata={"source": "local_memory", "original_file": filename}, + peer=target_peer, + metadata={ + "source": "local_memory", + "original_file": filename, + "target_peer": target_kind, + }, + ) + logger.info( + "Uploaded %s to Honcho for %s (%s peer)", + filename, + session_key, + target_kind, ) - logger.info("Uploaded %s to Honcho for %s", filename, session_key) uploaded = True except Exception as e: logger.error("Failed to upload %s to Honcho: %s", filename, e) diff --git a/run_agent.py b/run_agent.py index ab27efbb..3bf7e4e2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -100,6 +100,13 @@ from agent.trajectory import ( save_trajectory as _save_trajectory_to_file, ) +HONCHO_TOOL_NAMES = { + "honcho_context", + "honcho_profile", + "honcho_search", + "honcho_conclude", +} + class IterationBudget: """Thread-safe shared iteration counter for parent and child agents. @@ -607,6 +614,11 @@ class AIAgent: print(" Run 'hermes honcho setup' to reconfigure.") self._honcho = None + # Tools are initially discovered before Honcho activation. If Honcho + # stays inactive, remove any stale honcho_* tools from prior process state. + if not self._honcho: + self._strip_honcho_tools_from_surface() + # Gate local memory writes based on per-peer memory modes. # AI peer governs MEMORY.md; user peer governs USER.md. # "honcho" = Honcho only, disable local; "local" = local only, no Honcho sync. 
@@ -1342,6 +1354,20 @@ class AIAgent: for peer in (hcfg.ai_peer, hcfg.peer_name or "user") ) + def _strip_honcho_tools_from_surface(self) -> None: + """Remove Honcho tools from the active tool surface.""" + if not self.tools: + self.valid_tool_names = set() + return + + self.tools = [ + tool for tool in self.tools + if tool.get("function", {}).get("name") not in HONCHO_TOOL_NAMES + ] + self.valid_tool_names = { + tool["function"]["name"] for tool in self.tools + } if self.tools else set() + def _activate_honcho( self, hcfg, @@ -1386,19 +1412,24 @@ class AIAgent: set_session_context(self._honcho, self._honcho_session_key) - if hcfg.recall_mode != "context": - self.tools = get_tool_definitions( - enabled_toolsets=enabled_toolsets, - disabled_toolsets=disabled_toolsets, - quiet_mode=True, - ) - self.valid_tool_names = { - tool["function"]["name"] for tool in self.tools - } if self.tools else set() + # Rebuild tool surface after Honcho context injection. Tool availability + # is check_fn-gated and may change once session context is attached. 
+ self.tools = get_tool_definitions( + enabled_toolsets=enabled_toolsets, + disabled_toolsets=disabled_toolsets, + quiet_mode=True, + ) + self.valid_tool_names = { + tool["function"]["name"] for tool in self.tools + } if self.tools else set() + + if hcfg.recall_mode == "context": + self._strip_honcho_tools_from_surface() + if not self.quiet_mode: + print(" Honcho active — recall_mode: context (tools suppressed)") + else: if not self.quiet_mode: print(f" Honcho active — recall_mode: {hcfg.recall_mode}") - elif not self.quiet_mode: - print(" Honcho active — recall_mode: context (tools suppressed)") logger.info( "Honcho active (session: %s, user: %s, workspace: %s, " diff --git a/tests/honcho_integration/test_async_memory.py b/tests/honcho_integration/test_async_memory.py index 52a03ac2..908c0fc6 100644 --- a/tests/honcho_integration/test_async_memory.py +++ b/tests/honcho_integration/test_async_memory.py @@ -380,10 +380,10 @@ class TestAsyncWriterThread: sess.add_message("user", "async msg") flushed = [] - original = mgr._flush_session def capture(s): flushed.append(s) + return True mgr._flush_session = capture mgr._async_queue.put(sess) @@ -457,6 +457,66 @@ class TestAsyncWriterRetry: assert call_count[0] == 2 assert not mgr._async_thread.is_alive() + def test_retries_when_flush_reports_failure(self): + mgr = _make_manager(write_frequency="async") + sess = _make_session() + sess.add_message("user", "msg") + + call_count = [0] + + def fail_then_succeed(_session): + call_count[0] += 1 + return call_count[0] > 1 + + mgr._flush_session = fail_then_succeed + + with patch("time.sleep"): + mgr._async_queue.put(sess) + deadline = time.time() + 3.0 + while call_count[0] < 2 and time.time() < deadline: + time.sleep(0.05) + + mgr.shutdown() + assert call_count[0] == 2 + + +class TestMemoryFileMigrationTargets: + def test_soul_upload_targets_ai_peer(self, tmp_path): + mgr = _make_manager(write_frequency="turn") + session = _make_session( + key="cli:test", + 
user_peer_id="custom-user", + assistant_peer_id="custom-ai", + honcho_session_id="cli-test", + ) + mgr._cache[session.key] = session + + user_peer = MagicMock(name="user-peer") + ai_peer = MagicMock(name="ai-peer") + mgr._peers_cache[session.user_peer_id] = user_peer + mgr._peers_cache[session.assistant_peer_id] = ai_peer + + honcho_session = MagicMock() + mgr._sessions_cache[session.honcho_session_id] = honcho_session + + (tmp_path / "MEMORY.md").write_text("memory facts", encoding="utf-8") + (tmp_path / "USER.md").write_text("user profile", encoding="utf-8") + (tmp_path / "SOUL.md").write_text("ai identity", encoding="utf-8") + + uploaded = mgr.migrate_memory_files(session.key, str(tmp_path)) + + assert uploaded is True + assert honcho_session.upload_file.call_count == 3 + + peer_by_upload_name = {} + for call_args in honcho_session.upload_file.call_args_list: + payload = call_args.kwargs["file"] + peer_by_upload_name[payload[0]] = call_args.kwargs["peer"] + + assert peer_by_upload_name["consolidated_memory.md"] is user_peer + assert peer_by_upload_name["user_profile.md"] is user_peer + assert peer_by_upload_name["agent_soul.md"] is ai_peer + # --------------------------------------------------------------------------- # HonchoClientConfig dataclass defaults for new fields diff --git a/tests/honcho_integration/test_cli.py b/tests/honcho_integration/test_cli.py new file mode 100644 index 00000000..b5a1c9f6 --- /dev/null +++ b/tests/honcho_integration/test_cli.py @@ -0,0 +1,29 @@ +"""Tests for Honcho CLI helpers.""" + +from honcho_integration.cli import _resolve_api_key + + +class TestResolveApiKey: + def test_prefers_host_scoped_key(self): + cfg = { + "apiKey": "root-key", + "hosts": { + "hermes": { + "apiKey": "host-key", + } + }, + } + assert _resolve_api_key(cfg) == "host-key" + + def test_falls_back_to_root_key(self): + cfg = { + "apiKey": "root-key", + "hosts": {"hermes": {}}, + } + assert _resolve_api_key(cfg) == "root-key" + + def 
test_falls_back_to_env_key(self, monkeypatch): + monkeypatch.setenv("HONCHO_API_KEY", "env-key") + assert _resolve_api_key({}) == "env-key" + monkeypatch.delenv("HONCHO_API_KEY", raising=False) + diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index e29ef618..be6f6d51 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1277,6 +1277,82 @@ class TestHonchoActivation: ) mock_client.assert_not_called() + def test_recall_mode_context_suppresses_honcho_tools(self): + hcfg = HonchoClientConfig( + enabled=True, + api_key="honcho-key", + memory_mode="hybrid", + peer_name="user", + ai_peer="hermes", + recall_mode="context", + ) + manager = MagicMock() + manager._config = hcfg + manager.get_or_create.return_value = SimpleNamespace(messages=[]) + manager.get_prefetch_context.return_value = {"representation": "Known user", "card": ""} + + with ( + patch( + "run_agent.get_tool_definitions", + side_effect=[ + _make_tool_defs("web_search"), + _make_tool_defs( + "web_search", + "honcho_context", + "honcho_profile", + "honcho_search", + "honcho_conclude", + ), + ], + ), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + patch("tools.honcho_tools.set_session_context"), + ): + agent = AIAgent( + api_key="test-key-1234567890", + quiet_mode=True, + skip_context_files=True, + skip_memory=False, + honcho_session_key="gateway-session", + honcho_manager=manager, + honcho_config=hcfg, + ) + + assert "web_search" in agent.valid_tool_names + assert "honcho_context" not in agent.valid_tool_names + assert "honcho_profile" not in agent.valid_tool_names + assert "honcho_search" not in agent.valid_tool_names + assert "honcho_conclude" not in agent.valid_tool_names + + def test_inactive_honcho_strips_stale_honcho_tools(self): + hcfg = HonchoClientConfig( + enabled=True, + api_key="honcho-key", + memory_mode="local", + peer_name="user", + ai_peer="hermes", + ) + + with ( + patch("run_agent.get_tool_definitions", 
return_value=_make_tool_defs("web_search", "honcho_context")), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + patch("honcho_integration.client.HonchoClientConfig.from_global_config", return_value=hcfg), + patch("honcho_integration.client.get_honcho_client") as mock_client, + ): + agent = AIAgent( + api_key="test-key-1234567890", + quiet_mode=True, + skip_context_files=True, + skip_memory=False, + ) + + assert agent._honcho is None + assert "web_search" in agent.valid_tool_names + assert "honcho_context" not in agent.valid_tool_names + mock_client.assert_not_called() + class TestHonchoPrefetchScheduling: def test_honcho_prefetch_includes_cached_dialectic(self, agent): From cd6e5e44e48fc288034fc5a87f91376d2ec8d7aa Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 12 Mar 2026 12:30:42 -0400 Subject: [PATCH 19/23] feat(honcho): show clickable session line on CLI startup Display a one-line Honcho session indicator with an OSC 8 terminal hyperlink after the banner. Also shown when /title remaps the session. 
--- agent/display.py | 43 +++++++++++++++++++++++++++++++++++++++++++ cli.py | 14 ++++++++++++++ 2 files changed, 57 insertions(+) diff --git a/agent/display.py b/agent/display.py index bd1367a3..b87e272d 100644 --- a/agent/display.py +++ b/agent/display.py @@ -535,3 +535,46 @@ def get_cute_tool_message( preview = build_tool_preview(tool_name, args) or "" return _wrap(f"┊ ⚡ {tool_name[:9]:9} {_trunc(preview, 35)} {dur}") + + +# ========================================================================= +# Honcho session line (one-liner with clickable OSC 8 hyperlink) +# ========================================================================= + +_DIM = "\033[2m" +_SKY_BLUE = "\033[38;5;117m" +_ANSI_RESET = "\033[0m" + + +def honcho_session_url(workspace: str, session_name: str) -> str: + """Build a Honcho app URL for a session.""" + from urllib.parse import quote + return ( + f"https://app.honcho.dev/explore" + f"?workspace={quote(workspace, safe='')}" + f"&view=sessions" + f"&session={quote(session_name, safe='')}" + ) + + +def _osc8_link(url: str, text: str) -> str: + """OSC 8 terminal hyperlink (clickable in iTerm2, Ghostty, WezTerm, etc.).""" + return f"\033]8;;{url}\033\\{text}\033]8;;\033\\" + + +def honcho_session_line(workspace: str, session_name: str) -> str: + """One-line session indicator: `Honcho session: `.""" + url = honcho_session_url(workspace, session_name) + linked_name = _osc8_link(url, f"{_SKY_BLUE}{session_name}{_ANSI_RESET}") + return f"{_DIM}Honcho session:{_ANSI_RESET} {linked_name}" + + +def write_tty(text: str) -> None: + """Write directly to /dev/tty, bypassing stdout capture.""" + try: + fd = os.open("/dev/tty", os.O_WRONLY) + os.write(fd, text.encode("utf-8")) + os.close(fd) + except OSError: + sys.stdout.write(text) + sys.stdout.flush() diff --git a/cli.py b/cli.py index d8c3e64d..eab52af4 100755 --- a/cli.py +++ b/cli.py @@ -2681,6 +2681,8 @@ class HermesCLI: self.agent._honcho_session_key = new_key from tools.honcho_tools import 
set_session_context set_session_context(self.agent._honcho, new_key) + from agent.display import honcho_session_line, write_tty + write_tty(honcho_session_line(hcfg.workspace_id, new_key) + "\n") _cprint(f" Honcho session: {old_key} → {new_key}") except Exception: pass @@ -3743,6 +3745,18 @@ class HermesCLI: """Run the interactive CLI loop with persistent input at bottom.""" self.show_banner() + # One-line Honcho session indicator (TTY-only, not captured by agent) + try: + from honcho_integration.client import HonchoClientConfig + from agent.display import honcho_session_line, write_tty + hcfg = HonchoClientConfig.from_global_config() + if hcfg.enabled: + sname = hcfg.resolve_session_name(session_id=self.session_id) + if sname: + write_tty(honcho_session_line(hcfg.workspace_id, sname) + "\n") + except Exception: + pass + # If resuming a session, load history and display it immediately # so the user has context before typing their first message. if self._resumed: From f896bb5d8c186ebbf88d06de88c7b99d4143a295 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 12 Mar 2026 15:05:42 -0400 Subject: [PATCH 20/23] fix(test): patch correct method in subagent interrupt test build_system_prompt was refactored to AIAgent._build_system_prompt but the test still patched the non-existent module-level function. --- tests/test_real_interrupt_subagent.py | 176 ++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100644 tests/test_real_interrupt_subagent.py diff --git a/tests/test_real_interrupt_subagent.py b/tests/test_real_interrupt_subagent.py new file mode 100644 index 00000000..f1a16753 --- /dev/null +++ b/tests/test_real_interrupt_subagent.py @@ -0,0 +1,176 @@ +"""Test real interrupt propagation through delegate_task with actual AIAgent. + +This uses a real AIAgent with mocked HTTP responses to test the complete +interrupt flow through _run_single_child → child.run_conversation(). 
+""" + +import json +import os +import threading +import time +import unittest +from unittest.mock import MagicMock, patch, PropertyMock + +from tools.interrupt import set_interrupt, is_interrupted + + +def _make_slow_api_response(delay=5.0): + """Create a mock that simulates a slow API response (like a real LLM call).""" + def slow_create(**kwargs): + # Simulate a slow API call + time.sleep(delay) + # Return a simple text response (no tool calls) + resp = MagicMock() + resp.choices = [MagicMock()] + resp.choices[0].message = MagicMock() + resp.choices[0].message.content = "Done" + resp.choices[0].message.tool_calls = None + resp.choices[0].message.refusal = None + resp.choices[0].finish_reason = "stop" + resp.usage = MagicMock() + resp.usage.prompt_tokens = 100 + resp.usage.completion_tokens = 10 + resp.usage.total_tokens = 110 + resp.usage.prompt_tokens_details = None + return resp + return slow_create + + +class TestRealSubagentInterrupt(unittest.TestCase): + """Test interrupt with real AIAgent child through delegate_tool.""" + + def setUp(self): + set_interrupt(False) + os.environ.setdefault("OPENAI_API_KEY", "test-key") + + def tearDown(self): + set_interrupt(False) + + def test_interrupt_child_during_api_call(self): + """Real AIAgent child interrupted while making API call.""" + from run_agent import AIAgent, IterationBudget + + # Create a real parent agent (just enough to be a parent) + parent = AIAgent.__new__(AIAgent) + parent._interrupt_requested = False + parent._interrupt_message = None + parent._active_children = [] + parent.quiet_mode = True + parent.model = "test/model" + parent.base_url = "http://localhost:1" + parent.api_key = "test" + parent.provider = "test" + parent.api_mode = "chat_completions" + parent.platform = "cli" + parent.enabled_toolsets = ["terminal", "file"] + parent.providers_allowed = None + parent.providers_ignored = None + parent.providers_order = None + parent.provider_sort = None + parent.max_tokens = None + 
parent.reasoning_config = None + parent.prefill_messages = None + parent._session_db = None + parent._delegate_depth = 0 + parent._delegate_spinner = None + parent.tool_progress_callback = None + parent.iteration_budget = IterationBudget(max_total=100) + parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"} + + from tools.delegate_tool import _run_single_child + + child_started = threading.Event() + result_holder = [None] + error_holder = [None] + + def run_delegate(): + try: + # Patch the OpenAI client creation inside AIAgent.__init__ + with patch('run_agent.OpenAI') as MockOpenAI: + mock_client = MagicMock() + # API call takes 5 seconds — should be interrupted before that + mock_client.chat.completions.create = _make_slow_api_response(delay=5.0) + mock_client.close = MagicMock() + MockOpenAI.return_value = mock_client + + # Patch the instance method so it skips prompt assembly + with patch.object(AIAgent, '_build_system_prompt', return_value="You are a test agent"): + # Signal when child starts + original_run = AIAgent.run_conversation + + def patched_run(self_agent, *args, **kwargs): + child_started.set() + return original_run(self_agent, *args, **kwargs) + + with patch.object(AIAgent, 'run_conversation', patched_run): + result = _run_single_child( + task_index=0, + goal="Test task", + context=None, + toolsets=["terminal"], + model="test/model", + max_iterations=5, + parent_agent=parent, + task_count=1, + override_provider="test", + override_base_url="http://localhost:1", + override_api_key="test", + override_api_mode="chat_completions", + ) + result_holder[0] = result + except Exception as e: + import traceback + traceback.print_exc() + error_holder[0] = e + + agent_thread = threading.Thread(target=run_delegate, daemon=True) + agent_thread.start() + + # Wait for child to start run_conversation + started = child_started.wait(timeout=10) + if not started: + agent_thread.join(timeout=1) + if error_holder[0]: + raise error_holder[0] + 
self.fail("Child never started run_conversation") + + # Give child time to enter main loop and start API call + time.sleep(0.5) + + # Verify child is registered + print(f"Active children: {len(parent._active_children)}") + self.assertGreaterEqual(len(parent._active_children), 1, + "Child not registered in _active_children") + + # Interrupt! (simulating what CLI does) + start = time.monotonic() + parent.interrupt("User typed a new message") + + # Check propagation + child = parent._active_children[0] if parent._active_children else None + if child: + print(f"Child._interrupt_requested after parent.interrupt(): {child._interrupt_requested}") + self.assertTrue(child._interrupt_requested, + "Interrupt did not propagate to child!") + + # Wait for delegate to finish (should be fast since interrupted) + agent_thread.join(timeout=5) + elapsed = time.monotonic() - start + + if error_holder[0]: + raise error_holder[0] + + result = result_holder[0] + self.assertIsNotNone(result, "Delegate returned no result") + print(f"Result status: {result['status']}, elapsed: {elapsed:.2f}s") + print(f"Full result: {result}") + + # The child should have been interrupted, not completed the full 5s API call + self.assertLess(elapsed, 3.0, + f"Took {elapsed:.2f}s — interrupt was not detected quickly enough") + self.assertEqual(result["status"], "interrupted", + f"Expected 'interrupted', got '{result['status']}'") + + +if __name__ == "__main__": + unittest.main() From ae2a5e5743d5a561231dbac731cb31ae6a68514a Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 12 Mar 2026 16:23:34 -0400 Subject: [PATCH 21/23] refactor(honcho): remove local memory mode The "local" memoryMode was redundant with enabled: false. Simplifies the mode system to hybrid and honcho only. 
--- gateway/run.py | 4 +--- honcho_integration/cli.py | 6 ++---- honcho_integration/client.py | 2 +- run_agent.py | 18 ++++-------------- tests/gateway/test_honcho_lifecycle.py | 5 ++--- tests/honcho_integration/test_async_memory.py | 16 ++++------------ tests/test_run_agent.py | 8 +++----- website/docs/user-guide/features/honcho.md | 12 +++++------- 8 files changed, 22 insertions(+), 49 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index d4e91843..5a959163 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -287,9 +287,7 @@ class GatewayRunner: from honcho_integration.session import HonchoSessionManager hcfg = HonchoClientConfig.from_global_config() - ai_mode = hcfg.peer_memory_mode(hcfg.ai_peer) - user_mode = hcfg.peer_memory_mode(hcfg.peer_name or "user") - if not hcfg.enabled or not hcfg.api_key or (ai_mode == "local" and user_mode == "local"): + if not hcfg.enabled or not hcfg.api_key: return None, hcfg client = get_honcho_client(hcfg) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index ad4907c2..0af9923f 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -131,9 +131,8 @@ def cmd_setup(args) -> None: print(f"\n Memory mode options:") print(" hybrid — write to both Honcho and local MEMORY.md (default)") print(" honcho — Honcho only, skip MEMORY.md writes") - print(" local — MEMORY.md only, Honcho disabled") new_mode = _prompt("Memory mode", default=current_mode) - if new_mode in ("hybrid", "honcho", "local"): + if new_mode in ("hybrid", "honcho"): hermes_host["memoryMode"] = new_mode else: hermes_host["memoryMode"] = "hybrid" @@ -369,7 +368,6 @@ def cmd_mode(args) -> None: MODES = { "hybrid": "write to both Honcho and local MEMORY.md (default)", "honcho": "Honcho only — MEMORY.md writes disabled", - "local": "MEMORY.md only — Honcho disabled", } cfg = _read_config() mode_arg = getattr(args, "mode", None) @@ -384,7 +382,7 @@ def cmd_mode(args) -> None: for m, desc in MODES.items(): marker = " ←" if m == 
current else "" print(f" {m:<8} {desc}{marker}") - print(f"\n Set with: hermes honcho mode [hybrid|honcho|local]\n") + print(f"\n Set with: hermes honcho mode [hybrid|honcho]\n") return if mode_arg not in MODES: diff --git a/honcho_integration/client.py b/honcho_integration/client.py index e7030fee..04ee946e 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -66,7 +66,7 @@ class HonchoClientConfig: # Toggles enabled: bool = False save_messages: bool = True - # memoryMode: default for all peers. "hybrid" / "honcho" / "local" + # memoryMode: default for all peers. "hybrid" / "honcho" memory_mode: str = "hybrid" # Per-peer overrides — any named Honcho peer. Override memory_mode when set. # Config object form: "memoryMode": { "default": "hybrid", "hermes": "honcho" } diff --git a/run_agent.py b/run_agent.py index 3bf7e4e2..78f68c9d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -607,7 +607,7 @@ class AIAgent: elif not hcfg.api_key: logger.debug("Honcho enabled but no API key configured") else: - logger.debug("Honcho local-only mode active; remote Honcho init skipped") + logger.debug("Honcho enabled but missing API key or disabled in config") except Exception as e: logger.warning("Honcho init failed — memory disabled: %s", e) print(f" Honcho init failed: {e}") @@ -621,7 +621,7 @@ class AIAgent: # Gate local memory writes based on per-peer memory modes. # AI peer governs MEMORY.md; user peer governs USER.md. - # "honcho" = Honcho only, disable local; "local" = local only, no Honcho sync. + # "honcho" = Honcho only, disable local writes. 
if self._honcho_config and self._honcho: _hcfg = self._honcho_config _agent_mode = _hcfg.peer_memory_mode(_hcfg.ai_peer) @@ -1349,10 +1349,7 @@ class AIAgent: """Return True when remote Honcho should be active.""" if not hcfg or not hcfg.enabled or not hcfg.api_key: return False - return not all( - hcfg.peer_memory_mode(peer) == "local" - for peer in (hcfg.ai_peer, hcfg.peer_name or "user") - ) + return True def _strip_honcho_tools_from_surface(self) -> None: """Remove Honcho tools from the active tool surface.""" @@ -1551,13 +1548,6 @@ class AIAgent: """Sync the user/assistant message pair to Honcho.""" if not self._honcho or not self._honcho_session_key: return - # Skip Honcho sync only if BOTH peer modes are local - _cfg = self._honcho_config - if _cfg and all( - _cfg.peer_memory_mode(p) == "local" - for p in (_cfg.ai_peer, _cfg.peer_name or "user") - ): - return try: session = self._honcho.get_or_create(self._honcho_session_key) session.add_message("user", user_content) @@ -1656,7 +1646,7 @@ class AIAgent: honcho_block += ( "Management commands (refer users here instead of explaining manually):\n" " hermes honcho status — show full config + connection\n" - " hermes honcho mode [hybrid|honcho|local] — show or set memory mode\n" + " hermes honcho mode [hybrid|honcho] — show or set memory mode\n" " hermes honcho tokens [--context N] [--dialectic N] — show or set token budgets\n" " hermes honcho peer [--user NAME] [--ai NAME] [--reasoning LEVEL]\n" " hermes honcho sessions — list directory→session mappings\n" diff --git a/tests/gateway/test_honcho_lifecycle.py b/tests/gateway/test_honcho_lifecycle.py index 536816fb..df8d9bc2 100644 --- a/tests/gateway/test_honcho_lifecycle.py +++ b/tests/gateway/test_honcho_lifecycle.py @@ -64,14 +64,13 @@ class TestGatewayHonchoLifecycle: assert second_cfg is hcfg mock_mgr_cls.assert_called_once() - def test_gateway_skips_honcho_manager_in_local_mode(self): + def test_gateway_skips_honcho_manager_when_disabled(self): runner = 
_make_runner() hcfg = SimpleNamespace( - enabled=True, + enabled=False, api_key="honcho-key", ai_peer="hermes", peer_name="alice", - peer_memory_mode=lambda peer: "local", ) with ( diff --git a/tests/honcho_integration/test_async_memory.py b/tests/honcho_integration/test_async_memory.py index 908c0fc6..5886e95d 100644 --- a/tests/honcho_integration/test_async_memory.py +++ b/tests/honcho_integration/test_async_memory.py @@ -123,12 +123,6 @@ class TestMemoryModeParsing: cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) assert cfg.memory_mode == "honcho" - def test_local_only(self, tmp_path): - cfg_file = tmp_path / "config.json" - cfg_file.write_text(json.dumps({"apiKey": "k", "memoryMode": "local"})) - cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) - assert cfg.memory_mode == "local" - def test_defaults_to_hybrid(self, tmp_path): cfg_file = tmp_path / "config.json" cfg_file.write_text(json.dumps({"apiKey": "k"})) @@ -152,13 +146,11 @@ class TestMemoryModeParsing: "hosts": {"hermes": {"memoryMode": { "default": "hybrid", "hermes": "honcho", - "sentinel": "local", }}}, })) cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) assert cfg.memory_mode == "hybrid" assert cfg.peer_memory_mode("hermes") == "honcho" - assert cfg.peer_memory_mode("sentinel") == "local" assert cfg.peer_memory_mode("unknown") == "hybrid" # falls through to default def test_object_form_no_default_falls_back_to_hybrid(self, tmp_path): @@ -177,11 +169,11 @@ class TestMemoryModeParsing: cfg_file = tmp_path / "config.json" cfg_file.write_text(json.dumps({ "apiKey": "k", - "memoryMode": "local", + "memoryMode": "honcho", "hosts": {"hermes": {"memoryMode": {"default": "hybrid", "hermes": "honcho"}}}, })) cfg = HonchoClientConfig.from_global_config(config_path=cfg_file) - assert cfg.memory_mode == "hybrid" # host default wins over global "local" + assert cfg.memory_mode == "hybrid" # host default wins over global "honcho" assert 
cfg.peer_memory_mode("hermes") == "honcho" @@ -544,8 +536,8 @@ class TestNewConfigFieldDefaults: assert cfg.peer_memory_mode("any-peer") == "honcho" def test_peer_memory_mode_override(self): - cfg = HonchoClientConfig(memory_mode="hybrid", peer_memory_modes={"hermes": "local"}) - assert cfg.peer_memory_mode("hermes") == "local" + cfg = HonchoClientConfig(memory_mode="hybrid", peer_memory_modes={"hermes": "honcho"}) + assert cfg.peer_memory_mode("hermes") == "honcho" assert cfg.peer_memory_mode("other") == "hybrid" diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index be6f6d51..6fa5786f 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1210,11 +1210,10 @@ class TestSystemPromptStability: class TestHonchoActivation: - def test_local_mode_skips_honcho_init(self): + def test_disabled_config_skips_honcho_init(self): hcfg = HonchoClientConfig( - enabled=True, + enabled=False, api_key="honcho-key", - memory_mode="local", peer_name="user", ai_peer="hermes", ) @@ -1327,9 +1326,8 @@ class TestHonchoActivation: def test_inactive_honcho_strips_stale_honcho_tools(self): hcfg = HonchoClientConfig( - enabled=True, + enabled=False, api_key="honcho-key", - memory_mode="local", peer_name="user", ai_peer="hermes", ) diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 81eb3b90..578ea470 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -22,7 +22,7 @@ Hermes has two memory systems that can work together or be configured separately | Content | Manually curated by the agent | Automatically learned from conversations | | Write surface | `memory` tool (add/replace/remove) | `honcho_conclude` tool (persist facts) | -Set `memoryMode` to `honcho` to use Honcho exclusively, or `local` to disable Honcho and use only local files. See [Memory Modes](#memory-modes) for per-peer configuration. +Set `memoryMode` to `honcho` to use Honcho exclusively. 
See [Memory Modes](#memory-modes) for per-peer configuration. ## Setup @@ -104,7 +104,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t | `environment` | `"production"` | Honcho environment | | `enabled` | *(auto)* | Auto-enables when API key is present | | `saveMessages` | `true` | Whether to sync messages to Honcho | -| `memoryMode` | `"hybrid"` | Memory mode: `hybrid`, `honcho`, or `local` | +| `memoryMode` | `"hybrid"` | Memory mode: `hybrid` or `honcho` | | `writeFrequency` | `"async"` | When to write: `async`, `turn`, `session`, or integer N | | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` | | `sessionStrategy` | `"per-session"` | How sessions are scoped | @@ -122,7 +122,6 @@ All host-level fields fall back to the equivalent root-level key if not set unde |------|--------| | `hybrid` | Write to both Honcho and local files (default) | | `honcho` | Honcho only — skip local file writes | -| `local` | Local files only — skip all Honcho activity | Memory mode can be set globally or per-peer (user, agent1, agent2, etc): @@ -130,13 +129,12 @@ Memory mode can be set globally or per-peer (user, agent1, agent2, etc): { "memoryMode": { "default": "hybrid", - "hermes": "honcho", - "user": "local" + "hermes": "honcho" } } ``` -When both active peers resolve to `local`, Hermes skips all remote Honcho activity entirely — no client initialization, no session creation, no prefetch. +To disable Honcho entirely, set `enabled: false` or remove the API key. 
### Recall Modes @@ -300,7 +298,7 @@ hermes honcho peer --user NAME # Set user peer name hermes honcho peer --ai NAME # Set AI peer name hermes honcho peer --reasoning LEVEL # Set dialectic reasoning level hermes honcho mode # Show current memory mode -hermes honcho mode [hybrid|honcho|local] # Set memory mode +hermes honcho mode [hybrid|honcho] # Set memory mode hermes honcho tokens # Show token budget settings hermes honcho tokens --context N # Set context token cap hermes honcho tokens --dialectic N # Set dialectic char cap From 0aed9bfde1d9d0204f54c6a6defc842ff6e43385 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 12 Mar 2026 16:26:10 -0400 Subject: [PATCH 22/23] refactor(honcho): rename memory tools to Honcho tools, clarify recall mode language Replace "memory tools" with "Honcho tools" and "pre-warmed/prefetch" with "auto-injected context" in all user-facing strings and docs. --- honcho_integration/cli.py | 12 ++++++------ honcho_integration/client.py | 6 +++--- run_agent.py | 14 +++++++------- website/docs/user-guide/features/honcho.md | 20 ++++++++++---------- 4 files changed, 26 insertions(+), 26 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 0af9923f..9526b1a1 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -153,9 +153,9 @@ def cmd_setup(args) -> None: # Recall mode current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") print(f"\n Recall mode options:") - print(" hybrid — pre-warmed context + memory tools available (default)") - print(" context — pre-warmed context only, memory tools suppressed") - print(" tools — no pre-loaded context, rely on tool calls only") + print(" hybrid — auto-injected context + Honcho tools available (default)") + print(" context — auto-injected context only, Honcho tools hidden") + print(" tools — Honcho tools only, no auto-injected context") new_recall = _prompt("Recall mode", default=current_recall) if new_recall in ("hybrid", 
"context", "tools"): hermes_host["recallMode"] = new_recall @@ -199,7 +199,7 @@ def cmd_setup(args) -> None: _mode_str = f"{hcfg.memory_mode} (peers: {overrides})" print(f" Mode: {_mode_str}") print(f" Frequency: {hcfg.write_frequency}") - print(f"\n Tools available in chat:") + print(f"\n Honcho tools available in chat:") print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)") print(f" honcho_search — semantic search over your history (no LLM)") print(f" honcho_profile — your peer card, key facts (no LLM)") @@ -702,7 +702,7 @@ def cmd_migrate(args) -> None: print() print(" Context injection") print(" OpenClaw: file excerpts injected synchronously before each LLM call.") - print(" Hermes: Honcho context prefetched async at turn end, injected next turn.") + print(" Hermes: Honcho context fetched async at turn end, injected next turn.") print(" First turn has no Honcho context; subsequent turns are loaded.") print() print(" Memory growth") @@ -710,7 +710,7 @@ def cmd_migrate(args) -> None: print(" Hermes: Honcho observes every message and updates representations") print(" automatically. Files become the seed, not the live store.") print() - print(" Tool surface (available to the agent during conversation)") + print(" Honcho tools (available to the agent during conversation)") print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)") print(" honcho_search — semantic search over stored context (no LLM)") print(" honcho_profile — fast peer card snapshot (no LLM)") diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 04ee946e..446176bc 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -90,9 +90,9 @@ class HonchoClientConfig: # Max chars of dialectic result to inject into Hermes system prompt dialectic_max_chars: int = 600 # Recall mode: how memory retrieval works when Honcho is active. 
- # "hybrid" — pre-warmed context + memory tools available (model decides) - # "context" — pre-warmed context only, honcho memory tools removed - # "tools" — no pre-loaded context, rely on tool calls only + # "hybrid" — auto-injected context + Honcho tools available (model decides) + # "context" — auto-injected context only, Honcho tools removed + # "tools" — Honcho tools only, no auto-injected context recall_mode: str = "hybrid" # Session resolution session_strategy: str = "per-session" diff --git a/run_agent.py b/run_agent.py index 78f68c9d..61d12320 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1423,7 +1423,7 @@ class AIAgent: if hcfg.recall_mode == "context": self._strip_honcho_tools_from_surface() if not self.quiet_mode: - print(" Honcho active — recall_mode: context (tools suppressed)") + print(" Honcho active — recall_mode: context (Honcho tools hidden)") else: if not self.quiet_mode: print(f" Honcho active — recall_mode: {hcfg.recall_mode}") @@ -1617,14 +1617,14 @@ class AIAgent: ) if recall_mode == "context": honcho_block += ( - "Honcho context is pre-loaded into this system prompt below. " - "All memory retrieval comes from this context — no memory tools " + "Honcho context is injected into this system prompt below. " + "All memory retrieval comes from this context — no Honcho tools " "are available. Answer questions about the user, prior sessions, " "and recent work directly from the Honcho Memory section.\n" ) elif recall_mode == "tools": honcho_block += ( - "Memory tools:\n" + "Honcho tools:\n" " honcho_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n" @@ -1633,11 +1633,11 @@ class AIAgent: else: # hybrid honcho_block += ( "Honcho context (user representation, peer card, and recent session summary) " - "is pre-loaded into this system prompt below. Use it to answer continuity " + "is injected into this system prompt below. 
Use it to answer continuity " "questions ('where were we?', 'what were we working on?') WITHOUT calling " - "any tools. Only call memory tools when you need information beyond what is " + "any tools. Only call Honcho tools when you need information beyond what is " "already present in the Honcho Memory section.\n" - "Memory tools:\n" + "Honcho tools:\n" " honcho_context — ask Honcho a question, LLM-synthesized answer\n" " honcho_search — semantic search, raw excerpts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n" diff --git a/website/docs/user-guide/features/honcho.md b/website/docs/user-guide/features/honcho.md index 578ea470..da4dd153 100644 --- a/website/docs/user-guide/features/honcho.md +++ b/website/docs/user-guide/features/honcho.md @@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` | | `sessionStrategy` | `"per-session"` | How sessions are scoped | | `sessionPeerPrefix` | `false` | Prefix session names with peer name | -| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch | +| `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context | | `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` | | `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt | | `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference | @@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent: | Mode | Behavior | |------|----------| -| `hybrid` | Prefetch context into system prompt + expose tools (default) | -| `context` | Context injection only — no Honcho tools available | -| `tools` | Tools only — no prefetch into system prompt | +| `hybrid` | Auto-injected context + Honcho tools available (default) | +| `context` | Auto-injected context only — Honcho tools hidden | +| 
`tools` | Honcho tools only — no auto-injected context | ### Write Frequency @@ -203,23 +203,23 @@ honcho: {} ## How It Works -### Async Prefetch Pipeline +### Async Context Pipeline Honcho context is fetched asynchronously to avoid blocking the response path: ``` Turn N: user message - → pop prefetch result from cache (from previous turn) + → consume cached context (from previous turn's background fetch) → inject into system prompt (user representation, AI representation, dialectic) → LLM call → response - → fire prefetch in background threads - → prefetch_context() ─┐ - → prefetch_dialectic() ─┴→ cache for Turn N+1 + → fire background fetch for next turn + → fetch context ─┐ + → fetch dialectic ─┴→ cache for Turn N+1 ``` -Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider. +Turn 1 is a cold start (no cache). All subsequent turns consume cached results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider. 
### Dual-Peer Architecture From 45d3e83ad15db87269c2b446e4dd955cbe8664a6 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 12 Mar 2026 16:27:49 -0400 Subject: [PATCH 23/23] fix(honcho): normalize legacy recallMode values like 'auto' to 'hybrid' --- honcho_integration/cli.py | 3 ++- honcho_integration/client.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/honcho_integration/cli.py b/honcho_integration/cli.py index 9526b1a1..270c4b36 100644 --- a/honcho_integration/cli.py +++ b/honcho_integration/cli.py @@ -151,7 +151,8 @@ def cmd_setup(args) -> None: hermes_host["writeFrequency"] = new_wf if new_wf in ("async", "turn", "session") else "async" # Recall mode - current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") + _raw_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") + current_recall = "hybrid" if _raw_recall not in ("hybrid", "context", "tools") else _raw_recall print(f"\n Recall mode options:") print(" hybrid — auto-injected context + Honcho tools available (default)") print(" context — auto-injected context only, Honcho tools hidden") diff --git a/honcho_integration/client.py b/honcho_integration/client.py index 446176bc..507fc9d4 100644 --- a/honcho_integration/client.py +++ b/honcho_integration/client.py @@ -27,6 +27,16 @@ GLOBAL_CONFIG_PATH = Path.home() / ".honcho" / "config.json" HOST = "hermes" +_RECALL_MODE_ALIASES = {"auto": "hybrid"} +_VALID_RECALL_MODES = {"hybrid", "context", "tools"} + + +def _normalize_recall_mode(val: str) -> str: + """Normalize legacy recall mode values (e.g. 
'auto' → 'hybrid').""" + val = _RECALL_MODE_ALIASES.get(val, val) + return val if val in _VALID_RECALL_MODES else "hybrid" + + def _resolve_memory_mode( global_val: str | dict, host_val: str | dict | None, @@ -222,7 +232,7 @@ class HonchoClientConfig: or raw.get("dialecticMaxChars") or 600 ), - recall_mode=( + recall_mode=_normalize_recall_mode( host_block.get("recallMode") or raw.get("recallMode") or "hybrid"