feat(honcho): async memory integration with prefetch pipeline and recallMode

Adds full Honcho memory integration to Hermes: - Session manager with async background writes, memory modes (honcho/hybrid/local), and dialectic prefetch for first-turn context warming - Agent integration: prefetch pipeline, tool surface gated by recallMode, system prompt context injection, SIGTERM/SIGINT flush handlers - CLI commands: setup, status, mode, tokens, peer, identity, migrate - recallMode setting (auto | context | tools) for A/B testing retrieval strategies - Session strategies: per-session, per-repo (git tree root), per-directory, global - Polymorphic memoryMode config: string shorthand or per-peer object overrides - 97 tests covering async writes, client config, session resolution, and memory modes
2026-03-09 15:58:22 -04:00 · 2026-03-09 15:58:22 -04:00 · 74c214e957
commit 74c214e957
parent 8eefbef91c
17 changed files with 2478 additions and 135 deletions
--- a/tools/browser_tool.py
+++ b/tools/browser_tool.py
@ -1640,6 +1640,25 @@ def _cleanup_old_recordings(max_age_hours=72):
        logger.debug("Recording cleanup error (non-critical): %s", e)


+def _cleanup_old_recordings(max_age_hours=72):
+    """Remove browser recordings older than max_age_hours to prevent disk bloat."""
+    import time
+    try:
+        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
+        recordings_dir = hermes_home / "browser_recordings"
+        if not recordings_dir.exists():
+            return
+        cutoff = time.time() - (max_age_hours * 3600)
+        for f in recordings_dir.glob("session_*.webm"):
+            try:
+                if f.stat().st_mtime < cutoff:
+                    f.unlink()
+            except Exception:
+                pass
+    except Exception:
+        pass
+
+
 # ============================================================================
 # Cleanup and Management Functions
 # ============================================================================
--- a/tools/honcho_tools.py
+++ b/tools/honcho_tools.py
@ -1,8 +1,16 @@
-"""Honcho tool for querying user context via dialectic reasoning.
+"""Honcho tools for user context retrieval.

-Registers ``query_user_context`` -- an LLM-callable tool that asks Honcho
-about the current user's history, preferences, goals, and communication
-style. The session key is injected at runtime by the agent loop via
+Registers three complementary tools, ordered by capability:
+
+  query_user_context   — dialectic Q&A (LLM-powered, direct answers)
+  honcho_search        — semantic search (fast, no LLM, raw excerpts)
+  honcho_profile       — peer card (fast, no LLM, structured facts)
+
+Use query_user_context when you need Honcho to synthesize an answer.
+Use honcho_search or honcho_profile when you want raw data to reason
+over yourself.
+
+The session key is injected at runtime by the agent loop via
 ``set_session_context()``.
 """

@ -34,54 +42,6 @@ def clear_session_context() -> None:
    _session_key = None


-# ── Tool schema ──
-
-HONCHO_TOOL_SCHEMA = {
-    "name": "query_user_context",
-    "description": (
-        "Query Honcho to retrieve relevant context about the user based on their "
-        "history and preferences. Use this when you need to understand the user's "
-        "background, preferences, past interactions, or goals. This helps you "
-        "personalize your responses and provide more relevant assistance."
-    ),
-    "parameters": {
-        "type": "object",
-        "properties": {
-            "query": {
-                "type": "string",
-                "description": (
-                    "A natural language question about the user. Examples: "
-                    "'What are this user's main goals?', "
-                    "'What communication style does this user prefer?', "
-                    "'What topics has this user discussed recently?', "
-                    "'What is this user's technical expertise level?'"
-                ),
-            }
-        },
-        "required": ["query"],
-    },
-}
-
-
-# ── Tool handler ──
-
-def _handle_query_user_context(args: dict, **kw) -> str:
-    """Execute the Honcho context query."""
-    query = args.get("query", "")
-    if not query:
-        return json.dumps({"error": "Missing required parameter: query"})
-
-    if not _session_manager or not _session_key:
-        return json.dumps({"error": "Honcho is not active for this session."})
-
-    try:
-        result = _session_manager.get_user_context(_session_key, query)
-        return json.dumps({"result": result})
-    except Exception as e:
-        logger.error("Error querying Honcho user context: %s", e)
-        return json.dumps({"error": f"Failed to query user context: {e}"})
-
-
 # ── Availability check ──

 def _check_honcho_available() -> bool:
@ -89,14 +49,145 @@ def _check_honcho_available() -> bool:
    return _session_manager is not None and _session_key is not None


+# ── honcho_profile ──
+
+_PROFILE_SCHEMA = {
+    "name": "honcho_profile",
+    "description": (
+        "Retrieve the user's peer card from Honcho — a curated list of key facts "
+        "about them (name, role, preferences, communication style, patterns). "
+        "Fast, no LLM reasoning, minimal cost. "
+        "Use this at conversation start or when you need a quick factual snapshot. "
+        "Use query_user_context instead when you need Honcho to synthesize an answer."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {},
+        "required": [],
+    },
+}
+
+
+def _handle_honcho_profile(args: dict, **kw) -> str:
+    if not _session_manager or not _session_key:
+        return json.dumps({"error": "Honcho is not active for this session."})
+    try:
+        card = _session_manager.get_peer_card(_session_key)
+        if not card:
+            return json.dumps({"result": "No profile facts available yet. The user's profile builds over time through conversations."})
+        return json.dumps({"result": card})
+    except Exception as e:
+        logger.error("Error fetching Honcho peer card: %s", e)
+        return json.dumps({"error": f"Failed to fetch profile: {e}"})
+
+
+# ── honcho_search ──
+
+_SEARCH_SCHEMA = {
+    "name": "honcho_search",
+    "description": (
+        "Semantic search over Honcho's stored context about the user. "
+        "Returns raw excerpts ranked by relevance to your query — no LLM synthesis. "
+        "Cheaper and faster than query_user_context. "
+        "Good when you want to find specific past facts and reason over them yourself. "
+        "Use query_user_context when you need a direct synthesized answer."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "What to search for in Honcho's memory (e.g. 'programming languages', 'past projects', 'timezone').",
+            },
+            "max_tokens": {
+                "type": "integer",
+                "description": "Token budget for returned context (default 800, max 2000).",
+            },
+        },
+        "required": ["query"],
+    },
+}
+
+
+def _handle_honcho_search(args: dict, **kw) -> str:
+    query = args.get("query", "")
+    if not query:
+        return json.dumps({"error": "Missing required parameter: query"})
+    if not _session_manager or not _session_key:
+        return json.dumps({"error": "Honcho is not active for this session."})
+    max_tokens = min(int(args.get("max_tokens", 800)), 2000)
+    try:
+        result = _session_manager.search_context(_session_key, query, max_tokens=max_tokens)
+        if not result:
+            return json.dumps({"result": "No relevant context found."})
+        return json.dumps({"result": result})
+    except Exception as e:
+        logger.error("Error searching Honcho context: %s", e)
+        return json.dumps({"error": f"Failed to search context: {e}"})
+
+
+# ── query_user_context (dialectic — LLM-powered) ──
+
+_QUERY_SCHEMA = {
+    "name": "query_user_context",
+    "description": (
+        "Ask Honcho a natural language question about the user and get a synthesized answer. "
+        "Uses Honcho's LLM (dialectic reasoning) — higher cost than honcho_profile or honcho_search. "
+        "Use this when you need a direct answer synthesized from the user's full history. "
+        "Examples: 'What are this user's main goals?', 'How does this user prefer to communicate?', "
+        "'What is this user's technical expertise level?'"
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "query": {
+                "type": "string",
+                "description": "A natural language question about the user.",
+            }
+        },
+        "required": ["query"],
+    },
+}
+
+
+def _handle_query_user_context(args: dict, **kw) -> str:
+    query = args.get("query", "")
+    if not query:
+        return json.dumps({"error": "Missing required parameter: query"})
+    if not _session_manager or not _session_key:
+        return json.dumps({"error": "Honcho is not active for this session."})
+    try:
+        result = _session_manager.dialectic_query(_session_key, query)
+        return json.dumps({"result": result or "No result from Honcho."})
+    except Exception as e:
+        logger.error("Error querying Honcho user context: %s", e)
+        return json.dumps({"error": f"Failed to query user context: {e}"})
+
+
 # ── Registration ──

 from tools.registry import registry

+registry.register(
+    name="honcho_profile",
+    toolset="honcho",
+    schema=_PROFILE_SCHEMA,
+    handler=_handle_honcho_profile,
+    check_fn=_check_honcho_available,
+)
+
+registry.register(
+    name="honcho_search",
+    toolset="honcho",
+    schema=_SEARCH_SCHEMA,
+    handler=_handle_honcho_search,
+    check_fn=_check_honcho_available,
+)
+
 registry.register(
    name="query_user_context",
    toolset="honcho",
-    schema=HONCHO_TOOL_SCHEMA,
+    schema=_QUERY_SCHEMA,
    handler=_handle_query_user_context,
    check_fn=_check_honcho_available,
 )