refactor(honcho): rename memory tools to Honcho tools, clarify recall mode language

Replace "memory tools" with "Honcho tools" and "pre-warmed/prefetch"
with "auto-injected context" in all user-facing strings and docs.
This commit is contained in:
Erosika 2026-03-12 16:26:10 -04:00
parent ae2a5e5743
commit 0aed9bfde1
4 changed files with 26 additions and 26 deletions

View file

@ -153,9 +153,9 @@ def cmd_setup(args) -> None:
# Recall mode # Recall mode
current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid") current_recall = hermes_host.get("recallMode") or cfg.get("recallMode", "hybrid")
print(f"\n Recall mode options:") print(f"\n Recall mode options:")
print(" hybrid — pre-warmed context + memory tools available (default)") print(" hybrid — auto-injected context + Honcho tools available (default)")
print(" context — pre-warmed context only, memory tools suppressed") print(" context — auto-injected context only, Honcho tools hidden")
print(" tools — no pre-loaded context, rely on tool calls only") print(" tools — Honcho tools only, no auto-injected context")
new_recall = _prompt("Recall mode", default=current_recall) new_recall = _prompt("Recall mode", default=current_recall)
if new_recall in ("hybrid", "context", "tools"): if new_recall in ("hybrid", "context", "tools"):
hermes_host["recallMode"] = new_recall hermes_host["recallMode"] = new_recall
@ -199,7 +199,7 @@ def cmd_setup(args) -> None:
_mode_str = f"{hcfg.memory_mode} (peers: {overrides})" _mode_str = f"{hcfg.memory_mode} (peers: {overrides})"
print(f" Mode: {_mode_str}") print(f" Mode: {_mode_str}")
print(f" Frequency: {hcfg.write_frequency}") print(f" Frequency: {hcfg.write_frequency}")
print(f"\n Tools available in chat:") print(f"\n Honcho tools available in chat:")
print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)") print(f" honcho_context — ask Honcho a question about you (LLM-synthesized)")
print(f" honcho_search — semantic search over your history (no LLM)") print(f" honcho_search — semantic search over your history (no LLM)")
print(f" honcho_profile — your peer card, key facts (no LLM)") print(f" honcho_profile — your peer card, key facts (no LLM)")
@ -702,7 +702,7 @@ def cmd_migrate(args) -> None:
print() print()
print(" Context injection") print(" Context injection")
print(" OpenClaw: file excerpts injected synchronously before each LLM call.") print(" OpenClaw: file excerpts injected synchronously before each LLM call.")
print(" Hermes: Honcho context prefetched async at turn end, injected next turn.") print(" Hermes: Honcho context fetched async at turn end, injected next turn.")
print(" First turn has no Honcho context; subsequent turns are loaded.") print(" First turn has no Honcho context; subsequent turns are loaded.")
print() print()
print(" Memory growth") print(" Memory growth")
@ -710,7 +710,7 @@ def cmd_migrate(args) -> None:
print(" Hermes: Honcho observes every message and updates representations") print(" Hermes: Honcho observes every message and updates representations")
print(" automatically. Files become the seed, not the live store.") print(" automatically. Files become the seed, not the live store.")
print() print()
print(" Tool surface (available to the agent during conversation)") print(" Honcho tools (available to the agent during conversation)")
print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)") print(" honcho_context — ask Honcho a question, get a synthesized answer (LLM)")
print(" honcho_search — semantic search over stored context (no LLM)") print(" honcho_search — semantic search over stored context (no LLM)")
print(" honcho_profile — fast peer card snapshot (no LLM)") print(" honcho_profile — fast peer card snapshot (no LLM)")

View file

@ -90,9 +90,9 @@ class HonchoClientConfig:
# Max chars of dialectic result to inject into Hermes system prompt # Max chars of dialectic result to inject into Hermes system prompt
dialectic_max_chars: int = 600 dialectic_max_chars: int = 600
# Recall mode: how memory retrieval works when Honcho is active. # Recall mode: how memory retrieval works when Honcho is active.
# "hybrid" — pre-warmed context + memory tools available (model decides) # "hybrid" — auto-injected context + Honcho tools available (model decides)
# "context" — pre-warmed context only, honcho memory tools removed # "context" — auto-injected context only, Honcho tools removed
# "tools" — no pre-loaded context, rely on tool calls only # "tools" — Honcho tools only, no auto-injected context
recall_mode: str = "hybrid" recall_mode: str = "hybrid"
# Session resolution # Session resolution
session_strategy: str = "per-session" session_strategy: str = "per-session"

View file

@ -1423,7 +1423,7 @@ class AIAgent:
if hcfg.recall_mode == "context": if hcfg.recall_mode == "context":
self._strip_honcho_tools_from_surface() self._strip_honcho_tools_from_surface()
if not self.quiet_mode: if not self.quiet_mode:
print(" Honcho active — recall_mode: context (tools suppressed)") print(" Honcho active — recall_mode: context (Honcho tools hidden)")
else: else:
if not self.quiet_mode: if not self.quiet_mode:
print(f" Honcho active — recall_mode: {hcfg.recall_mode}") print(f" Honcho active — recall_mode: {hcfg.recall_mode}")
@ -1617,14 +1617,14 @@ class AIAgent:
) )
if recall_mode == "context": if recall_mode == "context":
honcho_block += ( honcho_block += (
"Honcho context is pre-loaded into this system prompt below. " "Honcho context is injected into this system prompt below. "
"All memory retrieval comes from this context — no memory tools " "All memory retrieval comes from this context — no Honcho tools "
"are available. Answer questions about the user, prior sessions, " "are available. Answer questions about the user, prior sessions, "
"and recent work directly from the Honcho Memory section.\n" "and recent work directly from the Honcho Memory section.\n"
) )
elif recall_mode == "tools": elif recall_mode == "tools":
honcho_block += ( honcho_block += (
"Memory tools:\n" "Honcho tools:\n"
" honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n" " honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n"
" honcho_search <query> — semantic search, raw excerpts, no LLM\n" " honcho_search <query> — semantic search, raw excerpts, no LLM\n"
" honcho_profile — user's peer card, key facts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n"
@ -1633,11 +1633,11 @@ class AIAgent:
else: # hybrid else: # hybrid
honcho_block += ( honcho_block += (
"Honcho context (user representation, peer card, and recent session summary) " "Honcho context (user representation, peer card, and recent session summary) "
"is pre-loaded into this system prompt below. Use it to answer continuity " "is injected into this system prompt below. Use it to answer continuity "
"questions ('where were we?', 'what were we working on?') WITHOUT calling " "questions ('where were we?', 'what were we working on?') WITHOUT calling "
"any tools. Only call memory tools when you need information beyond what is " "any tools. Only call Honcho tools when you need information beyond what is "
"already present in the Honcho Memory section.\n" "already present in the Honcho Memory section.\n"
"Memory tools:\n" "Honcho tools:\n"
" honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n" " honcho_context <question> — ask Honcho a question, LLM-synthesized answer\n"
" honcho_search <query> — semantic search, raw excerpts, no LLM\n" " honcho_search <query> — semantic search, raw excerpts, no LLM\n"
" honcho_profile — user's peer card, key facts, no LLM\n" " honcho_profile — user's peer card, key facts, no LLM\n"

View file

@ -109,7 +109,7 @@ Settings are scoped to `hosts.hermes` and fall back to root-level globals when t
| `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` | | `recallMode` | `"hybrid"` | Retrieval strategy: `hybrid`, `context`, or `tools` |
| `sessionStrategy` | `"per-session"` | How sessions are scoped | | `sessionStrategy` | `"per-session"` | How sessions are scoped |
| `sessionPeerPrefix` | `false` | Prefix session names with peer name | | `sessionPeerPrefix` | `false` | Prefix session names with peer name |
| `contextTokens` | *(Honcho default)* | Max tokens for context prefetch | | `contextTokens` | *(Honcho default)* | Max tokens for auto-injected context |
| `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` | | `dialecticReasoningLevel` | `"low"` | Floor for dialectic reasoning: `minimal` / `low` / `medium` / `high` / `max` |
| `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt | | `dialecticMaxChars` | `600` | Char cap on dialectic results injected into system prompt |
| `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference | | `linkedHosts` | `[]` | Other host keys whose workspaces to cross-reference |
@ -142,9 +142,9 @@ Controls how Honcho context reaches the agent:
| Mode | Behavior | | Mode | Behavior |
|------|----------| |------|----------|
| `hybrid` | Prefetch context into system prompt + expose tools (default) | | `hybrid` | Auto-injected context + Honcho tools available (default) |
| `context` | Context injection only — no Honcho tools available | | `context` | Auto-injected context only — Honcho tools hidden |
| `tools` | Tools only — no prefetch into system prompt | | `tools` | Honcho tools only — no auto-injected context |
### Write Frequency ### Write Frequency
@ -203,23 +203,23 @@ honcho: {}
## How It Works ## How It Works
### Async Prefetch Pipeline ### Async Context Pipeline
Honcho context is fetched asynchronously to avoid blocking the response path: Honcho context is fetched asynchronously to avoid blocking the response path:
``` ```
Turn N: Turn N:
user message user message
pop prefetch result from cache (from previous turn) consume cached context (from previous turn's background fetch)
→ inject into system prompt (user representation, AI representation, dialectic) → inject into system prompt (user representation, AI representation, dialectic)
→ LLM call → LLM call
→ response → response
→ fire prefetch in background threads → fire background fetch for next turn
prefetch_context() ─┐ fetch context ─┐
prefetch_dialectic() ─┴→ cache for Turn N+1 fetch dialectic ─┴→ cache for Turn N+1
``` ```
Turn 1 is a cold start (no cache). All subsequent turns consume pre-warmed results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider. Turn 1 is a cold start (no cache). All subsequent turns consume cached results with zero HTTP latency on the response path. The system prompt on turn 1 uses only static context to preserve prefix cache hits at the LLM provider.
### Dual-Peer Architecture ### Dual-Peer Architecture