feat: add persistent memory system + SQLite session store

Two-part implementation:

Part A - Curated Bounded Memory:
- New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores
- Character-limited (2200 chars for MEMORY.md, 1375 chars for USER.md), §-delimited entries
- Frozen snapshot injected into system prompt at session start
- Model manages pruning via replace/remove with substring matching
- Usage indicator shown in system prompt header

Part B - SQLite Session Store:
- New hermes_state.py with SessionDB class, FTS5 full-text search
- Gateway session.py rewritten to dual-write SQLite + legacy JSONL
- Compression-triggered session splitting with parent_session_id chains
- New session_search tool with Gemini Flash summarization of matched sessions
- CLI session lifecycle (create on launch, close on exit)

Also:
- System prompt now cached per session, only rebuilt on compression
  (fixes prefix cache invalidation from date/time changes every turn)
- Config version bumped to 3, hermes doctor checks for new artifacts
- Disabled in batch_runner and RL environments
This commit is contained in:
teknium1 2026-02-19 00:57:31 -08:00
parent 655303f2f1
commit 440c244cac
19 changed files with 2397 additions and 327 deletions

View file

@ -875,6 +875,24 @@ def _build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
else:
return f"planning {len(todos_arg)} task(s)"
if tool_name == "session_search":
query = args.get("query", "")
return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
if tool_name == "memory":
action = args.get("action", "")
target = args.get("target", "")
if action == "add":
content = args.get("content", "")
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
elif action == "replace":
return f"~{target}: \"{args.get('old_text', '')[:20]}\""
elif action == "remove":
return f"-{target}: \"{args.get('old_text', '')[:20]}\""
elif action == "read":
return f"read {target}"
return action
if tool_name == "send_message":
target = args.get("target", "?")
msg = args.get("message", "")
@ -1061,6 +1079,8 @@ class AIAgent:
prefill_messages: List[Dict[str, Any]] = None,
platform: str = None,
skip_context_files: bool = False,
skip_memory: bool = False,
session_db=None,
):
"""
Initialize the AI Agent.
@ -1269,10 +1289,51 @@ class AIAgent:
# Track conversation messages for session logging
self._session_messages: List[Dict[str, Any]] = []
# Cached system prompt -- built once per session, only rebuilt on compression
self._cached_system_prompt: Optional[str] = None
# SQLite session store (optional -- provided by CLI or gateway)
self._session_db = session_db
if self._session_db:
try:
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or "cli",
model=self.model,
model_config={
"max_iterations": self.max_iterations,
"reasoning_config": reasoning_config,
"max_tokens": max_tokens,
},
user_id=None,
)
except Exception:
pass
# In-memory todo list for task planning (one per agent/session)
from tools.todo_tool import TodoStore
self._todo_store = TodoStore()
# Persistent memory (MEMORY.md + USER.md) -- loaded from disk
self._memory_store = None
self._memory_enabled = False
self._user_profile_enabled = False
if not skip_memory:
try:
from hermes_cli.config import load_config as _load_mem_config
mem_config = _load_mem_config().get("memory", {})
self._memory_enabled = mem_config.get("memory_enabled", False)
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
if self._memory_enabled or self._user_profile_enabled:
from tools.memory_tool import MemoryStore
self._memory_store = MemoryStore(
memory_char_limit=mem_config.get("memory_char_limit", 2200),
user_char_limit=mem_config.get("user_char_limit", 1375),
)
self._memory_store.load_from_disk()
except Exception:
pass # Memory is optional -- don't break agent init
# Initialize context compressor for automatic context management
# Compresses conversation when approaching model's context limit
# Configuration via environment variables (can be set in .env or cli-config.yaml)
@ -1452,6 +1513,32 @@ class AIAgent:
else:
return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}"
# ── Session Search ──
if tool_name == "session_search":
query = _trunc(args.get("query", ""), 35)
return f"┊ 🔍 recall \"{query}\" {dur}"
# ── Memory ──
if tool_name == "memory":
action = args.get("action", "?")
target = args.get("target", "")
if action == "add":
preview = _trunc(args.get("content", ""), 30)
return f"┊ 🧠 memory +{target}: \"{preview}\" {dur}"
elif action == "replace":
snippet = _trunc(args.get("old_text", ""), 20)
return f"┊ 🧠 memory ~{target}: \"{snippet}\" {dur}"
elif action == "remove":
snippet = _trunc(args.get("old_text", ""), 20)
return f"┊ 🧠 memory -{target}: \"{snippet}\" {dur}"
elif action == "read":
return f"┊ 🧠 memory read {target} {dur}"
elif action == "search_sessions":
query = _trunc(args.get("content", ""), 30)
return f"┊ 🧠 recall \"{query}\" {dur}"
else:
return f"┊ 🧠 memory {action} {dur}"
# ── Skills ──
if tool_name == "skills_list":
return f"┊ 📚 skills list {args.get('category', 'all')} {dur}"
@ -2041,6 +2128,70 @@ class AIAgent:
"""Check if an interrupt has been requested."""
return self._interrupt_requested
def _build_system_prompt(self, system_message: str = None) -> str:
    """
    Compose the complete system prompt by stacking its layers in a fixed order.

    The result is meant to be stored on ``self._cached_system_prompt`` by the
    caller and reused for every turn, so the prompt text stays identical
    between turns (which keeps provider-side prefix caches valid). It is
    only recomputed after a context compression.

    Layer order:
      1. Default agent identity (always present)
      2. Caller prompt: ``system_message`` if given, otherwise
         ``self.ephemeral_system_prompt``
      3. Persistent memory blocks ("memory", then "user"), when enabled
      4. Skills guidance (only if a skills tool is registered)
      5. Context files (unless ``self.skip_context_files`` is set)
      6. Timestamp captured at build time
      7. Platform-specific formatting hint

    Args:
        system_message: Optional explicit system prompt; ``None`` means
            fall back to the agent's ephemeral system prompt.

    Returns:
        All non-empty layers joined by blank lines.
    """
    sections = [DEFAULT_AGENT_IDENTITY]

    # Layer 2 -- an explicit argument wins over the ephemeral prompt.
    if system_message is not None:
        extra_prompt = system_message
    else:
        extra_prompt = self.ephemeral_system_prompt
    if extra_prompt:
        sections.append(extra_prompt)

    # Layer 3 -- each memory target is gated by its own enable flag.
    store = self._memory_store
    if store:
        if self._memory_enabled:
            rendered = store.format_for_system_prompt("memory")
            if rendered:
                sections.append(rendered)
        if self._user_profile_enabled:
            rendered = store.format_for_system_prompt("user")
            if rendered:
                sections.append(rendered)

    # Layer 4 -- skills guidance is only relevant when a skills tool exists.
    if 'skills_list' in self.valid_tool_names or 'skill_view' in self.valid_tool_names:
        skills_text = build_skills_system_prompt()
        if skills_text:
            sections.append(skills_text)

    # Layer 5 -- project context files, unless explicitly skipped.
    if not self.skip_context_files:
        context_text = build_context_files_prompt()
        if context_text:
            sections.append(context_text)

    # Layer 6 -- timestamp is taken once here, not refreshed per turn.
    started_at = datetime.now().strftime('%A, %B %d, %Y %I:%M %p')
    sections.append(f"Conversation started: {started_at}")

    # Layer 7 -- normalise the platform name before the hint lookup.
    hint_key = (self.platform or "").lower().strip()
    if hint_key in PLATFORM_HINTS:
        sections.append(PLATFORM_HINTS[hint_key])

    return "\n\n".join(sections)
def _invalidate_system_prompt(self):
"""
Invalidate the cached system prompt, forcing a rebuild on the next turn.
Called after context compression events. Also reloads memory from disk
so the rebuilt prompt captures any writes from this session.
"""
self._cached_system_prompt = None
if self._memory_store:
self._memory_store.load_from_disk()
def run_conversation(
self,
user_message: str,
@ -2093,47 +2244,27 @@ class AIAgent:
if not self.quiet_mode:
print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
# ── Build the full system prompt ──
# Layers (in order):
# 1. Default agent identity (always present)
# 2. User / gateway system prompt (if provided)
# 3. Skills guidance (if skills tools are loaded)
# 4. Context files (SOUL.md, AGENTS.md, .cursorrules)
# 5. Current date & time
# 6. Platform-specific formatting hint
prompt_parts = [DEFAULT_AGENT_IDENTITY]
# ── System prompt (cached per session for prefix caching) ──
# Built once on first call, reused for all subsequent calls.
# Only rebuilt after context compression events (which invalidate
# the cache and reload memory from disk).
if self._cached_system_prompt is None:
self._cached_system_prompt = self._build_system_prompt(system_message)
# Store the system prompt snapshot in SQLite
if self._session_db:
try:
self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
except Exception:
pass
# Layer in the caller-supplied system prompt (explicit > ephemeral).
caller_prompt = system_message if system_message is not None else self.ephemeral_system_prompt
if caller_prompt:
prompt_parts.append(caller_prompt)
active_system_prompt = self._cached_system_prompt
# Auto-include skills guidance if skills tools are available.
has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view'])
skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
if skills_prompt:
prompt_parts.append(skills_prompt)
# Auto-include context files (SOUL.md, AGENTS.md, .cursorrules).
# Skipped for batch processing / data generation to avoid polluting trajectories.
if not self.skip_context_files:
context_files_prompt = build_context_files_prompt()
if context_files_prompt:
prompt_parts.append(context_files_prompt)
# Current local date and time so the model is never confused about
# what day/time it is (LLM training cutoffs can otherwise mislead it).
now = datetime.now()
prompt_parts.append(
f"Current local date and time: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
)
# Platform-specific formatting hint (no markdown on WhatsApp, etc.).
platform_key = (self.platform or "").lower().strip()
if platform_key in PLATFORM_HINTS:
prompt_parts.append(PLATFORM_HINTS[platform_key])
active_system_prompt = "\n\n".join(prompt_parts)
# Log user message to SQLite
if self._session_db:
try:
self._session_db.append_message(self.session_id, "user", user_message)
except Exception:
pass
# Main conversation loop
api_call_count = 0
@ -2510,6 +2641,25 @@ class AIAgent:
todo_snapshot = self._todo_store.format_for_injection()
if todo_snapshot:
messages.append({"role": "user", "content": todo_snapshot})
# Rebuild system prompt with fresh date/time + memory
self._invalidate_system_prompt()
active_system_prompt = self._build_system_prompt(system_message)
self._cached_system_prompt = active_system_prompt
# Split session in SQLite (close old, open new with parent link)
if self._session_db:
try:
self._session_db.end_session(self.session_id, "compression")
old_session_id = self.session_id
self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or "cli",
model=self.model,
parent_session_id=old_session_id,
)
self._session_db.update_system_prompt(self.session_id, active_system_prompt)
except Exception:
pass
print(f"{self.log_prefix} 🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
continue # Retry with compressed messages
else:
@ -2769,9 +2919,33 @@ class AIAgent:
store=self._todo_store,
)
tool_duration = time.time() - tool_start_time
# Show clean output in quiet mode (no spinner needed -- instant)
if self.quiet_mode:
print(f" {self._get_cute_tool_message('todo', function_args, tool_duration)}")
# Session search -- handle directly (needs SessionDB instance)
elif function_name == "session_search" and self._session_db:
from tools.session_search_tool import session_search as _session_search
function_result = _session_search(
query=function_args.get("query", ""),
role_filter=function_args.get("role_filter"),
limit=function_args.get("limit", 3),
db=self._session_db,
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {self._get_cute_tool_message('session_search', function_args, tool_duration)}")
# Memory tool -- handle directly (needs agent's MemoryStore instance)
elif function_name == "memory":
from tools.memory_tool import memory_tool as _memory_tool
function_result = _memory_tool(
action=function_args.get("action"),
target=function_args.get("target", "memory"),
content=function_args.get("content"),
old_text=function_args.get("old_text"),
store=self._memory_store,
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
print(f" {self._get_cute_tool_message('memory', function_args, tool_duration)}")
# Execute other tools - with animated kawaii spinner in quiet mode
# The face is "alive" while the tool works, then vanishes
# and is replaced by the clean result line.
@ -2790,7 +2964,7 @@ class AIAgent:
'vision_analyze': '👁️', 'mixture_of_agents': '🧠',
'skills_list': '📚', 'skill_view': '📚',
'schedule_cronjob': '', 'list_cronjobs': '', 'remove_cronjob': '',
'send_message': '📨', 'todo': '📋',
'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
}
emoji = tool_emoji_map.get(function_name, '')
preview = _build_tool_preview(function_name, function_args) or function_name
@ -2852,10 +3026,29 @@ class AIAgent:
messages,
current_tokens=self.context_compressor.last_prompt_tokens
)
# Re-inject todo state after compression (cache already invalidated)
# Re-inject todo state after compression
todo_snapshot = self._todo_store.format_for_injection()
if todo_snapshot:
messages.append({"role": "user", "content": todo_snapshot})
# Rebuild system prompt with fresh date/time + memory
self._invalidate_system_prompt()
active_system_prompt = self._build_system_prompt(system_message)
self._cached_system_prompt = active_system_prompt
# Split session in SQLite (close old, open new with parent link)
if self._session_db:
try:
self._session_db.end_session(self.session_id, "compression")
old_session_id = self.session_id
self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
self._session_db.create_session(
session_id=self.session_id,
source=self.platform or "cli",
model=self.model,
parent_session_id=old_session_id,
)
self._session_db.update_system_prompt(self.session_id, active_system_prompt)
except Exception:
pass
# Save session log incrementally (so progress is visible even if interrupted)
self._session_messages = messages
@ -3042,6 +3235,32 @@ class AIAgent:
self._session_messages = messages
self._save_session_log(messages)
# Log new messages to SQLite session store (everything after the user message we already logged)
if self._session_db:
try:
# Skip messages that were in the conversation history before this call
# (the user message was already logged at the start of run_conversation)
start_idx = (len(conversation_history) if conversation_history else 0) + 1 # +1 for the user msg
for msg in messages[start_idx:]:
role = msg.get("role", "unknown")
content = msg.get("content")
# Extract tool call info from assistant messages
tool_calls_data = None
if hasattr(msg, "tool_calls") and msg.tool_calls:
tool_calls_data = [{"name": tc.function.name, "arguments": tc.function.arguments} for tc in msg.tool_calls]
elif isinstance(msg.get("tool_calls"), list):
tool_calls_data = msg["tool_calls"]
self._session_db.append_message(
session_id=self.session_id,
role=role,
content=content,
tool_name=msg.get("tool_name"),
tool_calls=tool_calls_data,
tool_call_id=msg.get("tool_call_id"),
)
except Exception:
pass
# Build result with interrupt info if applicable
result = {
"final_response": final_response,