feat: add persistent memory system + SQLite session store
Two-part implementation: Part A - Curated Bounded Memory: - New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores - Character-limited (2200/1375 chars), § delimited entries - Frozen snapshot injected into system prompt at session start - Model manages pruning via replace/remove with substring matching - Usage indicator shown in system prompt header Part B - SQLite Session Store: - New hermes_state.py with SessionDB class, FTS5 full-text search - Gateway session.py rewritten to dual-write SQLite + legacy JSONL - Compression-triggered session splitting with parent_session_id chains - New session_search tool with Gemini Flash summarization of matched sessions - CLI session lifecycle (create on launch, close on exit) Also: - System prompt now cached per session, only rebuilt on compression (fixes prefix cache invalidation from date/time changes every turn) - Config version bumped to 3, hermes doctor checks for new artifacts - Disabled in batch_runner and RL environments
This commit is contained in:
parent
655303f2f1
commit
440c244cac
19 changed files with 2397 additions and 327 deletions
303
run_agent.py
303
run_agent.py
|
|
@ -875,6 +875,24 @@ def _build_tool_preview(tool_name: str, args: dict, max_len: int = 40) -> str:
|
|||
else:
|
||||
return f"planning {len(todos_arg)} task(s)"
|
||||
|
||||
if tool_name == "session_search":
|
||||
query = args.get("query", "")
|
||||
return f"recall: \"{query[:25]}{'...' if len(query) > 25 else ''}\""
|
||||
|
||||
if tool_name == "memory":
|
||||
action = args.get("action", "")
|
||||
target = args.get("target", "")
|
||||
if action == "add":
|
||||
content = args.get("content", "")
|
||||
return f"+{target}: \"{content[:25]}{'...' if len(content) > 25 else ''}\""
|
||||
elif action == "replace":
|
||||
return f"~{target}: \"{args.get('old_text', '')[:20]}\""
|
||||
elif action == "remove":
|
||||
return f"-{target}: \"{args.get('old_text', '')[:20]}\""
|
||||
elif action == "read":
|
||||
return f"read {target}"
|
||||
return action
|
||||
|
||||
if tool_name == "send_message":
|
||||
target = args.get("target", "?")
|
||||
msg = args.get("message", "")
|
||||
|
|
@ -1061,6 +1079,8 @@ class AIAgent:
|
|||
prefill_messages: List[Dict[str, Any]] = None,
|
||||
platform: str = None,
|
||||
skip_context_files: bool = False,
|
||||
skip_memory: bool = False,
|
||||
session_db=None,
|
||||
):
|
||||
"""
|
||||
Initialize the AI Agent.
|
||||
|
|
@ -1269,10 +1289,51 @@ class AIAgent:
|
|||
# Track conversation messages for session logging
|
||||
self._session_messages: List[Dict[str, Any]] = []
|
||||
|
||||
# Cached system prompt -- built once per session, only rebuilt on compression
|
||||
self._cached_system_prompt: Optional[str] = None
|
||||
|
||||
# SQLite session store (optional -- provided by CLI or gateway)
|
||||
self._session_db = session_db
|
||||
if self._session_db:
|
||||
try:
|
||||
self._session_db.create_session(
|
||||
session_id=self.session_id,
|
||||
source=self.platform or "cli",
|
||||
model=self.model,
|
||||
model_config={
|
||||
"max_iterations": self.max_iterations,
|
||||
"reasoning_config": reasoning_config,
|
||||
"max_tokens": max_tokens,
|
||||
},
|
||||
user_id=None,
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# In-memory todo list for task planning (one per agent/session)
|
||||
from tools.todo_tool import TodoStore
|
||||
self._todo_store = TodoStore()
|
||||
|
||||
# Persistent memory (MEMORY.md + USER.md) -- loaded from disk
|
||||
self._memory_store = None
|
||||
self._memory_enabled = False
|
||||
self._user_profile_enabled = False
|
||||
if not skip_memory:
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_mem_config
|
||||
mem_config = _load_mem_config().get("memory", {})
|
||||
self._memory_enabled = mem_config.get("memory_enabled", False)
|
||||
self._user_profile_enabled = mem_config.get("user_profile_enabled", False)
|
||||
if self._memory_enabled or self._user_profile_enabled:
|
||||
from tools.memory_tool import MemoryStore
|
||||
self._memory_store = MemoryStore(
|
||||
memory_char_limit=mem_config.get("memory_char_limit", 2200),
|
||||
user_char_limit=mem_config.get("user_char_limit", 1375),
|
||||
)
|
||||
self._memory_store.load_from_disk()
|
||||
except Exception:
|
||||
pass # Memory is optional -- don't break agent init
|
||||
|
||||
# Initialize context compressor for automatic context management
|
||||
# Compresses conversation when approaching model's context limit
|
||||
# Configuration via environment variables (can be set in .env or cli-config.yaml)
|
||||
|
|
@ -1452,6 +1513,32 @@ class AIAgent:
|
|||
else:
|
||||
return f"┊ 📋 plan {len(todos_arg)} task(s) {dur}"
|
||||
|
||||
# ── Session Search ──
|
||||
if tool_name == "session_search":
|
||||
query = _trunc(args.get("query", ""), 35)
|
||||
return f"┊ 🔍 recall \"{query}\" {dur}"
|
||||
|
||||
# ── Memory ──
|
||||
if tool_name == "memory":
|
||||
action = args.get("action", "?")
|
||||
target = args.get("target", "")
|
||||
if action == "add":
|
||||
preview = _trunc(args.get("content", ""), 30)
|
||||
return f"┊ 🧠 memory +{target}: \"{preview}\" {dur}"
|
||||
elif action == "replace":
|
||||
snippet = _trunc(args.get("old_text", ""), 20)
|
||||
return f"┊ 🧠 memory ~{target}: \"{snippet}\" {dur}"
|
||||
elif action == "remove":
|
||||
snippet = _trunc(args.get("old_text", ""), 20)
|
||||
return f"┊ 🧠 memory -{target}: \"{snippet}\" {dur}"
|
||||
elif action == "read":
|
||||
return f"┊ 🧠 memory read {target} {dur}"
|
||||
elif action == "search_sessions":
|
||||
query = _trunc(args.get("content", ""), 30)
|
||||
return f"┊ 🧠 recall \"{query}\" {dur}"
|
||||
else:
|
||||
return f"┊ 🧠 memory {action} {dur}"
|
||||
|
||||
# ── Skills ──
|
||||
if tool_name == "skills_list":
|
||||
return f"┊ 📚 skills list {args.get('category', 'all')} {dur}"
|
||||
|
|
@ -2041,6 +2128,70 @@ class AIAgent:
|
|||
"""Check if an interrupt has been requested."""
|
||||
return self._interrupt_requested
|
||||
|
||||
def _build_system_prompt(self, system_message: str = None) -> str:
|
||||
"""
|
||||
Assemble the full system prompt from all layers.
|
||||
|
||||
Called once per session (cached on self._cached_system_prompt) and only
|
||||
rebuilt after context compression events. This ensures the system prompt
|
||||
is stable across all turns in a session, maximizing prefix cache hits.
|
||||
"""
|
||||
# Layers (in order):
|
||||
# 1. Default agent identity (always present)
|
||||
# 2. User / gateway system prompt (if provided)
|
||||
# 3. Persistent memory (frozen snapshot)
|
||||
# 4. Skills guidance (if skills tools are loaded)
|
||||
# 5. Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# 6. Current date & time (frozen at build time)
|
||||
# 7. Platform-specific formatting hint
|
||||
prompt_parts = [DEFAULT_AGENT_IDENTITY]
|
||||
|
||||
caller_prompt = system_message if system_message is not None else self.ephemeral_system_prompt
|
||||
if caller_prompt:
|
||||
prompt_parts.append(caller_prompt)
|
||||
|
||||
if self._memory_store:
|
||||
if self._memory_enabled:
|
||||
mem_block = self._memory_store.format_for_system_prompt("memory")
|
||||
if mem_block:
|
||||
prompt_parts.append(mem_block)
|
||||
if self._user_profile_enabled:
|
||||
user_block = self._memory_store.format_for_system_prompt("user")
|
||||
if user_block:
|
||||
prompt_parts.append(user_block)
|
||||
|
||||
has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view'])
|
||||
skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
|
||||
if skills_prompt:
|
||||
prompt_parts.append(skills_prompt)
|
||||
|
||||
if not self.skip_context_files:
|
||||
context_files_prompt = build_context_files_prompt()
|
||||
if context_files_prompt:
|
||||
prompt_parts.append(context_files_prompt)
|
||||
|
||||
now = datetime.now()
|
||||
prompt_parts.append(
|
||||
f"Conversation started: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
||||
)
|
||||
|
||||
platform_key = (self.platform or "").lower().strip()
|
||||
if platform_key in PLATFORM_HINTS:
|
||||
prompt_parts.append(PLATFORM_HINTS[platform_key])
|
||||
|
||||
return "\n\n".join(prompt_parts)
|
||||
|
||||
def _invalidate_system_prompt(self):
|
||||
"""
|
||||
Invalidate the cached system prompt, forcing a rebuild on the next turn.
|
||||
|
||||
Called after context compression events. Also reloads memory from disk
|
||||
so the rebuilt prompt captures any writes from this session.
|
||||
"""
|
||||
self._cached_system_prompt = None
|
||||
if self._memory_store:
|
||||
self._memory_store.load_from_disk()
|
||||
|
||||
def run_conversation(
|
||||
self,
|
||||
user_message: str,
|
||||
|
|
@ -2093,47 +2244,27 @@ class AIAgent:
|
|||
if not self.quiet_mode:
|
||||
print(f"💬 Starting conversation: '{user_message[:60]}{'...' if len(user_message) > 60 else ''}'")
|
||||
|
||||
# ── Build the full system prompt ──
|
||||
# Layers (in order):
|
||||
# 1. Default agent identity (always present)
|
||||
# 2. User / gateway system prompt (if provided)
|
||||
# 3. Skills guidance (if skills tools are loaded)
|
||||
# 4. Context files (SOUL.md, AGENTS.md, .cursorrules)
|
||||
# 5. Current date & time
|
||||
# 6. Platform-specific formatting hint
|
||||
prompt_parts = [DEFAULT_AGENT_IDENTITY]
|
||||
# ── System prompt (cached per session for prefix caching) ──
|
||||
# Built once on first call, reused for all subsequent calls.
|
||||
# Only rebuilt after context compression events (which invalidate
|
||||
# the cache and reload memory from disk).
|
||||
if self._cached_system_prompt is None:
|
||||
self._cached_system_prompt = self._build_system_prompt(system_message)
|
||||
# Store the system prompt snapshot in SQLite
|
||||
if self._session_db:
|
||||
try:
|
||||
self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Layer in the caller-supplied system prompt (explicit > ephemeral).
|
||||
caller_prompt = system_message if system_message is not None else self.ephemeral_system_prompt
|
||||
if caller_prompt:
|
||||
prompt_parts.append(caller_prompt)
|
||||
active_system_prompt = self._cached_system_prompt
|
||||
|
||||
# Auto-include skills guidance if skills tools are available.
|
||||
has_skills_tools = any(name in self.valid_tool_names for name in ['skills_list', 'skill_view'])
|
||||
skills_prompt = build_skills_system_prompt() if has_skills_tools else ""
|
||||
if skills_prompt:
|
||||
prompt_parts.append(skills_prompt)
|
||||
|
||||
# Auto-include context files (SOUL.md, AGENTS.md, .cursorrules).
|
||||
# Skipped for batch processing / data generation to avoid polluting trajectories.
|
||||
if not self.skip_context_files:
|
||||
context_files_prompt = build_context_files_prompt()
|
||||
if context_files_prompt:
|
||||
prompt_parts.append(context_files_prompt)
|
||||
|
||||
# Current local date and time so the model is never confused about
|
||||
# what day/time it is (LLM training cutoffs can otherwise mislead it).
|
||||
now = datetime.now()
|
||||
prompt_parts.append(
|
||||
f"Current local date and time: {now.strftime('%A, %B %d, %Y %I:%M %p')}"
|
||||
)
|
||||
|
||||
# Platform-specific formatting hint (no markdown on WhatsApp, etc.).
|
||||
platform_key = (self.platform or "").lower().strip()
|
||||
if platform_key in PLATFORM_HINTS:
|
||||
prompt_parts.append(PLATFORM_HINTS[platform_key])
|
||||
|
||||
active_system_prompt = "\n\n".join(prompt_parts)
|
||||
# Log user message to SQLite
|
||||
if self._session_db:
|
||||
try:
|
||||
self._session_db.append_message(self.session_id, "user", user_message)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Main conversation loop
|
||||
api_call_count = 0
|
||||
|
|
@ -2510,6 +2641,25 @@ class AIAgent:
|
|||
todo_snapshot = self._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
messages.append({"role": "user", "content": todo_snapshot})
|
||||
# Rebuild system prompt with fresh date/time + memory
|
||||
self._invalidate_system_prompt()
|
||||
active_system_prompt = self._build_system_prompt(system_message)
|
||||
self._cached_system_prompt = active_system_prompt
|
||||
# Split session in SQLite (close old, open new with parent link)
|
||||
if self._session_db:
|
||||
try:
|
||||
self._session_db.end_session(self.session_id, "compression")
|
||||
old_session_id = self.session_id
|
||||
self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
self._session_db.create_session(
|
||||
session_id=self.session_id,
|
||||
source=self.platform or "cli",
|
||||
model=self.model,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
self._session_db.update_system_prompt(self.session_id, active_system_prompt)
|
||||
except Exception:
|
||||
pass
|
||||
print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
||||
continue # Retry with compressed messages
|
||||
else:
|
||||
|
|
@ -2769,9 +2919,33 @@ class AIAgent:
|
|||
store=self._todo_store,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
# Show clean output in quiet mode (no spinner needed -- instant)
|
||||
if self.quiet_mode:
|
||||
print(f" {self._get_cute_tool_message('todo', function_args, tool_duration)}")
|
||||
# Session search -- handle directly (needs SessionDB instance)
|
||||
elif function_name == "session_search" and self._session_db:
|
||||
from tools.session_search_tool import session_search as _session_search
|
||||
function_result = _session_search(
|
||||
query=function_args.get("query", ""),
|
||||
role_filter=function_args.get("role_filter"),
|
||||
limit=function_args.get("limit", 3),
|
||||
db=self._session_db,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if self.quiet_mode:
|
||||
print(f" {self._get_cute_tool_message('session_search', function_args, tool_duration)}")
|
||||
# Memory tool -- handle directly (needs agent's MemoryStore instance)
|
||||
elif function_name == "memory":
|
||||
from tools.memory_tool import memory_tool as _memory_tool
|
||||
function_result = _memory_tool(
|
||||
action=function_args.get("action"),
|
||||
target=function_args.get("target", "memory"),
|
||||
content=function_args.get("content"),
|
||||
old_text=function_args.get("old_text"),
|
||||
store=self._memory_store,
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if self.quiet_mode:
|
||||
print(f" {self._get_cute_tool_message('memory', function_args, tool_duration)}")
|
||||
# Execute other tools - with animated kawaii spinner in quiet mode
|
||||
# The face is "alive" while the tool works, then vanishes
|
||||
# and is replaced by the clean result line.
|
||||
|
|
@ -2790,7 +2964,7 @@ class AIAgent:
|
|||
'vision_analyze': '👁️', 'mixture_of_agents': '🧠',
|
||||
'skills_list': '📚', 'skill_view': '📚',
|
||||
'schedule_cronjob': '⏰', 'list_cronjobs': '⏰', 'remove_cronjob': '⏰',
|
||||
'send_message': '📨', 'todo': '📋',
|
||||
'send_message': '📨', 'todo': '📋', 'memory': '🧠', 'session_search': '🔍',
|
||||
}
|
||||
emoji = tool_emoji_map.get(function_name, '⚡')
|
||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||
|
|
@ -2852,10 +3026,29 @@ class AIAgent:
|
|||
messages,
|
||||
current_tokens=self.context_compressor.last_prompt_tokens
|
||||
)
|
||||
# Re-inject todo state after compression (cache already invalidated)
|
||||
# Re-inject todo state after compression
|
||||
todo_snapshot = self._todo_store.format_for_injection()
|
||||
if todo_snapshot:
|
||||
messages.append({"role": "user", "content": todo_snapshot})
|
||||
# Rebuild system prompt with fresh date/time + memory
|
||||
self._invalidate_system_prompt()
|
||||
active_system_prompt = self._build_system_prompt(system_message)
|
||||
self._cached_system_prompt = active_system_prompt
|
||||
# Split session in SQLite (close old, open new with parent link)
|
||||
if self._session_db:
|
||||
try:
|
||||
self._session_db.end_session(self.session_id, "compression")
|
||||
old_session_id = self.session_id
|
||||
self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}"
|
||||
self._session_db.create_session(
|
||||
session_id=self.session_id,
|
||||
source=self.platform or "cli",
|
||||
model=self.model,
|
||||
parent_session_id=old_session_id,
|
||||
)
|
||||
self._session_db.update_system_prompt(self.session_id, active_system_prompt)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Save session log incrementally (so progress is visible even if interrupted)
|
||||
self._session_messages = messages
|
||||
|
|
@ -3042,6 +3235,32 @@ class AIAgent:
|
|||
self._session_messages = messages
|
||||
self._save_session_log(messages)
|
||||
|
||||
# Log new messages to SQLite session store (everything after the user message we already logged)
|
||||
if self._session_db:
|
||||
try:
|
||||
# Skip messages that were in the conversation history before this call
|
||||
# (the user message was already logged at the start of run_conversation)
|
||||
start_idx = (len(conversation_history) if conversation_history else 0) + 1 # +1 for the user msg
|
||||
for msg in messages[start_idx:]:
|
||||
role = msg.get("role", "unknown")
|
||||
content = msg.get("content")
|
||||
# Extract tool call info from assistant messages
|
||||
tool_calls_data = None
|
||||
if hasattr(msg, "tool_calls") and msg.tool_calls:
|
||||
tool_calls_data = [{"name": tc.function.name, "arguments": tc.function.arguments} for tc in msg.tool_calls]
|
||||
elif isinstance(msg.get("tool_calls"), list):
|
||||
tool_calls_data = msg["tool_calls"]
|
||||
self._session_db.append_message(
|
||||
session_id=self.session_id,
|
||||
role=role,
|
||||
content=content,
|
||||
tool_name=msg.get("tool_name"),
|
||||
tool_calls=tool_calls_data,
|
||||
tool_call_id=msg.get("tool_call_id"),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Build result with interrupt info if applicable
|
||||
result = {
|
||||
"final_response": final_response,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue