fix: tighten memory and session recall guidance
Remove the diary-style memory framing from the system prompt and the memory tool schema, explicitly steer task and session logs to session_search, and clarify that session_search is for cross-session recall, to be used only after checking the current conversation. Add regression tests for the updated guidance text.
This commit is contained in:
parent
6d8286f396
commit
5319bb6ac4
6 changed files with 66 additions and 16 deletions
|
|
@ -71,15 +71,17 @@ DEFAULT_AGENT_IDENTITY = (
|
||||||
)
|
)
|
||||||
|
|
||||||
MEMORY_GUIDANCE = (
|
MEMORY_GUIDANCE = (
|
||||||
"You have persistent memory across sessions. Proactively save important things "
|
"You have persistent memory across sessions. Save durable facts using the memory "
|
||||||
"you learn (user preferences, environment details, useful approaches) and do "
|
"tool: user preferences, environment details, tool quirks, and stable conventions. "
|
||||||
"(like a diary!) using the memory tool -- don't wait to be asked."
|
"Memory is injected into every turn, so keep it compact. Do NOT save task progress, "
|
||||||
|
"session outcomes, or completed-work logs to memory; use session_search to recall "
|
||||||
|
"those from past transcripts."
|
||||||
)
|
)
|
||||||
|
|
||||||
SESSION_SEARCH_GUIDANCE = (
|
SESSION_SEARCH_GUIDANCE = (
|
||||||
"When the user references something from a past conversation or you suspect "
|
"When the user references something from a past conversation or you suspect "
|
||||||
"relevant prior context exists, use session_search to recall it before asking "
|
"relevant cross-session context exists, use session_search to recall it before "
|
||||||
"them to repeat themselves."
|
"asking them to repeat themselves."
|
||||||
)
|
)
|
||||||
|
|
||||||
SKILLS_GUIDANCE = (
|
SKILLS_GUIDANCE = (
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,30 @@ from agent.prompt_builder import (
|
||||||
build_context_files_prompt,
|
build_context_files_prompt,
|
||||||
CONTEXT_FILE_MAX_CHARS,
|
CONTEXT_FILE_MAX_CHARS,
|
||||||
DEFAULT_AGENT_IDENTITY,
|
DEFAULT_AGENT_IDENTITY,
|
||||||
|
MEMORY_GUIDANCE,
|
||||||
|
SESSION_SEARCH_GUIDANCE,
|
||||||
PLATFORM_HINTS,
|
PLATFORM_HINTS,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Guidance constants
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
|
||||||
|
class TestGuidanceConstants:
|
||||||
|
def test_memory_guidance_discourages_task_logs(self):
|
||||||
|
assert "durable facts" in MEMORY_GUIDANCE
|
||||||
|
assert "Do NOT save task progress" in MEMORY_GUIDANCE
|
||||||
|
assert "session_search" in MEMORY_GUIDANCE
|
||||||
|
assert "like a diary" not in MEMORY_GUIDANCE
|
||||||
|
assert ">80%" not in MEMORY_GUIDANCE
|
||||||
|
|
||||||
|
def test_session_search_guidance_is_simple_cross_session_recall(self):
|
||||||
|
assert "relevant cross-session context exists" in SESSION_SEARCH_GUIDANCE
|
||||||
|
assert "recent turns of the current session" not in SESSION_SEARCH_GUIDANCE
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Context injection scanning
|
# Context injection scanning
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,24 @@ from tools.memory_tool import (
|
||||||
memory_tool,
|
memory_tool,
|
||||||
_scan_memory_content,
|
_scan_memory_content,
|
||||||
ENTRY_DELIMITER,
|
ENTRY_DELIMITER,
|
||||||
|
MEMORY_SCHEMA,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tool schema guidance
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestMemorySchema:
|
||||||
|
def test_discourages_diary_style_task_logs(self):
|
||||||
|
description = MEMORY_SCHEMA["description"]
|
||||||
|
assert "Do NOT save task progress" in description
|
||||||
|
assert "session_search" in description
|
||||||
|
assert "like a diary" not in description
|
||||||
|
assert "temporary task state" in description
|
||||||
|
assert ">80%" not in description
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# Security scanning
|
# Security scanning
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,21 @@ from tools.session_search_tool import (
|
||||||
_format_conversation,
|
_format_conversation,
|
||||||
_truncate_around_matches,
|
_truncate_around_matches,
|
||||||
MAX_SESSION_CHARS,
|
MAX_SESSION_CHARS,
|
||||||
|
SESSION_SEARCH_SCHEMA,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# Tool schema guidance
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
class TestSessionSearchSchema:
|
||||||
|
def test_keeps_cross_session_recall_guidance_without_current_session_nudge(self):
|
||||||
|
description = SESSION_SEARCH_SCHEMA["description"]
|
||||||
|
assert "past conversations" in description
|
||||||
|
assert "recent turns of the current session" not in description
|
||||||
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# _format_timestamp
|
# _format_timestamp
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
|
|
@ -435,24 +435,25 @@ def check_memory_requirements() -> bool:
|
||||||
MEMORY_SCHEMA = {
|
MEMORY_SCHEMA = {
|
||||||
"name": "memory",
|
"name": "memory",
|
||||||
"description": (
|
"description": (
|
||||||
"Save important information to persistent memory that survives across sessions. "
|
"Save durable information to persistent memory that survives across sessions. "
|
||||||
"Your memory appears in your system prompt at session start -- it's how you "
|
"Memory is injected into future turns, so keep it compact and focused on facts "
|
||||||
"remember things about the user and your environment between conversations.\n\n"
|
"that will still matter later.\n\n"
|
||||||
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
|
"WHEN TO SAVE (do this proactively, don't wait to be asked):\n"
|
||||||
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
|
"- User shares a preference, habit, or personal detail (name, role, timezone, coding style)\n"
|
||||||
"- You discover something about the environment (OS, installed tools, project structure)\n"
|
"- You discover something about the environment (OS, installed tools, project structure)\n"
|
||||||
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
"- User corrects you or says 'remember this' / 'don't do that again'\n"
|
||||||
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
|
"- You learn a convention, API quirk, or workflow specific to this user's setup\n"
|
||||||
"- You completed something - log it like a diary entry\n"
|
"- You identify a stable fact that will be useful again in future sessions\n\n"
|
||||||
"- After completing a complex task, save a brief note about what was done\n\n"
|
"Do NOT save task progress, session outcomes, completed-work logs, or temporary TODO "
|
||||||
"- If you've discovered a new way to do something, solved a problem that could be necessary later, save it as a skill with the skill tool\n\n"
|
"state to memory; use session_search to recall those from past transcripts.\n"
|
||||||
|
"If you've discovered a new way to do something, solved a problem that could be "
|
||||||
|
"necessary later, save it as a skill with the skill tool.\n\n"
|
||||||
"TWO TARGETS:\n"
|
"TWO TARGETS:\n"
|
||||||
"- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
|
"- 'user': who the user is -- name, role, preferences, communication style, pet peeves\n"
|
||||||
"- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
|
"- 'memory': your notes -- environment facts, project conventions, tool quirks, lessons learned\n\n"
|
||||||
"ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
|
"ACTIONS: add (new entry), replace (update existing -- old_text identifies it), "
|
||||||
"remove (delete -- old_text identifies it).\n"
|
"remove (delete -- old_text identifies it).\n\n"
|
||||||
"Capacity shown in system prompt. When >80%, consolidate entries before adding new ones.\n\n"
|
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps, and temporary task state."
|
||||||
"SKIP: trivial/obvious info, things easily re-discovered, raw data dumps."
|
|
||||||
),
|
),
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|
|
||||||
|
|
@ -341,8 +341,8 @@ SESSION_SEARCH_SCHEMA = {
|
||||||
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
|
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
|
||||||
"- You want to check if you've solved a similar problem before\n"
|
"- You want to check if you've solved a similar problem before\n"
|
||||||
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
|
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
|
||||||
"Don't hesitate to search -- it's fast and cheap. Better to search and confirm "
|
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
|
||||||
"than to guess or ask the user to repeat themselves.\n\n"
|
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
|
||||||
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
|
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
|
||||||
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
|
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
|
||||||
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
|
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue