fix: prevent infinite 400 loop on context overflow + block prompt injection via cache files (#1630, #1558)
* fix: prevent infinite 400 failure loop on context overflow (#1630) When a gateway session exceeds the model's context window, Anthropic may return a generic 400 invalid_request_error with just 'Error' as the message. This bypassed the phrase-based context-length detection, causing the agent to treat it as a non-retryable client error. Worse, the failed user message was still persisted to the transcript, making the session even larger on each attempt — creating an infinite loop. Three-layer fix: 1. run_agent.py — Fallback heuristic: when a 400 error has a very short generic message AND the session is large (>40% of context or >80 messages), treat it as a probable context overflow and trigger compression instead of aborting. 2. run_agent.py + gateway/run.py — Don't persist failed messages: when the agent returns failed=True before generating any response, skip writing the user's message to the transcript/DB. This prevents the session from growing on each failure. 3. gateway/run.py — Smarter error messages: detect context-overflow failures and suggest /compact or /reset specifically, instead of a generic 'try again' that will fail identically. * fix(skills): detect prompt injection patterns and block cache file reads Adds two security layers to prevent prompt injection via skills hub cache files (#1558): 1. read_file: blocks direct reads of ~/.hermes/skills/.hub/ directory (index-cache, catalog files). The 3.5MB clawhub_catalog_v1.json was the original injection vector — untrusted skill descriptions in the catalog contained adversarial text that the model executed. 2. skill_view: warns when skills are loaded from outside the trusted ~/.hermes/skills/ directory, and detects common injection patterns in skill content ("ignore previous instructions", "<system>", etc.). Cherry-picked from PR #1562 by ygd58. --------- Co-authored-by: buray <ygd58@users.noreply.github.com>
This commit is contained in:
parent
2d36819503
commit
96dac22194
5 changed files with 436 additions and 32 deletions
113
gateway/run.py
113
gateway/run.py
|
|
@@ -1869,11 +1869,31 @@ class GatewayRunner:
|
|||
# Surface error details when the agent failed silently (final_response=None)
|
||||
if not response and agent_result.get("failed"):
|
||||
error_detail = agent_result.get("error", "unknown error")
|
||||
response = (
|
||||
f"The request failed: {str(error_detail)[:300]}\n"
|
||||
"Try again or use /reset to start a fresh session."
|
||||
error_str = str(error_detail).lower()
|
||||
|
||||
# Detect context-overflow failures and give specific guidance.
|
||||
# Generic 400 "Error" from Anthropic with large sessions is the
|
||||
# most common cause of this (#1630).
|
||||
_is_ctx_fail = any(p in error_str for p in (
|
||||
"context", "token", "too large", "too long",
|
||||
"exceed", "payload",
|
||||
)) or (
|
||||
"400" in error_str
|
||||
and len(history) > 50
|
||||
)
|
||||
|
||||
if _is_ctx_fail:
|
||||
response = (
|
||||
"⚠️ Session too large for the model's context window.\n"
|
||||
"Use /compact to compress the conversation, or "
|
||||
"/reset to start fresh."
|
||||
)
|
||||
else:
|
||||
response = (
|
||||
f"The request failed: {str(error_detail)[:300]}\n"
|
||||
"Try again or use /reset to start a fresh session."
|
||||
)
|
||||
|
||||
# If the agent's session_id changed during compression, update
|
||||
# session_entry so transcript writes below go to the right session.
|
||||
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
|
||||
|
|
@@ -1920,12 +1940,30 @@ class GatewayRunner:
|
|||
# This preserves the complete agent loop (tool_calls, tool results,
|
||||
# intermediate reasoning) so sessions can be resumed with full context
|
||||
# and transcripts are useful for debugging and training data.
|
||||
#
|
||||
# IMPORTANT: When the agent failed before producing any response
|
||||
# (e.g. context-overflow 400), do NOT persist the user's message.
|
||||
# Persisting it would make the session even larger, causing the
|
||||
# same failure on the next attempt — an infinite loop. (#1630)
|
||||
agent_failed_early = (
|
||||
agent_result.get("failed")
|
||||
and not agent_result.get("final_response")
|
||||
)
|
||||
if agent_failed_early:
|
||||
logger.info(
|
||||
"Skipping transcript persistence for failed request in "
|
||||
"session %s to prevent session growth loop.",
|
||||
session_entry.session_id,
|
||||
)
|
||||
|
||||
ts = datetime.now().isoformat()
|
||||
|
||||
# If this is a fresh session (no history), write the full tool
|
||||
# definitions as the first entry so the transcript is self-describing
|
||||
# -- the same list of dicts sent as tools=[...] in the API request.
|
||||
if not history:
|
||||
if agent_failed_early:
|
||||
pass # Skip all transcript writes — don't grow a broken session
|
||||
elif not history:
|
||||
tool_defs = agent_result.get("tools", [])
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
|
|
@@ -1942,36 +1980,37 @@ class GatewayRunner:
|
|||
# Use the filtered history length (history_offset) that was actually
|
||||
# passed to the agent, not len(history) which includes session_meta
|
||||
# entries that were stripped before the agent saw them.
|
||||
history_len = agent_result.get("history_offset", len(history))
|
||||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
|
||||
|
||||
# If no new messages found (edge case), fall back to simple user/assistant
|
||||
if not new_messages:
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
{"role": "user", "content": message_text, "timestamp": ts}
|
||||
)
|
||||
if response:
|
||||
if not agent_failed_early:
|
||||
history_len = agent_result.get("history_offset", len(history))
|
||||
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else []
|
||||
|
||||
# If no new messages found (edge case), fall back to simple user/assistant
|
||||
if not new_messages:
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
{"role": "assistant", "content": response, "timestamp": ts}
|
||||
)
|
||||
else:
|
||||
# The agent already persisted these messages to SQLite via
|
||||
# _flush_messages_to_session_db(), so skip the DB write here
|
||||
# to prevent the duplicate-write bug (#860). We still write
|
||||
# to JSONL for backward compatibility and as a backup.
|
||||
agent_persisted = self._session_db is not None
|
||||
for msg in new_messages:
|
||||
# Skip system messages (they're rebuilt each run)
|
||||
if msg.get("role") == "system":
|
||||
continue
|
||||
# Add timestamp to each message for debugging
|
||||
entry = {**msg, "timestamp": ts}
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id, entry,
|
||||
skip_db=agent_persisted,
|
||||
{"role": "user", "content": message_text, "timestamp": ts}
|
||||
)
|
||||
if response:
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id,
|
||||
{"role": "assistant", "content": response, "timestamp": ts}
|
||||
)
|
||||
else:
|
||||
# The agent already persisted these messages to SQLite via
|
||||
# _flush_messages_to_session_db(), so skip the DB write here
|
||||
# to prevent the duplicate-write bug (#860). We still write
|
||||
# to JSONL for backward compatibility and as a backup.
|
||||
agent_persisted = self._session_db is not None
|
||||
for msg in new_messages:
|
||||
# Skip system messages (they're rebuilt each run)
|
||||
if msg.get("role") == "system":
|
||||
continue
|
||||
# Add timestamp to each message for debugging
|
||||
entry = {**msg, "timestamp": ts}
|
||||
self.session_store.append_to_transcript(
|
||||
session_entry.session_id, entry,
|
||||
skip_db=agent_persisted,
|
||||
)
|
||||
|
||||
# Update session with actual prompt token count and model from the agent
|
||||
self.session_store.update_session(
|
||||
|
|
@@ -2005,6 +2044,18 @@ class GatewayRunner:
|
|||
status_hint = " You are being rate-limited. Please wait a moment and try again."
|
||||
elif status_code == 529:
|
||||
status_hint = " The API is temporarily overloaded. Please try again shortly."
|
||||
elif status_code == 400:
|
||||
# 400 with a large session is almost always a context overflow.
|
||||
# Give specific guidance instead of a generic error. (#1630)
|
||||
_hist_len = len(history) if 'history' in locals() else 0
|
||||
if _hist_len > 50:
|
||||
return (
|
||||
"⚠️ Session too large for the model's context window.\n"
|
||||
"Use /compact to compress the conversation, or "
|
||||
"/reset to start fresh."
|
||||
)
|
||||
else:
|
||||
status_hint = " The request was rejected by the API."
|
||||
return (
|
||||
f"Sorry, I encountered an error ({error_type}).\n"
|
||||
f"{error_detail}\n"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue