Merge PR #403: Fix context overrun crash with local LLM backends
Authored by ch3ronsa. Fixes #348. Adds 'context size' (LM Studio) and 'context window' (Ollama) to context-length error detection phrases so local backend 400 errors trigger compression instead of aborting. Also removes 'error code: 400' from the non-retryable error list as defense in depth.
This commit is contained in:
commit
3220bb8aaa
1 changed file with 11 additions and 11 deletions
22
run_agent.py
22
run_agent.py
|
|
@@ -3333,23 +3333,24 @@ class AIAgent:
|
||||||
}
|
}
|
||||||
|
|
||||||
# Check for context-length errors BEFORE generic 4xx handler.
|
# Check for context-length errors BEFORE generic 4xx handler.
|
||||||
# OpenRouter returns 400 (not 413) for "maximum context length"
|
# Local backends (LM Studio, Ollama, llama.cpp) often return
|
||||||
# errors — if we let the generic 4xx handler catch those first,
|
# HTTP 400 with messages like "Context size has been exceeded"
|
||||||
# it aborts immediately instead of attempting compression+retry.
|
# which must trigger compression, not an immediate abort.
|
||||||
is_context_length_error = any(phrase in error_msg for phrase in [
|
is_context_length_error = any(phrase in error_msg for phrase in [
|
||||||
'context length', 'maximum context', 'token limit',
|
'context length', 'context size', 'maximum context',
|
||||||
'too many tokens', 'reduce the length', 'exceeds the limit',
|
'token limit', 'too many tokens', 'reduce the length',
|
||||||
|
'exceeds the limit', 'context window',
|
||||||
'request entity too large', # OpenRouter/Nous 413 safety net
|
'request entity too large', # OpenRouter/Nous 413 safety net
|
||||||
])
|
])
|
||||||
|
|
||||||
if is_context_length_error:
|
if is_context_length_error:
|
||||||
print(f"{self.log_prefix}⚠️ Context length exceeded - attempting compression...")
|
print(f"{self.log_prefix}⚠️ Context length exceeded - attempting compression...")
|
||||||
|
|
||||||
original_len = len(messages)
|
original_len = len(messages)
|
||||||
messages, active_system_prompt = self._compress_context(
|
messages, active_system_prompt = self._compress_context(
|
||||||
messages, system_message, approx_tokens=approx_tokens
|
messages, system_message, approx_tokens=approx_tokens
|
||||||
)
|
)
|
||||||
|
|
||||||
if len(messages) < original_len:
|
if len(messages) < original_len:
|
||||||
print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
||||||
continue # Retry with compressed messages
|
continue # Retry with compressed messages
|
||||||
|
|
@@ -3370,11 +3371,10 @@ class AIAgent:
|
||||||
# Check for non-retryable client errors (4xx HTTP status codes).
|
# Check for non-retryable client errors (4xx HTTP status codes).
|
||||||
# These indicate a problem with the request itself (bad model ID,
|
# These indicate a problem with the request itself (bad model ID,
|
||||||
# invalid API key, forbidden, etc.) and will never succeed on retry.
|
# invalid API key, forbidden, etc.) and will never succeed on retry.
|
||||||
# Note: 413 and context-length errors are excluded — handled above
|
# Note: 413 and context-length errors are excluded — handled above.
|
||||||
# via compression.
|
|
||||||
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
|
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
|
||||||
is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
|
is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
|
||||||
'error code: 400', 'error code: 401', 'error code: 403',
|
'error code: 401', 'error code: 403',
|
||||||
'error code: 404', 'error code: 422',
|
'error code: 404', 'error code: 422',
|
||||||
'is not a valid model', 'invalid model', 'model not found',
|
'is not a valid model', 'invalid model', 'model not found',
|
||||||
'invalid api key', 'invalid_api_key', 'authentication',
|
'invalid api key', 'invalid_api_key', 'authentication',
|
||||||
|
|
@@ -3397,7 +3397,7 @@ class AIAgent:
|
||||||
"failed": True,
|
"failed": True,
|
||||||
"error": str(api_error),
|
"error": str(api_error),
|
||||||
}
|
}
|
||||||
|
|
||||||
if retry_count >= max_retries:
|
if retry_count >= max_retries:
|
||||||
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
|
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
|
||||||
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
|
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue