Fix context overrun crash with local LLM backends (fixes #348)

Local backends (LM Studio, Ollama, llama.cpp) return HTTP 400 with messages like "Context size has been exceeded" when the context window is full. The error phrase list did not include "context size" or "context window", so these errors fell through to the generic 4xx abort handler instead of triggering compression. Changes: - Move context-length check above generic 4xx handler so it runs first (same pattern as the existing 413 check) - Add "context size" and "context window" to the phrase list - Guard 4xx handler with `not is_context_length_error` to prevent context-related 400s from being treated as non-retryable
2026-03-05 01:12:34 +03:00 · 2026-03-05 01:12:34 +03:00 · e9ab711b66
commit e9ab711b66
parent ffec21236d
1 changed files with 40 additions and 36 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -3286,40 +3286,14 @@ class AIAgent:
                                "partial": True
                            }
-                    # Check for non-retryable client errors (4xx HTTP status codes).
+                    # Check for context-length errors BEFORE generic 4xx handler.
-                    # These indicate a problem with the request itself (bad model ID,
+                    # Local backends (LM Studio, Ollama, llama.cpp) often return
-                    # invalid API key, forbidden, etc.) and will never succeed on retry.
+                    # HTTP 400 with messages like "Context size has been exceeded"
-                    # Note: 413 is excluded — it's handled above via compression.
+                    # which must trigger compression, not an immediate abort.
                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
                    is_client_error = is_client_status_error or any(phrase in error_msg for phrase in [
                        'error code: 400', 'error code: 401', 'error code: 403',
                        'error code: 404', 'error code: 422',
                        'is not a valid model', 'invalid model', 'model not found',
                        'invalid api key', 'invalid_api_key', 'authentication',
                        'unauthorized', 'forbidden', 'not found',
                    ])
                    if is_client_error:
                        self._dump_api_request_debug(
                            api_kwargs, reason="non_retryable_client_error", error=api_error,
                        )
                        print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
                        print(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.")
                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
                        self._persist_session(messages, conversation_history)
                        return {
                            "final_response": None,
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
                            "failed": True,
                            "error": str(api_error),
                        }
                    # Check for non-retryable errors (context length exceeded)
                    is_context_length_error = any(phrase in error_msg for phrase in [
-                        'context length', 'maximum context', 'token limit',
+                        'context length', 'context size', 'maximum context',
-                        'too many tokens', 'reduce the length', 'exceeds the limit',
+                        'token limit', 'too many tokens', 'reduce the length',
                        'exceeds the limit', 'context window',
                        'request entity too large',  # OpenRouter/Nous 413 safety net
                    ])
@ -3348,6 +3322,36 @@ class AIAgent:
                                "partial": True
                            }
                    # Check for non-retryable client errors (4xx HTTP status codes).
                    # These indicate a problem with the request itself (bad model ID,
                    # invalid API key, forbidden, etc.) and will never succeed on retry.
                    # Note: 413 and context-length errors are excluded — handled above.
                    is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
                    is_client_error = (is_client_status_error and not is_context_length_error) or any(phrase in error_msg for phrase in [
                        'error code: 401', 'error code: 403',
                        'error code: 404', 'error code: 422',
                        'is not a valid model', 'invalid model', 'model not found',
                        'invalid api key', 'invalid_api_key', 'authentication',
                        'unauthorized', 'forbidden', 'not found',
                    ])
                    if is_client_error:
                        self._dump_api_request_debug(
                            api_kwargs, reason="non_retryable_client_error", error=api_error,
                        )
                        print(f"{self.log_prefix}❌ Non-retryable client error detected. Aborting immediately.")
                        print(f"{self.log_prefix}   💡 This type of error won't be fixed by retrying.")
                        logging.error(f"{self.log_prefix}Non-retryable client error: {api_error}")
                        self._persist_session(messages, conversation_history)
                        return {
                            "final_response": None,
                            "messages": messages,
                            "api_calls": api_call_count,
                            "completed": False,
                            "failed": True,
                            "error": str(api_error),
                        }
                    if retry_count >= max_retries:
                        print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
                        logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")