feat(api): implement dynamic max tokens handling for various providers

- Added `_max_tokens_param` method in AIAgent to return the appropriate max tokens parameter based on the provider (direct OpenAI vs. others).
- Updated API calls in AIAgent to use the new max tokens handling.
- Introduced `auxiliary_max_tokens_param` function in auxiliary_client for consistent max tokens management across auxiliary clients.
- Refactored multiple tools to use `auxiliary_max_tokens_param` for improved compatibility with different models and providers.
2026-02-26 20:23:56 -08:00 · 2026-02-26 20:23:56 -08:00 · 58fce0a37b
commit 58fce0a37b
parent f0458ebdb8
7 changed files with 67 additions and 20 deletions
--- a/run_agent.py
+++ b/run_agent.py
@ -450,6 +450,21 @@ class AIAgent:
            else:
                print(f"📊 Context limit: {self.context_compressor.context_length:,} tokens (auto-compression disabled)")
    
+    def _max_tokens_param(self, value: int) -> dict:
+        """Return the max-tokens kwarg to send for the current provider.
+
+        OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
+        'max_completion_tokens'. OpenRouter, local models, and older
+        OpenAI models use 'max_tokens'.
+
+        The decision is made from ``self.base_url`` only; the model name is
+        not inspected, so every request sent directly to api.openai.com gets
+        'max_completion_tokens'.
+
+        Args:
+            value: Token limit to place in the request.
+
+        Returns:
+            A single-entry dict meant to be merged into the request kwargs:
+            ``{"max_completion_tokens": value}`` when the base URL contains
+            "api.openai.com" and does not contain "openrouter" (both checks
+            case-insensitive), otherwise ``{"max_tokens": value}``.
+        """
+        # Normalise once. An unset (None) or non-str base URL falls through
+        # to the generic 'max_tokens' key instead of raising AttributeError.
+        base_url = str(self.base_url or "").lower()
+        is_direct_openai = (
+            "api.openai.com" in base_url and "openrouter" not in base_url
+        )
+        key = "max_completion_tokens" if is_direct_openai else "max_tokens"
+        return {key: value}
+
    def _has_content_after_think_block(self, content: str) -> bool:
        """
        Check if content has actual text after any <think></think> blocks.
@ -1190,7 +1205,7 @@ class AIAgent:
        }

        if self.max_tokens is not None:
-            api_kwargs["max_tokens"] = self.max_tokens
+            api_kwargs.update(self._max_tokens_param(self.max_tokens))

        extra_body = {}

@ -1324,7 +1339,7 @@ class AIAgent:
                "messages": api_messages,
                "tools": [memory_tool_def],
                "temperature": 0.3,
-                "max_tokens": 1024,
+                **self._max_tokens_param(1024),
            }

            response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
@ -1644,7 +1659,7 @@ class AIAgent:
                "messages": api_messages,
            }
            if self.max_tokens is not None:
-                summary_kwargs["max_tokens"] = self.max_tokens
+                summary_kwargs.update(self._max_tokens_param(self.max_tokens))
            if summary_extra_body:
                summary_kwargs["extra_body"] = summary_extra_body