feat(api): implement dynamic max tokens handling for various providers

- Added _max_tokens_param method in AIAgent to return appropriate max tokens parameter based on the provider (OpenAI vs. others).
- Updated API calls in AIAgent to utilize the new max tokens handling.
- Introduced auxiliary_max_tokens_param function in auxiliary_client for consistent max tokens management across auxiliary clients.
- Refactored multiple tools to use auxiliary_max_tokens_param for improved compatibility with different models and providers.
2026-02-26 20:23:56 -08:00 · 2026-02-26 20:23:56 -08:00 · 58fce0a37b
commit 58fce0a37b
parent f0458ebdb8
7 changed files with 67 additions and 20 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -113,13 +113,26 @@ TURNS TO SUMMARIZE:
 Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""

        try:
-            response = self.client.chat.completions.create(
-                model=self.summary_model,
-                messages=[{"role": "user", "content": prompt}],
-                temperature=0.3,
-                max_tokens=self.summary_target_tokens * 2,
-                timeout=30.0,
-            )
+            kwargs = {
+                "model": self.summary_model,
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.3,
+                "timeout": 30.0,
+            }
+            # Most providers (OpenRouter, local models) use max_tokens.
+            # Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+)
+            # requires max_completion_tokens instead.
+            try:
+                kwargs["max_tokens"] = self.summary_target_tokens * 2
+                response = self.client.chat.completions.create(**kwargs)
+            except Exception as first_err:
+                if "max_tokens" in str(first_err) or "unsupported_parameter" in str(first_err):
+                    kwargs.pop("max_tokens", None)
+                    kwargs["max_completion_tokens"] = self.summary_target_tokens * 2
+                    response = self.client.chat.completions.create(**kwargs)
+                else:
+                    raise
+
            summary = response.choices[0].message.content.strip()
            if not summary.startswith("[CONTEXT SUMMARY]:"):
                summary = "[CONTEXT SUMMARY]: " + summary