feat(api): implement dynamic max tokens handling for various providers

- Added _max_tokens_param method in AIAgent to return the appropriate max tokens parameter based on the provider (OpenAI vs. others).
- Updated API calls in AIAgent to use the new max tokens handling.
- Introduced auxiliary_max_tokens_param function in auxiliary_client for consistent max tokens management across auxiliary clients.
- Refactored multiple tools to use auxiliary_max_tokens_param for improved compatibility with different models and providers.
2026-02-26 20:23:56 -08:00 · 2026-02-26 20:23:56 -08:00 · 58fce0a37b
commit 58fce0a37b
parent f0458ebdb8
7 changed files with 67 additions and 20 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -154,3 +154,20 @@ def get_auxiliary_extra_body() -> dict:
    by Nous Portal. Returns empty dict otherwise.
    """
    return dict(NOUS_EXTRA_BODY) if auxiliary_is_nous else {}
+
+
+def auxiliary_max_tokens_param(value: int) -> dict:
+    """Return the max-tokens kwarg, as a one-item dict, for the auxiliary client.
+
+    Gives {'max_completion_tokens': value} only for direct OpenAI (see the
+    checks below; the model name is not inspected), else {'max_tokens': value}.
+    """
+    custom_base = os.getenv("OPENAI_BASE_URL", "")
+    or_key = os.getenv("OPENROUTER_API_KEY")
+    # Only use max_completion_tokens when the auxiliary client resolved to direct
+    # OpenAI: no OpenRouter key, no Nous auth, OPENAI_BASE_URL has api.openai.com.
+    if (not or_key
+            and _read_nous_auth() is None
+            and "api.openai.com" in custom_base.lower()):
+        return {"max_completion_tokens": value}
+    return {"max_tokens": value}
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -113,13 +113,26 @@ TURNS TO SUMMARIZE:
 Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""

        try:
-            response = self.client.chat.completions.create(
-                model=self.summary_model,
-                messages=[{"role": "user", "content": prompt}],
-                temperature=0.3,
-                max_tokens=self.summary_target_tokens * 2,
-                timeout=30.0,
-            )
+            kwargs = {
+                "model": self.summary_model,
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.3,
+                "timeout": 30.0,
+            }
+            # Most providers (OpenRouter, local models) use max_tokens.
+            # Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+)
+            # requires max_completion_tokens instead.
+            try:
+                kwargs["max_tokens"] = self.summary_target_tokens * 2
+                response = self.client.chat.completions.create(**kwargs)
+            except Exception as first_err:
+                if "max_tokens" in str(first_err) or "unsupported_parameter" in str(first_err):
+                    kwargs.pop("max_tokens", None)
+                    kwargs["max_completion_tokens"] = self.summary_target_tokens * 2
+                    response = self.client.chat.completions.create(**kwargs)
+                else:
+                    raise
+
            summary = response.choices[0].message.content.strip()
            if not summary.startswith("[CONTEXT SUMMARY]:"):
                summary = "[CONTEXT SUMMARY]: " + summary