Merge PR #436: fix: use _max_tokens_param in max-iterations retry path

Authored by Farukest. Fixes #435. The retry summary in
_handle_max_iterations() hardcoded max_tokens instead of using
_max_tokens_param(), which returns max_completion_tokens for the direct
OpenAI API (required by gpt-4o and the o-series models). The first attempt
already used _max_tokens_param() correctly — only the retry path was wrong.
Includes 4 tests for _max_tokens_param() provider detection.
This commit is contained in:
teknium1 2026-03-06 04:46:24 -08:00
commit 3e93db16bd
2 changed files with 29 additions and 1 deletions

View file

@ -2743,7 +2743,7 @@ class AIAgent:
"messages": api_messages,
}
if self.max_tokens is not None:
summary_kwargs["max_tokens"] = self.max_tokens
summary_kwargs.update(self._max_tokens_param(self.max_tokens))
if summary_extra_body:
summary_kwargs["extra_body"] = summary_extra_body

View file

@ -932,3 +932,31 @@ class TestConversationHistoryNotMutated:
)
# Result should have more messages than the original history
assert len(result["messages"]) > original_len
# ---------------------------------------------------------------------------
# _max_tokens_param consistency
# ---------------------------------------------------------------------------
class TestMaxTokensParam:
    """Check which token-limit key ``_max_tokens_param`` emits per endpoint.

    Each test points the ``agent`` fixture at a different ``base_url`` and
    compares the returned dict against the expected single-key mapping.
    """

    def test_returns_max_completion_tokens_for_direct_openai(self, agent):
        """The direct OpenAI endpoint yields ``max_completion_tokens``."""
        agent.base_url = "https://api.openai.com/v1"
        expected = {"max_completion_tokens": 4096}
        assert agent._max_tokens_param(4096) == expected

    def test_returns_max_tokens_for_openrouter(self, agent):
        """An OpenRouter endpoint yields the ``max_tokens`` key."""
        agent.base_url = "https://openrouter.ai/api/v1"
        expected = {"max_tokens": 4096}
        assert agent._max_tokens_param(4096) == expected

    def test_returns_max_tokens_for_local(self, agent):
        """A localhost endpoint yields the ``max_tokens`` key."""
        agent.base_url = "http://localhost:11434/v1"
        expected = {"max_tokens": 4096}
        assert agent._max_tokens_param(4096) == expected

    def test_not_tricked_by_openai_in_openrouter_url(self, agent):
        """An OpenRouter URL whose path contains ``api.openai.com`` still
        yields ``max_tokens``, not ``max_completion_tokens``."""
        agent.base_url = "https://openrouter.ai/api/v1/api.openai.com"
        expected = {"max_tokens": 4096}
        assert agent._max_tokens_param(4096) == expected