feat: call_llm/async_call_llm + config slots + migrate all consumers
Add centralized call_llm() and async_call_llm() functions that own the
full LLM request lifecycle:

1. Resolve provider + model from task config or explicit args
2. Get or create a cached client for that provider
3. Format request args (max_tokens handling, provider extra_body)
4. Make the API call with max_tokens/max_completion_tokens retry
5. Return the response

Config: expanded auxiliary section with provider:model slots for all
tasks (compression, vision, web_extract, session_search, skills_hub,
mcp, flush_memories). Config version bumped to 7.

Migrated all auxiliary consumers:
- context_compressor.py: uses call_llm(task='compression')
- vision_tools.py: uses async_call_llm(task='vision')
- web_tools.py: uses async_call_llm(task='web_extract')
- session_search_tool.py: uses async_call_llm(task='session_search')
- browser_tool.py: uses call_llm(task='vision'/'web_extract')
- mcp_tool.py: uses call_llm(task='mcp')
- skills_guard.py: uses call_llm(provider='openrouter')
- run_agent.py flush_memories: uses call_llm(task='flush_memories')

Tests updated for context_compressor and MCP tool. Some test mocks
still need updating (15 remaining failures from mock pattern changes,
2 pre-existing).
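For orientation, a minimal sketch of the lifecycle described above. The config shape, slot values, and helper names here are illustrative assumptions, not the commit's actual code:

```python
# Sketch only: AUXILIARY_TASKS, _client_for, and the slot value are assumed.
from functools import lru_cache

from openai import OpenAI

# Hypothetical "provider:model" slots, one per auxiliary task (the real
# section also covers vision, web_extract, session_search, skills_hub,
# mcp, flush_memories).
AUXILIARY_TASKS = {
    "compression": "openrouter:some/compression-model",
}


@lru_cache(maxsize=None)
def _client_for(provider: str) -> OpenAI:
    # 2. Get or create a cached client for that provider; real code would
    #    look up per-provider base_url/api_key instead of defaulting.
    return OpenAI()


def call_llm(messages, task=None, provider=None, model=None, **kwargs):
    # 1. Resolve provider + model from task config or explicit args.
    if task is not None and (provider is None or model is None):
        provider, _, model = AUXILIARY_TASKS[task].partition(":")
    client = _client_for(provider)
    # 3. Request-arg formatting (max_tokens handling, provider extra_body)
    #    would go here.
    try:
        # 4./5. Make the API call and return the raw response.
        return client.chat.completions.create(model=model, messages=messages, **kwargs)
    except Exception:
        # 4. Retry swapping max_tokens -> max_completion_tokens for models
        #    that reject the legacy parameter.
        if "max_tokens" not in kwargs:
            raise
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return client.chat.completions.create(model=model, messages=messages, **kwargs)
```

async_call_llm() would mirror this shape with an AsyncOpenAI client and awaited calls.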
parent 013cc4d2fc
commit 0aa31cd3cb
13 changed files with 552 additions and 375 deletions
@@ -9,8 +9,7 @@ from agent.context_compressor import ContextCompressor
 @pytest.fixture()
 def compressor():
     """Create a ContextCompressor with mocked dependencies."""
-    with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-         patch("agent.context_compressor.get_text_auxiliary_client", return_value=(None, None)):
+    with patch("agent.context_compressor.get_model_context_length", return_value=100000):
         c = ContextCompressor(
             model="test/model",
             threshold_percent=0.85,
@@ -119,14 +118,11 @@ class TestGenerateSummaryNoneContent:
     """Regression: content=None (from tool-call-only assistant messages) must not crash."""
 
     def test_none_content_does_not_crash(self):
-        mock_client = MagicMock()
         mock_response = MagicMock()
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: tool calls happened"
-        mock_client.chat.completions.create.return_value = mock_response
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True)
 
             messages = [
@@ -139,14 +135,14 @@ class TestGenerateSummaryNoneContent:
                 {"role": "user", "content": "thanks"},
             ]
 
-            summary = c._generate_summary(messages)
+            with patch("agent.context_compressor.call_llm", return_value=mock_response):
+                summary = c._generate_summary(messages)
             assert isinstance(summary, str)
             assert "CONTEXT SUMMARY" in summary
 
     def test_none_content_in_system_message_compress(self):
         """System message with content=None should not crash during compress."""
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(None, None)):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
 
             msgs = [{"role": "system", "content": None}] + [
@@ -165,12 +161,12 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
         mock_client.chat.completions.create.return_value = mock_response
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True)
 
             msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
-            result = c.compress(msgs)
+            with patch("agent.context_compressor.call_llm", return_value=mock_response):
+                result = c.compress(msgs)
 
             # Should have summary message in the middle
             contents = [m.get("content", "") for m in result]
@@ -184,8 +180,7 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
         mock_client.chat.completions.create.return_value = mock_response
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(
                 model="test",
                 quiet_mode=True,
@@ -212,7 +207,8 @@ class TestCompressWithClient:
                 {"role": "user", "content": "later 4"},
             ]
 
-            result = c.compress(msgs)
+            with patch("agent.context_compressor.call_llm", return_value=mock_response):
+                result = c.compress(msgs)
 
             answered_ids = {
                 msg.get("tool_call_id")
@@ -232,8 +228,7 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
         mock_client.chat.completions.create.return_value = mock_response
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
 
             # Last head message (index 1) is "assistant" → summary should be "user"
@@ -245,7 +240,8 @@ class TestCompressWithClient:
                 {"role": "user", "content": "msg 4"},
                 {"role": "assistant", "content": "msg 5"},
             ]
-            result = c.compress(msgs)
+            with patch("agent.context_compressor.call_llm", return_value=mock_response):
+                result = c.compress(msgs)
             summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
             assert len(summary_msg) == 1
             assert summary_msg[0]["role"] == "user"
@@ -258,8 +254,7 @@ class TestCompressWithClient:
         mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
         mock_client.chat.completions.create.return_value = mock_response
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2)
 
             # Last head message (index 2) is "user" → summary should be "assistant"
@@ -273,20 +268,18 @@ class TestCompressWithClient:
                 {"role": "user", "content": "msg 6"},
                 {"role": "assistant", "content": "msg 7"},
             ]
-            result = c.compress(msgs)
+            with patch("agent.context_compressor.call_llm", return_value=mock_response):
+                result = c.compress(msgs)
             summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
             assert len(summary_msg) == 1
             assert summary_msg[0]["role"] == "assistant"
 
     def test_summarization_does_not_start_tail_with_tool_outputs(self):
-        mock_client = MagicMock()
         mock_response = MagicMock()
         mock_response.choices = [MagicMock()]
         mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
-        mock_client.chat.completions.create.return_value = mock_response
 
-        with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
-             patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
             c = ContextCompressor(
                 model="test",
                 quiet_mode=True,
@@ -309,7 +302,8 @@ class TestCompressWithClient:
                 {"role": "user", "content": "latest user"},
             ]
 
-            result = c.compress(msgs)
+            with patch("agent.context_compressor.call_llm", return_value=mock_response):
+                result = c.compress(msgs)
 
             called_ids = {
                 tc["id"]
@@ -1828,8 +1828,8 @@ class TestSamplingCallbackText:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             params = _make_sampling_params()
             result = asyncio.run(self.handler(None, params))
@@ -1847,13 +1847,13 @@ class TestSamplingCallbackText:
         fake_client.chat.completions.create.return_value = _make_llm_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
-        ):
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
+        ) as mock_call:
            params = _make_sampling_params(system_prompt="Be helpful")
            asyncio.run(self.handler(None, params))
 
-        call_args = fake_client.chat.completions.create.call_args
+        call_args = mock_call.call_args
         messages = call_args.kwargs["messages"]
         assert messages[0] == {"role": "system", "content": "Be helpful"}
 
@@ -1865,8 +1865,8 @@ class TestSamplingCallbackText:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             params = _make_sampling_params()
             result = asyncio.run(self.handler(None, params))
@@ -1889,8 +1889,8 @@ class TestSamplingCallbackToolUse:
         fake_client.chat.completions.create.return_value = _make_llm_tool_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             params = _make_sampling_params()
             result = asyncio.run(self.handler(None, params))
@@ -1916,8 +1916,8 @@ class TestSamplingCallbackToolUse:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(self.handler(None, _make_sampling_params()))
 
@@ -1939,8 +1939,8 @@ class TestToolLoopGovernance:
         fake_client.chat.completions.create.return_value = _make_llm_tool_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             params = _make_sampling_params()
             # Round 1, 2: allowed
@@ -1959,8 +1959,8 @@ class TestToolLoopGovernance:
         fake_client = MagicMock()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             # Tool response (round 1 of 1 allowed)
             fake_client.chat.completions.create.return_value = _make_llm_tool_response()
@@ -1984,8 +1984,8 @@ class TestToolLoopGovernance:
         fake_client.chat.completions.create.return_value = _make_llm_tool_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
        ):
             result = asyncio.run(handler(None, _make_sampling_params()))
             assert isinstance(result, ErrorData)
@@ -2003,8 +2003,8 @@ class TestSamplingErrors:
         fake_client.chat.completions.create.return_value = _make_llm_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             # First call succeeds
             r1 = asyncio.run(handler(None, _make_sampling_params()))
@@ -2017,20 +2017,16 @@ class TestSamplingErrors:
 
     def test_timeout_error(self):
         handler = SamplingHandler("to", {"timeout": 0.05})
-        fake_client = MagicMock()
-
         def slow_call(**kwargs):
             import threading
             # Use an event to ensure the thread truly blocks long enough
             evt = threading.Event()
             evt.wait(5)  # blocks for up to 5 seconds (cancelled by timeout)
             return _make_llm_response()
 
-        fake_client.chat.completions.create.side_effect = slow_call
-
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            side_effect=slow_call,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
             assert isinstance(result, ErrorData)
@@ -2041,12 +2037,11 @@ class TestSamplingErrors:
         handler = SamplingHandler("np", {})
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(None, None),
+            "agent.auxiliary_client.call_llm",
+            side_effect=RuntimeError("No LLM provider configured"),
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
             assert isinstance(result, ErrorData)
             assert "No LLM provider" in result.message
             assert handler.metrics["errors"] == 1
 
     def test_empty_choices_returns_error(self):
@@ -2060,8 +2055,8 @@ class TestSamplingErrors:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2080,8 +2075,8 @@ class TestSamplingErrors:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2099,8 +2094,8 @@ class TestSamplingErrors:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2120,8 +2115,8 @@ class TestModelWhitelist:
         fake_client.chat.completions.create.return_value = _make_llm_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "test-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
             assert isinstance(result, CreateMessageResult)
@@ -2131,8 +2126,8 @@ class TestModelWhitelist:
         fake_client = MagicMock()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "gpt-3.5-turbo"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
             assert isinstance(result, ErrorData)
@@ -2145,8 +2140,8 @@ class TestModelWhitelist:
         fake_client.chat.completions.create.return_value = _make_llm_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "any-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
             assert isinstance(result, CreateMessageResult)
@@ -2166,8 +2161,8 @@ class TestMalformedToolCallArgs:
         )
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2194,8 +2189,8 @@ class TestMalformedToolCallArgs:
         fake_client.chat.completions.create.return_value = response
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             result = asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2214,8 +2209,8 @@ class TestMetricsTracking:
         fake_client.chat.completions.create.return_value = _make_llm_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2229,8 +2224,8 @@ class TestMetricsTracking:
         fake_client.chat.completions.create.return_value = _make_llm_tool_response()
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(fake_client, "default-model"),
+            "agent.auxiliary_client.call_llm",
+            return_value=fake_client.chat.completions.create.return_value,
         ):
             asyncio.run(handler(None, _make_sampling_params()))
 
@@ -2241,8 +2236,8 @@ class TestMetricsTracking:
         handler = SamplingHandler("met3", {})
 
         with patch(
-            "agent.auxiliary_client.get_text_auxiliary_client",
-            return_value=(None, None),
+            "agent.auxiliary_client.call_llm",
+            side_effect=RuntimeError("No LLM provider configured"),
         ):
             asyncio.run(handler(None, _make_sampling_params()))
 
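For reviewers, the migrated mock pattern from the hunks above, isolated as a self-contained sketch (module paths and names are taken from the diff; the exact ContextCompressor behavior is assumed):

```python
from unittest.mock import MagicMock, patch

from agent.context_compressor import ContextCompressor


def test_compress_patches_call_llm():
    # call_llm() returns the raw response object, so tests only need to
    # shape choices[0].message.content -- no fake client plumbing.
    mock_response = MagicMock()
    mock_response.choices = [MagicMock()]
    mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"

    with patch("agent.context_compressor.get_model_context_length", return_value=100000):
        c = ContextCompressor(model="test", quiet_mode=True)

        msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]

        # Patch call_llm where it is consumed, instead of stubbing
        # get_text_auxiliary_client and a fake OpenAI client.
        with patch("agent.context_compressor.call_llm", return_value=mock_response):
            result = c.compress(msgs)

    assert any("CONTEXT SUMMARY" in (m.get("content") or "") for m in result)
```

The remaining mock failures noted in the commit message come from tests that still stub the old client tuple rather than call_llm itself.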