fix: audit fixes — 5 bugs found and resolved

Thorough code review found 5 issues across run_agent.py, cli.py, and gateway/:

1. CRITICAL — Gateway stream consumer task never started: stream_consumer_holder
   was checked BEFORE run_sync populated it. Fixed with an async polling pattern
   (same approach as track_agent); see the fix 1 sketch below.

2. MEDIUM-HIGH — Streaming fallback after partial delivery caused a double
   response: if streaming failed after some tokens were delivered, the fallback
   would re-deliver the full response. Now tracks deltas_were_sent and only
   falls back when no tokens have reached consumers yet (fix 2 sketch below).

3. MEDIUM — Codex mode lost the on_first_delta spinner callback: _run_codex_stream
   now accepts an on_first_delta parameter and fires it on the first text delta.
   It is passed through from _interruptible_streaming_api_call via the
   _codex_on_first_delta instance attribute (fix 3 sketch below).

4. MEDIUM — CLI close-tag after-text bypassed tag filtering: text after a
   reasoning close tag was sent directly to _emit_stream_text, skipping
   open-tag detection. Now routes through _stream_delta for full filtering
   (fix 4 sketch below).

5. LOW — Removed 140 lines of dead code: old _streaming_api_call method
   (superseded by _interruptible_streaming_api_call). Updated 13 tests in
   test_run_agent.py and test_openai_client_lifecycle.py to use the new
   method name and signature.

4573 tests passing.
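
Minimal sketches of the first four fixes follow. They are illustrations only,
not the project's actual code: holder shapes, tag names, and helper signatures
are assumptions made for the examples.

Fix 1 sketch, the async polling pattern: instead of checking
stream_consumer_holder once (before the run_sync worker has had a chance to
fill it), poll until the consumer appears, then start the task. The dict key,
interval, and timeout are invented for the example.

    import asyncio

    async def start_stream_consumer(stream_consumer_holder: dict,
                                    poll_interval: float = 0.05,
                                    timeout: float = 5.0) -> asyncio.Task:
        # Checking the holder once races with the worker that populates it;
        # polling guarantees the consumer task actually gets started.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + timeout
        while "consumer" not in stream_consumer_holder:
            if loop.time() > deadline:
                raise TimeoutError("stream consumer was never registered")
            await asyncio.sleep(poll_interval)
        return asyncio.create_task(stream_consumer_holder["consumer"]())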
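
Fix 2 sketch, the deltas_were_sent guard: fall back to a non-streaming call
only when nothing has been delivered yet. stream_call, fallback_call, and
deliver are stand-ins for the real client calls and consumer callback.

    def stream_with_guarded_fallback(stream_call, fallback_call, deliver):
        deltas_were_sent = False
        collected = []
        try:
            for delta in stream_call():
                deltas_were_sent = True  # at least one token reached a consumer
                deliver(delta)
                collected.append(delta)
        except Exception:
            if deltas_were_sent:
                raise                    # partial output delivered; retrying would duplicate it
            return fallback_call()       # nothing delivered yet; non-streaming retry is safe
        return "".join(collected)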
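
Fix 3 sketch, the on_first_delta hook: fire the callback exactly once, on the
first non-empty text delta, so the CLI spinner can stop as soon as visible
output starts. Chunks are simplified to plain strings here.

    def run_codex_stream(chunks, on_first_delta=None):
        fired = False
        for text in chunks:
            if text and not fired:
                fired = True
                if on_first_delta is not None:
                    on_first_delta()  # stop the spinner on the first visible token
            yield text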
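
Fix 4 sketch, re-routing after-close-tag text through the filter: whatever
follows a reasoning close tag goes back through the same delta filter instead
of being emitted raw, so a new open tag in that remainder is still detected.
The <reasoning> tag and the state dict are illustrative, and tags are assumed
to arrive whole within a single delta.

    def stream_delta(state, text, emit,
                     open_tag="<reasoning>", close_tag="</reasoning>"):
        if state.get("in_reasoning"):
            if close_tag in text:
                _, after = text.split(close_tag, 1)
                state["in_reasoning"] = False
                if after:
                    # The bug: emitting `after` directly skipped open-tag detection.
                    stream_delta(state, after, emit, open_tag, close_tag)
            return
        if open_tag in text:
            before, rest = text.split(open_tag, 1)
            if before:
                emit(before)
            state["in_reasoning"] = True
            stream_delta(state, rest, emit, open_tag, close_tag)
            return
        emit(text)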

teknium1 2026-03-16 06:35:46 -07:00
parent 99369b926c
commit 8e07f9ca56
5 changed files with 75 additions and 176 deletions

@@ -59,8 +59,11 @@ def _build_agent(shared_client=None):
agent._interrupt_requested = False
agent._interrupt_message = None
agent._client_lock = threading.RLock()
agent._client_kwargs = {"api_key": "test-key", "base_url": agent.base_url}
agent._client_kwargs = {"api_key": "***", "base_url": agent.base_url}
agent.client = shared_client or FakeSharedClient(lambda **kwargs: {"shared": True})
agent.stream_delta_callback = None
agent._stream_callback = None
agent.reasoning_callback = None
return agent
@@ -173,7 +176,11 @@ def test_streaming_call_recreates_closed_shared_client_before_request(monkeypatc
monkeypatch.setattr(run_agent, "OpenAI", factory)
agent = _build_agent(shared_client=stale_shared)
response = agent._streaming_api_call({"model": agent.model, "messages": []}, lambda _delta: None)
agent.stream_delta_callback = lambda _delta: None
# Force chat_completions mode so the streaming path uses
# chat.completions.create(stream=True) instead of Codex responses.stream()
agent.api_mode = "chat_completions"
response = agent._interruptible_streaming_api_call({"model": agent.model, "messages": []})
assert response.choices[0].message.content == "Hello world"
assert agent.client is replacement_shared

@@ -2329,8 +2329,9 @@ class TestStreamingApiCall:
]
agent.client.chat.completions.create.return_value = iter(chunks)
callback = MagicMock()
agent.stream_delta_callback = callback
resp = agent._streaming_api_call({"messages": []}, callback)
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.choices[0].message.content == "Hello World"
assert resp.choices[0].finish_reason == "stop"
@@ -2347,7 +2348,7 @@ class TestStreamingApiCall:
]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
tc = resp.choices[0].message.tool_calls
assert len(tc) == 1
@@ -2363,7 +2364,7 @@ class TestStreamingApiCall:
]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
tc = resp.choices[0].message.tool_calls
assert len(tc) == 2
@@ -2378,7 +2379,7 @@ class TestStreamingApiCall:
]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.choices[0].message.content == "I'll search"
assert len(resp.choices[0].message.tool_calls) == 1
@@ -2387,7 +2388,7 @@ class TestStreamingApiCall:
chunks = [_make_chunk(finish_reason="stop")]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.choices[0].message.content is None
assert resp.choices[0].message.tool_calls is None
@@ -2399,9 +2400,9 @@ class TestStreamingApiCall:
_make_chunk(finish_reason="stop"),
]
agent.client.chat.completions.create.return_value = iter(chunks)
callback = MagicMock(side_effect=ValueError("boom"))
agent.stream_delta_callback = MagicMock(side_effect=ValueError("boom"))
resp = agent._streaming_api_call({"messages": []}, callback)
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.choices[0].message.content == "Hello World"
@@ -2412,7 +2413,7 @@ class TestStreamingApiCall:
]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.model == "gpt-4o"
@@ -2420,22 +2421,23 @@ class TestStreamingApiCall:
chunks = [_make_chunk(content="x"), _make_chunk(finish_reason="stop")]
agent.client.chat.completions.create.return_value = iter(chunks)
agent._streaming_api_call({"messages": [], "model": "test"}, MagicMock())
agent._interruptible_streaming_api_call({"messages": [], "model": "test"})
call_kwargs = agent.client.chat.completions.create.call_args
assert call_kwargs[1].get("stream") is True or call_kwargs.kwargs.get("stream") is True
def test_api_exception_propagated(self, agent):
def test_api_exception_falls_back_to_non_streaming(self, agent):
"""When streaming fails before any deltas, fallback to non-streaming is attempted."""
agent.client.chat.completions.create.side_effect = ConnectionError("fail")
# The fallback also uses the same client, so it'll fail too
with pytest.raises(ConnectionError, match="fail"):
agent._streaming_api_call({"messages": []}, MagicMock())
agent._interruptible_streaming_api_call({"messages": []})
def test_response_has_uuid_id(self, agent):
chunks = [_make_chunk(content="x"), _make_chunk(finish_reason="stop")]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.id.startswith("stream-")
assert len(resp.id) > len("stream-")
@@ -2449,7 +2451,7 @@ class TestStreamingApiCall:
]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._streaming_api_call({"messages": []}, MagicMock())
resp = agent._interruptible_streaming_api_call({"messages": []})
assert resp.choices[0].message.content == "Hello"
assert resp.model == "gpt-4"
@@ -2505,7 +2507,7 @@ class TestAnthropicInterruptHandler:
def test_streaming_has_anthropic_branch(self):
"""_streaming_api_call must also handle Anthropic interrupt."""
import inspect
source = inspect.getsource(AIAgent._streaming_api_call)
source = inspect.getsource(AIAgent._interruptible_streaming_api_call)
assert "anthropic_messages" in source, \
"_streaming_api_call must handle Anthropic interrupt"