refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults. - Updated the context compressor's summary target tokens to 2500 for improved performance. - Added external credential detection for Codex CLI to streamline authentication. - Refactored various components to ensure consistent handling of authentication and model selection across the application.
2026-02-28 21:47:51 -08:00 · 2026-02-28 21:47:51 -08:00 · 500f0eab4a
commit 500f0eab4a
parent 86b1db0598
22 changed files with 1784 additions and 207 deletions
--- a/tests/test_flush_memories_codex.py
+++ b/tests/test_flush_memories_codex.py
@ -0,0 +1,225 @@
+"""Tests for flush_memories() working correctly across all provider modes.
+
+Catches the bug where Codex mode called chat.completions.create on a
+Responses-only client, which would fail silently or with a 404.
+"""
+
+import json
+import os
+import sys
+import types
+from types import SimpleNamespace
+from unittest.mock import patch, MagicMock, call
+
+import pytest
+
+sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
+sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
+sys.modules.setdefault("fal_client", types.SimpleNamespace())
+
+import run_agent
+
+
+class _FakeOpenAI:
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
+        self.api_key = kwargs.get("api_key", "test")
+        self.base_url = kwargs.get("base_url", "http://test")
+
+    def close(self):
+        pass
+
+
+def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
+    """Build an AIAgent with mocked internals, ready for flush_memories testing."""
+    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
+        {
+            "type": "function",
+            "function": {
+                "name": "memory",
+                "description": "Manage memories.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "action": {"type": "string"},
+                        "target": {"type": "string"},
+                        "content": {"type": "string"},
+                    },
+                },
+            },
+        },
+    ])
+    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
+    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
+
+    agent = run_agent.AIAgent(
+        api_key="test-key",
+        base_url="https://test.example.com/v1",
+        provider=provider,
+        api_mode=api_mode,
+        max_iterations=4,
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    # Give it a valid memory store
+    agent._memory_store = MagicMock()
+    agent._memory_flush_min_turns = 1
+    agent._user_turn_count = 5
+    return agent
+
+
+def _chat_response_with_memory_call():
+    """Simulated chat completions response with a memory tool call."""
+    return SimpleNamespace(
+        choices=[SimpleNamespace(
+            message=SimpleNamespace(
+                content=None,
+                tool_calls=[SimpleNamespace(
+                    function=SimpleNamespace(
+                        name="memory",
+                        arguments=json.dumps({
+                            "action": "add",
+                            "target": "notes",
+                            "content": "User prefers dark mode.",
+                        }),
+                    ),
+                )],
+            ),
+        )],
+        usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
+    )
+
+
+class TestFlushMemoriesUsesAuxiliaryClient:
+    """When an auxiliary client is available, flush_memories should use it
+    instead of self.client -- especially critical in Codex mode."""
+
+    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
+        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
+
+        mock_aux_client = MagicMock()
+        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
+
+        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Remember this"},
+            ]
+            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
+                agent.flush_memories(messages)
+
+        mock_aux_client.chat.completions.create.assert_called_once()
+        call_kwargs = mock_aux_client.chat.completions.create.call_args
+        assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"
+
+    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
+        """Non-Codex mode with no auxiliary falls back to self.client."""
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+        agent.client = MagicMock()
+        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
+
+        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+                {"role": "user", "content": "Save this"},
+            ]
+            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
+                agent.flush_memories(messages)
+
+        agent.client.chat.completions.create.assert_called_once()
+
+    def test_flush_executes_memory_tool_calls(self, monkeypatch):
+        """Verify that memory tool calls from the flush response actually get executed."""
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+
+        mock_aux_client = MagicMock()
+        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
+
+        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi"},
+                {"role": "user", "content": "Note this"},
+            ]
+            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
+                agent.flush_memories(messages)
+
+        mock_memory.assert_called_once()
+        call_kwargs = mock_memory.call_args
+        assert call_kwargs.kwargs["action"] == "add"
+        assert call_kwargs.kwargs["target"] == "notes"
+        assert "dark mode" in call_kwargs.kwargs["content"]
+
+    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
+        """After flush, the flush prompt and any response should be removed from messages."""
+        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
+
+        mock_aux_client = MagicMock()
+        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
+
+        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi"},
+                {"role": "user", "content": "Remember X"},
+            ]
+            original_len = len(messages)
+            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
+                agent.flush_memories(messages)
+
+        # Messages should not grow from the flush
+        assert len(messages) <= original_len
+        # No flush sentinel should remain
+        for msg in messages:
+            assert "_flush_sentinel" not in msg
+
+
+class TestFlushMemoriesCodexFallback:
+    """When no auxiliary client exists and we're in Codex mode, flush should
+    use the Codex Responses API path instead of chat.completions."""
+
+    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
+        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
+
+        codex_response = SimpleNamespace(
+            output=[
+                SimpleNamespace(
+                    type="function_call",
+                    call_id="call_1",
+                    name="memory",
+                    arguments=json.dumps({
+                        "action": "add",
+                        "target": "notes",
+                        "content": "Codex flush test",
+                    }),
+                ),
+            ],
+            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
+            status="completed",
+            model="gpt-5-codex",
+        )
+
+        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
+             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
+             patch.object(agent, "_build_api_kwargs") as mock_build, \
+             patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
+            mock_build.return_value = {
+                "model": "gpt-5-codex",
+                "instructions": "test",
+                "input": [],
+                "tools": [],
+                "max_output_tokens": 4096,
+            }
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+        mock_stream.assert_called_once()
+        mock_memory.assert_called_once()
+        assert mock_memory.call_args.kwargs["content"] == "Codex flush test"