refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.
This commit is contained in:
teknium1 2026-02-28 21:47:51 -08:00
parent 86b1db0598
commit 500f0eab4a
22 changed files with 1784 additions and 207 deletions

View file

@ -0,0 +1,168 @@
"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
import json
import os
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
from agent.auxiliary_client import (
get_text_auxiliary_client,
get_vision_auxiliary_client,
auxiliary_max_tokens_param,
_read_codex_access_token,
)
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove every provider-selection env var so each test starts clean."""
    provider_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_BASE_URL",
        "OPENAI_API_KEY",
        "OPENAI_MODEL",
        "LLM_MODEL",
        "NOUS_INFERENCE_BASE_URL",
    )
    for name in provider_vars:
        monkeypatch.delenv(name, raising=False)
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Create a writable ~/.codex/ with a valid auth.json and stub the token reader."""
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    payload = {
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }
    (codex_dir / "auth.json").write_text(json.dumps(payload))
    # Short-circuit the reader so tests don't depend on Path.home().
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir
class TestReadCodexAccessToken:
    """Unit tests for _read_codex_access_token() auth.json parsing."""

    @staticmethod
    def _write_auth(home, body):
        # Lay down <home>/.codex/auth.json with the given raw body.
        codex_dir = home / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(body)

    @staticmethod
    def _read_with_home(home):
        # Run the reader with Path.home() pointed at the fake home dir.
        with patch("agent.auxiliary_client.Path.home", return_value=home):
            return _read_codex_access_token()

    def test_valid_auth_file(self, tmp_path):
        self._write_auth(tmp_path, json.dumps(
            {"tokens": {"access_token": "tok-123", "refresh_token": "r-456"}}
        ))
        assert self._read_with_home(tmp_path) == "tok-123"

    def test_missing_file_returns_none(self, tmp_path):
        assert self._read_with_home(tmp_path) is None

    def test_empty_token_returns_none(self, tmp_path):
        # Whitespace-only tokens must be treated as absent.
        self._write_auth(tmp_path, json.dumps({"tokens": {"access_token": " "}}))
        assert self._read_with_home(tmp_path) is None

    def test_malformed_json_returns_none(self, tmp_path):
        self._write_auth(tmp_path, "{bad json")
        assert self._read_with_home(tmp_path) is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        self._write_auth(tmp_path, json.dumps({"other": "data"}))
        assert self._read_with_home(tmp_path) is None
class TestGetTextAuxiliaryClient:
    """Test the full resolution chain for get_text_auxiliary_client.

    Priority order exercised below (as the tests demonstrate): OpenRouter env
    key -> Nous portal auth -> custom OPENAI_BASE_URL endpoint -> Codex OAuth
    fallback -> (None, None) when nothing is configured.
    """

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        # Codex creds exist (fixture), but OPENROUTER_API_KEY must still win.
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "google/gemini-3-flash-preview"
            mock_openai.assert_called_once()
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        # Nous portal auth present alongside Codex creds: Nous model is chosen.
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_nous.return_value = {"access_token": "nous-tok"}
            client, model = get_text_auxiliary_client()
            assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        # A custom OPENAI_BASE_URL endpoint outranks the Codex fallback.
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        # Override the autouse monkeypatch for codex
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-4o-mini"
            call_kwargs = mock_openai.call_args
            assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        # With only Codex creds available, a Codex wrapper client is returned.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
            assert model == "gpt-5.3-codex"
            # Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
            from agent.auxiliary_client import CodexAuxiliaryClient
            assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self):
        # No provider at all: both client and model resolve to None.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = get_text_auxiliary_client()
            assert client is None
            assert model is None
class TestCodexNotInVisionClient:
    """The Codex fallback must never be offered for vision workloads."""

    def test_vision_returns_none_without_openrouter_nous(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            resolved_client, resolved_model = get_vision_auxiliary_client()
            assert resolved_client is None
            assert resolved_model is None
class TestAuxiliaryMaxTokensParam:
    """auxiliary_max_tokens_param() should always emit a max_tokens kwarg."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
            assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

View file

@ -185,8 +185,8 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
_write_codex_auth(codex_home)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("CODEX_HOME", str(codex_home))
monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda _: "/usr/local/bin/codex")
monkeypatch.setattr("hermes_cli.auth.subprocess.run", lambda *a, **k: None)
# Mock input() to accept existing credentials
monkeypatch.setattr("builtins.input", lambda _: "y")
_login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"])
@ -201,19 +201,10 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL
def test_login_command_defaults_to_nous(monkeypatch):
    # Counts which provider-specific login helper login_command dispatches to.
    calls = {"nous": 0, "codex": 0}
    def _fake_nous(args, pconfig):
        calls["nous"] += 1
    def _fake_codex(args, pconfig):
        calls["codex"] += 1
    monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_nous)
    monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_codex)
    login_command(SimpleNamespace())
    # With no explicit provider, the Nous flow must be chosen exactly once.
    assert calls["nous"] == 1
    assert calls["codex"] == 0
def test_login_command_shows_deprecation(monkeypatch, capsys):
    """login_command is deprecated and directs users to hermes model."""
    with pytest.raises(SystemExit) as exit_info:
        login_command(SimpleNamespace())
    # Deprecation path exits cleanly (code 0) after printing guidance.
    assert exit_info.value.code == 0
    printed = capsys.readouterr()
    assert "hermes model" in printed.out

80
tests/test_cli_init.py Normal file
View file

@ -0,0 +1,80 @@
"""Tests for HermesCLI initialization -- catches configuration bugs
that only manifest at runtime (not in mocked unit tests)."""
import os
import sys
from unittest.mock import patch, MagicMock
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
def _make_cli(**kwargs):
    """Build a HermesCLI instance with tool discovery stubbed to an empty list."""
    from cli import HermesCLI

    with patch("cli.get_tool_definitions", return_value=[]):
        instance = HermesCLI(**kwargs)
    return instance
class TestMaxTurnsResolution:
    """max_turns must always resolve to a positive integer, never None."""

    def test_default_max_turns_is_integer(self):
        cli = _make_cli()
        assert isinstance(cli.max_turns, int)
        assert cli.max_turns > 0

    def test_explicit_max_turns_honored(self):
        cli = _make_cli(max_turns=25)
        assert cli.max_turns == 25

    def test_none_max_turns_gets_default(self):
        cli = _make_cli(max_turns=None)
        assert isinstance(cli.max_turns, int)
        assert cli.max_turns > 0

    def test_env_var_max_turns(self, monkeypatch):
        """Env var is used when config file doesn't set max_turns."""
        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
        import cli as cli_module
        # BUG FIX: the previous version only restored CLI_CONFIG when the saved
        # value was not None, leaking max_turns=None into the shared module-level
        # dict for later tests. A sentinel distinguishes "key absent" from
        # "key present but None" so restoration is exact and unconditional.
        _missing = object()
        original = cli_module.CLI_CONFIG["agent"].get("max_turns", _missing)
        cli_module.CLI_CONFIG["agent"]["max_turns"] = None
        try:
            cli_obj = _make_cli()
            assert cli_obj.max_turns == 42
        finally:
            if original is _missing:
                cli_module.CLI_CONFIG["agent"].pop("max_turns", None)
            else:
                cli_module.CLI_CONFIG["agent"]["max_turns"] = original

    def test_max_turns_never_none_for_agent(self):
        """The value passed to AIAgent must never be None (causes TypeError in run_conversation)."""
        cli = _make_cli()
        assert cli.max_turns is not None
class TestVerboseAndToolProgress:
    """Runtime display settings must resolve to usable types."""

    def test_default_verbose_is_bool(self):
        assert isinstance(_make_cli().verbose, bool)

    def test_tool_progress_mode_is_string(self):
        instance = _make_cli()
        assert isinstance(instance.tool_progress_mode, str)
        assert instance.tool_progress_mode in ("off", "new", "all", "verbose")
class TestProviderResolution:
    """Provider connection attributes must resolve to sane values."""

    def test_api_key_is_string_or_none(self):
        key = _make_cli().api_key
        assert key is None or isinstance(key, str)

    def test_base_url_is_string(self):
        url = _make_cli().base_url
        assert isinstance(url, str)
        assert url.startswith("http")

    def test_model_is_string(self):
        model_name = _make_cli().model
        assert isinstance(model_name, str)
        assert len(model_name) > 0

View file

@ -149,6 +149,11 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
runner._prefill_messages = []
runner._reasoning_config = None
runner._running_agents = {}
from unittest.mock import MagicMock, AsyncMock
runner.hooks = MagicMock()
runner.hooks.emit = AsyncMock()
runner.hooks.loaded_hooks = []
runner._session_db = None
source = SessionSource(
platform=Platform.LOCAL,

View file

@ -0,0 +1,51 @@
"""Tests for detect_external_credentials() -- Phase 2 credential sync."""
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from hermes_cli.auth import detect_external_credentials
class TestDetectCodexCLI:
    """detect_external_credentials() discovery of Codex CLI auth files."""

    @staticmethod
    def _write_codex(home, body):
        # Create <home>/.codex/auth.json containing the raw body.
        codex_dir = home / ".codex"
        codex_dir.mkdir()
        auth_path = codex_dir / "auth.json"
        auth_path.write_text(body)
        return codex_dir, auth_path

    @staticmethod
    def _detect(codex_home):
        # Run detection with the Codex home resolver pinned to codex_home.
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_home):
            return detect_external_credentials()

    def test_detects_valid_codex_auth(self, tmp_path):
        codex_dir, auth_path = self._write_codex(tmp_path, json.dumps(
            {"tokens": {"access_token": "tok-123", "refresh_token": "ref-456"}}
        ))
        hits = [c for c in self._detect(codex_dir) if c["provider"] == "openai-codex"]
        assert len(hits) == 1
        assert "Codex CLI" in hits[0]["label"]
        assert hits[0]["path"] == str(auth_path)

    def test_skips_codex_without_access_token(self, tmp_path):
        codex_dir, _ = self._write_codex(tmp_path, json.dumps({"tokens": {}}))
        found = self._detect(codex_dir)
        assert not any(c["provider"] == "openai-codex" for c in found)

    def test_skips_missing_codex_dir(self, tmp_path):
        found = self._detect(tmp_path / "nonexistent")
        assert not any(c["provider"] == "openai-codex" for c in found)

    def test_skips_malformed_codex_auth(self, tmp_path):
        codex_dir, _ = self._write_codex(tmp_path, "{bad json")
        found = self._detect(codex_dir)
        assert not any(c["provider"] == "openai-codex" for c in found)

    def test_returns_empty_when_nothing_found(self, tmp_path):
        assert self._detect(tmp_path / ".codex") == []

View file

@ -0,0 +1,225 @@
"""Tests for flush_memories() working correctly across all provider modes.
Catches the bug where Codex mode called chat.completions.create on a
Responses-only client, which would fail silently or with a 404.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock, call
import pytest
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
class _FakeOpenAI:
def __init__(self, **kwargs):
self.kwargs = kwargs
self.api_key = kwargs.get("api_key", "test")
self.base_url = kwargs.get("base_url", "http://test")
def close(self):
pass
def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Build an AIAgent with mocked internals, ready for flush_memories testing.

    Patches tool discovery down to a single `memory` tool, stubs the toolset
    requirement check, and swaps the OpenAI client class for _FakeOpenAI so
    construction performs no network I/O.
    """
    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
        {
            "type": "function",
            "function": {
                "name": "memory",
                "description": "Manage memories.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "action": {"type": "string"},
                        "target": {"type": "string"},
                        "content": {"type": "string"},
                    },
                },
            },
        },
    ])
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    agent = run_agent.AIAgent(
        api_key="test-key",
        base_url="https://test.example.com/v1",
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    # Give it a valid memory store and turn counters that make a flush eligible
    # immediately (min_turns=1, five user turns already recorded).
    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
def _chat_response_with_memory_call():
"""Simulated chat completions response with a memory tool call."""
return SimpleNamespace(
choices=[SimpleNamespace(
message=SimpleNamespace(
content=None,
tool_calls=[SimpleNamespace(
function=SimpleNamespace(
name="memory",
arguments=json.dumps({
"action": "add",
"target": "notes",
"content": "User prefers dark mode.",
}),
),
)],
),
)],
usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
)
class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""

    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        # Codex-mode agent with a mocked auxiliary chat client: the flush call
        # must land on the auxiliary client with the auxiliary model name.
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)
            mock_aux_client.chat.completions.create.assert_called_once()
            call_kwargs = mock_aux_client.chat.completions.create.call_args
            # call_args.kwargs and call_args[1] are the same mapping; either spelling is accepted.
            assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"

    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)
            agent.client.chat.completions.create.assert_called_once()

    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)
            mock_memory.assert_called_once()
            call_kwargs = mock_memory.call_args
            # The fake response requested a "dark mode" note; verify it arrived intact.
            assert call_kwargs.kwargs["action"] == "add"
            assert call_kwargs.kwargs["target"] == "notes"
            assert "dark mode" in call_kwargs.kwargs["content"]

    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)
            # Messages should not grow from the flush
            assert len(messages) <= original_len
            # No flush sentinel should remain
            for msg in messages:
                assert "_flush_sentinel" not in msg
class TestFlushMemoriesCodexFallback:
    """When no auxiliary client exists and we're in Codex mode, flush should
    use the Codex Responses API path instead of chat.completions."""

    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
        # Responses-API style payload: the tool call arrives as a top-level
        # "function_call" output item, not as choices[].message.tool_calls.
        codex_response = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="function_call",
                    call_id="call_1",
                    name="memory",
                    arguments=json.dumps({
                        "action": "add",
                        "target": "notes",
                        "content": "Codex flush test",
                    }),
                ),
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
            status="completed",
            model="gpt-5-codex",
        )
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
             patch.object(agent, "_build_api_kwargs") as mock_build, \
             patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
            mock_build.return_value = {
                "model": "gpt-5-codex",
                "instructions": "test",
                "input": [],
                "tools": [],
                "max_output_tokens": 4096,
            }
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)
            # The flush must route through the Codex streaming path and still
            # execute the memory tool call extracted from the Responses output.
            mock_stream.assert_called_once()
            mock_memory.assert_called_once()
            assert mock_memory.call_args.kwargs["content"] == "Codex flush test"

View file

@ -0,0 +1,460 @@
"""Provider parity tests: verify that AIAgent builds correct API kwargs
and handles responses properly for all supported providers.
Ensures changes to one provider path don't silently break another.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
from run_agent import AIAgent
# ── Helpers ──────────────────────────────────────────────────────────────────
def _tool_defs(*names):
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
class _FakeOpenAI:
def __init__(self, **kw):
self.api_key = kw.get("api_key", "test")
self.base_url = kw.get("base_url", "http://test")
def close(self):
pass
def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
    """Construct an AIAgent for the given provider with all I/O stubbed out.

    Tool discovery returns two fixed tools (web_search, terminal), the toolset
    requirement check is disabled, and the OpenAI client class is replaced by
    _FakeOpenAI so construction never touches the network.
    """
    monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal"))
    monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {})
    monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI)
    return AIAgent(
        api_key="test-key",
        base_url=base_url,
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
# ── _build_api_kwargs tests ─────────────────────────────────────────────────
class TestBuildApiKwargsOpenRouter:
    """_build_api_kwargs must emit Chat Completions-shaped kwargs for OpenRouter."""

    @staticmethod
    def _kwargs(monkeypatch):
        # Shared builder: one OpenRouter agent, one user message.
        agent = _make_agent(monkeypatch, "openrouter")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_uses_chat_completions_format(self, monkeypatch):
        kwargs = self._kwargs(monkeypatch)
        assert "messages" in kwargs
        assert "model" in kwargs
        assert kwargs["messages"][-1]["content"] == "hi"

    def test_includes_reasoning_in_extra_body(self, monkeypatch):
        extra = self._kwargs(monkeypatch).get("extra_body", {})
        assert "reasoning" in extra
        assert extra["reasoning"]["enabled"] is True

    def test_includes_tools(self, monkeypatch):
        kwargs = self._kwargs(monkeypatch)
        assert "tools" in kwargs
        assert "web_search" in [t["function"]["name"] for t in kwargs["tools"]]

    def test_no_responses_api_fields(self, monkeypatch):
        kwargs = self._kwargs(monkeypatch)
        assert "input" not in kwargs
        assert "instructions" not in kwargs
        assert "store" not in kwargs
class TestBuildApiKwargsNousPortal:
    """Nous portal requests stay in Chat Completions shape and carry product tags."""

    _NOUS_URL = "https://inference-api.nousresearch.com/v1"

    def test_includes_nous_product_tags(self, monkeypatch):
        agent = _make_agent(monkeypatch, "nous", base_url=self._NOUS_URL)
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert kwargs.get("extra_body", {}).get("tags") == ["product=hermes-agent"]

    def test_uses_chat_completions_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "nous", base_url=self._NOUS_URL)
        kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
        assert "messages" in kwargs
        assert "input" not in kwargs
class TestBuildApiKwargsCustomEndpoint:
    """Custom OpenAI-compatible endpoints get plain Chat Completions kwargs."""

    @staticmethod
    def _kwargs(monkeypatch):
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_uses_chat_completions_format(self, monkeypatch):
        kwargs = self._kwargs(monkeypatch)
        assert "messages" in kwargs
        assert "input" not in kwargs

    def test_no_openrouter_extra_body(self, monkeypatch):
        # OpenRouter-only reasoning flags must not leak into custom endpoints.
        assert "reasoning" not in self._kwargs(monkeypatch).get("extra_body", {})
class TestBuildApiKwargsCodex:
    """Codex mode must build Responses-API kwargs, not Chat Completions ones."""

    @staticmethod
    def _kwargs(monkeypatch):
        # Shared builder: one Codex-mode agent, one user message.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_uses_responses_api_format(self, monkeypatch):
        kwargs = self._kwargs(monkeypatch)
        assert "input" in kwargs
        assert "instructions" in kwargs
        assert "messages" not in kwargs
        assert kwargs["store"] is False

    def test_includes_reasoning_config(self, monkeypatch):
        kwargs = self._kwargs(monkeypatch)
        assert "reasoning" in kwargs
        assert kwargs["reasoning"]["effort"] == "medium"

    def test_includes_encrypted_content_in_include(self, monkeypatch):
        assert "reasoning.encrypted_content" in self._kwargs(monkeypatch).get("include", [])

    def test_tools_converted_to_responses_format(self, monkeypatch):
        tools = self._kwargs(monkeypatch).get("tools", [])
        assert len(tools) > 0
        # Responses format has "name" at top level, not nested under "function"
        assert "name" in tools[0]
        assert "function" not in tools[0]
# ── Message conversion tests ────────────────────────────────────────────────
class TestChatMessagesToResponsesInput:
    """Verify _chat_messages_to_responses_input for Codex mode."""

    def test_user_message_passes_through(self, monkeypatch):
        # Plain user turns convert 1:1 into Responses input items.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hello"}]
        items = agent._chat_messages_to_responses_input(messages)
        assert items == [{"role": "user", "content": "hello"}]

    def test_system_messages_filtered(self, monkeypatch):
        # System prompts are dropped from the input item list.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hello"},
        ]
        items = agent._chat_messages_to_responses_input(messages)
        assert len(items) == 1
        assert items[0]["role"] == "user"

    def test_assistant_tool_calls_become_function_call_items(self, monkeypatch):
        # Assistant tool_calls map to type="function_call" items keyed by call_id.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "id": "call_abc",
                "call_id": "call_abc",
                "function": {"name": "web_search", "arguments": '{"query": "test"}'},
            }],
        }]
        items = agent._chat_messages_to_responses_input(messages)
        fc_items = [i for i in items if i.get("type") == "function_call"]
        assert len(fc_items) == 1
        assert fc_items[0]["name"] == "web_search"
        assert fc_items[0]["call_id"] == "call_abc"

    def test_tool_results_become_function_call_output(self, monkeypatch):
        # Tool-role messages become function_call_output items carrying the result text.
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
        items = agent._chat_messages_to_responses_input(messages)
        assert items[0]["type"] == "function_call_output"
        assert items[0]["call_id"] == "call_abc"
        assert items[0]["output"] == "result here"

    def test_encrypted_reasoning_replayed(self, monkeypatch):
        """Encrypted reasoning items from previous turns must be included in input."""
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [
            {"role": "user", "content": "think about this"},
            {
                "role": "assistant",
                "content": "I thought about it.",
                "codex_reasoning_items": [
                    {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"},
                ],
            },
            {"role": "user", "content": "continue"},
        ]
        items = agent._chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 1
        assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob"

    def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch):
        """Messages without codex_reasoning_items should not inject anything."""
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "hello"},
        ]
        items = agent._chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 0
# ── Response normalization tests ─────────────────────────────────────────────
class TestNormalizeCodexResponse:
    """Verify _normalize_codex_response extracts all fields correctly."""

    def _make_codex_agent(self, monkeypatch):
        # Shared Codex-mode agent builder for every test below.
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")

    def test_text_response(self, monkeypatch):
        # A completed message item becomes plain content with finish reason "stop".
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="message", status="completed",
                                content=[SimpleNamespace(type="output_text", text="Hello!")],
                                phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.content == "Hello!"
        assert reason == "stop"

    def test_reasoning_summary_extracted(self, monkeypatch):
        # The reasoning summary text must be surfaced on msg.reasoning alongside content.
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="reasoning",
                                encrypted_content="gAAAA_blob",
                                summary=[SimpleNamespace(type="summary_text", text="Thinking about math")],
                                id="rs_123", status=None),
                SimpleNamespace(type="message", status="completed",
                                content=[SimpleNamespace(type="output_text", text="42")],
                                phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.content == "42"
        assert "math" in msg.reasoning
        assert reason == "stop"

    def test_encrypted_content_captured(self, monkeypatch):
        # Encrypted reasoning blobs must be retained for replay on the next turn.
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="reasoning",
                                encrypted_content="gAAAA_secret_blob_123",
                                summary=[SimpleNamespace(type="summary_text", text="Thinking")],
                                id="rs_456", status=None),
                SimpleNamespace(type="message", status="completed",
                                content=[SimpleNamespace(type="output_text", text="done")],
                                phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.codex_reasoning_items is not None
        assert len(msg.codex_reasoning_items) == 1
        assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
        assert msg.codex_reasoning_items[0]["id"] == "rs_456"

    def test_no_encrypted_content_when_missing(self, monkeypatch):
        # Without reasoning output items there must be no codex_reasoning_items.
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="message", status="completed",
                                content=[SimpleNamespace(type="output_text", text="no reasoning")],
                                phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.codex_reasoning_items is None

    def test_tool_calls_extracted(self, monkeypatch):
        # function_call items convert to tool_calls with finish reason "tool_calls".
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="function_call", status="completed",
                                call_id="call_xyz", name="web_search",
                                arguments='{"query":"test"}', id="fc_xyz"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert reason == "tool_calls"
        assert len(msg.tool_calls) == 1
        assert msg.tool_calls[0].function.name == "web_search"
# ── Chat completions response handling (OpenRouter/Nous) ─────────────────────
class TestBuildAssistantMessage:
    """Verify _build_assistant_message works for all provider response formats."""

    def test_openrouter_reasoning_fields(self, monkeypatch):
        """Plain OpenRouter reasoning text carries over; no codex items appear."""
        agent = _make_agent(monkeypatch, "openrouter")
        raw = SimpleNamespace(
            content="answer",
            tool_calls=None,
            reasoning="I thought about it",
            reasoning_content=None,
            reasoning_details=None,
        )
        built = agent._build_assistant_message(raw, "stop")
        assert built["content"] == "answer"
        assert built["reasoning"] == "I thought about it"
        assert "codex_reasoning_items" not in built

    def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch):
        """reasoning_details must be passed back exactly as received for
        multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this)."""
        agent = _make_agent(monkeypatch, "openrouter")
        detail = {
            "type": "thinking",
            "thinking": "deep thoughts here",
            "signature": "sig123_opaque_blob",
            "encrypted_content": "some_provider_blob",
            "extra_field": "should_not_be_dropped",
        }
        raw = SimpleNamespace(
            content="answer",
            tool_calls=None,
            reasoning=None,
            reasoning_content=None,
            reasoning_details=[detail],
        )
        built = agent._build_assistant_message(raw, "stop")
        round_tripped = built["reasoning_details"][0]
        # ALL fields must survive, not just type/text/signature.
        for key in ("signature", "encrypted_content", "extra_field", "thinking"):
            assert round_tripped[key] == detail[key]

    def test_codex_preserves_encrypted_reasoning(self, monkeypatch):
        """Codex encrypted reasoning items ride along on the built message."""
        agent = _make_agent(
            monkeypatch,
            "openai-codex",
            api_mode="codex_responses",
            base_url="https://chatgpt.com/backend-api/codex",
        )
        raw = SimpleNamespace(
            content="result",
            tool_calls=None,
            reasoning="summary text",
            reasoning_content=None,
            reasoning_details=None,
            codex_reasoning_items=[
                {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
            ],
        )
        built = agent._build_assistant_message(raw, "stop")
        # Compare against a fresh literal so mutation of the input can't mask a bug.
        assert built["codex_reasoning_items"] == [
            {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
        ]

    def test_plain_message_no_codex_items(self, monkeypatch):
        """A message with no reasoning at all never gains a codex_reasoning_items key."""
        agent = _make_agent(monkeypatch, "openrouter")
        raw = SimpleNamespace(
            content="simple",
            tool_calls=None,
            reasoning=None,
            reasoning_content=None,
            reasoning_details=None,
        )
        built = agent._build_assistant_message(raw, "stop")
        assert "codex_reasoning_items" not in built
# ── Auxiliary client provider resolution ─────────────────────────────────────
class TestAuxiliaryClientProviderPriority:
    """Verify auxiliary client resolution doesn't break for any provider."""

    def test_openrouter_always_wins(self, monkeypatch):
        """An OpenRouter key takes precedence over every other provider."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            _client, model = get_text_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        base_url = str(mock_openai.call_args.kwargs["base_url"])
        assert "openrouter" in base_url.lower()

    def test_nous_when_no_openrouter(self, monkeypatch):
        """With no OpenRouter key, stored Nous credentials are used next."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client
        nous_auth = {"access_token": "nous-tok"}
        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth), \
                patch("agent.auxiliary_client.OpenAI"):
            _client, model = get_text_auxiliary_client()
        assert model == "gemini-3-flash"

    def test_custom_endpoint_when_no_nous(self, monkeypatch):
        """OPENAI_BASE_URL/OPENAI_API_KEY act as a custom endpoint after Nous."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            get_text_auxiliary_client()
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_last_resort(self, monkeypatch):
        """With nothing else configured, Codex OAuth credentials are the fallback."""
        for var in ("OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY"):
            monkeypatch.delenv(var, raising=False)
        from agent.auxiliary_client import CodexAuxiliaryClient, get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gpt-5.3-codex"
        assert isinstance(client, CodexAuxiliaryClient)

# NOTE(review): the lines below are diff-extraction residue from a DIFFERENT
# file's hunk (@ -530,12 +530,27, context:
# test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id)
# — preserved as comments so this file stays syntactically valid.
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    """Unknown request fields must be rejected before reaching the Codex API."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs.update(temperature=0, some_unknown_field="value")
    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(kwargs)
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    """Whitelisted tuning fields pass through the preflight check untouched."""
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    allowed = {
        "reasoning": {"effort": "high", "summary": "auto"},
        "include": ["reasoning.encrypted_content"],
        "temperature": 0.7,
        "max_output_tokens": 4096,
    }
    kwargs.update(allowed)
    result = agent._preflight_codex_api_kwargs(kwargs)
    for field, expected in allowed.items():
        assert result[field] == expected
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]