Harden Codex auth refresh and responses compatibility

2026-02-25 19:27:54 -08:00 · 2026-02-25 19:27:54 -08:00 · 74c662b63a
commit 74c662b63a
parent 91bdb9eb2d
9 changed files with 996 additions and 22 deletions
--- a/tests/test_auth_codex_provider.py
+++ b/tests/test_auth_codex_provider.py
@ -1,4 +1,7 @@
 import json
+import time
+import base64
+from contextlib import contextmanager
 from pathlib import Path
 from types import SimpleNamespace

@ -9,6 +12,7 @@ from hermes_cli.auth import (
    AuthError,
    DEFAULT_CODEX_BASE_URL,
    PROVIDER_REGISTRY,
+    _persist_codex_auth_payload,
    _login_openai_codex,
    login_command,
    get_codex_auth_status,
@ -37,6 +41,12 @@ def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh
    return auth_file


+def _jwt_with_exp(exp_epoch: int) -> str:
+    """Build a fake three-segment token whose middle segment encodes an ``exp`` claim.
+
+    The first and last segments are the placeholders ``h`` and ``s``; the middle
+    one is URL-safe base64 of the JSON claims with ``=`` padding stripped.
+    """
+    claims_json = json.dumps({"exp": exp_epoch})
+    padded_segment = base64.urlsafe_b64encode(claims_json.encode("utf-8"))
+    segment = padded_segment.rstrip(b"=").decode("utf-8")
+    return "h." + segment + ".s"
+
+
 def test_read_codex_auth_file_success(tmp_path, monkeypatch):
    codex_home = tmp_path / "codex-home"
    auth_file = _write_codex_auth(codex_home)
@ -61,12 +71,107 @@ def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkey
    assert exc.value.relogin_required is True


+def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch):
+    """A stored access token whose ``exp`` is already in the past triggers one refresh."""
+    home_dir = tmp_path / "codex-home"
+    expected_auth_file = home_dir / "auth.json"
+    past_exp = int(time.time()) - 10
+    _write_codex_auth(
+        home_dir,
+        access_token=_jwt_with_exp(past_exp),
+        refresh_token="refresh-old",
+    )
+    monkeypatch.setenv("CODEX_HOME", str(home_dir))
+
+    refresh_invocations = []
+
+    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
+        refresh_invocations.append(auth_path)
+        assert auth_path == expected_auth_file
+        assert lock_held is True
+        return dict(access_token="access-new", refresh_token="refresh-new")
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
+
+    credentials = resolve_codex_runtime_credentials()
+
+    assert len(refresh_invocations) == 1
+    assert credentials["api_key"] == "access-new"
+
+
+def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch):
+    """``force_refresh=True`` refreshes once even with ``refresh_if_expiring=False``."""
+    home_dir = tmp_path / "codex-home"
+    _write_codex_auth(home_dir, access_token="access-current", refresh_token="refresh-old")
+    monkeypatch.setenv("CODEX_HOME", str(home_dir))
+
+    refresh_invocations = []
+
+    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
+        refresh_invocations.append(auth_path)
+        assert lock_held is True
+        return dict(access_token="access-forced", refresh_token="refresh-new")
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
+
+    credentials = resolve_codex_runtime_credentials(
+        force_refresh=True,
+        refresh_if_expiring=False,
+    )
+
+    assert len(refresh_invocations) == 1
+    assert credentials["api_key"] == "access-forced"
+
+
+def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch):
+    """A forced refresh enters and exits the auth-file lock exactly once."""
+    home_dir = tmp_path / "codex-home"
+    _write_codex_auth(home_dir, access_token="access-current", refresh_token="refresh-old")
+    monkeypatch.setenv("CODEX_HOME", str(home_dir))
+
+    lock_events = []
+    refresh_events = []
+
+    @contextmanager
+    def _fake_lock(auth_path, timeout_seconds=15.0):
+        assert auth_path == home_dir / "auth.json"
+        lock_events.append("enter")
+        try:
+            yield
+        finally:
+            # Recorded even when the guarded body raises.
+            lock_events.append("exit")
+
+    def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
+        refresh_events.append("refresh")
+        assert lock_held is True
+        return dict(access_token="access-updated", refresh_token="refresh-updated")
+
+    monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock)
+    monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
+
+    credentials = resolve_codex_runtime_credentials(
+        force_refresh=True,
+        refresh_if_expiring=False,
+    )
+
+    assert len(refresh_events) == 1
+    assert lock_events.count("enter") == 1
+    assert lock_events.count("exit") == 1
+    assert credentials["api_key"] == "access-updated"
+
+
 def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch):
    """Requesting ``openai-codex`` explicitly resolves to it with both API-key env vars unset."""
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
    assert resolve_provider("openai-codex") == "openai-codex"


+def test_persist_codex_auth_payload_writes_atomically(tmp_path):
+    """Persisting replaces stale file content and leaves no ``.auth.json.*.tmp`` files."""
+    target = tmp_path / "auth.json"
+    target.write_text('{"stale":true}\n')
+
+    new_tokens = {
+        "access_token": "next-access",
+        "refresh_token": "next-refresh",
+    }
+    payload = {
+        "auth_mode": "oauth",
+        "tokens": new_tokens,
+        "last_refresh": "2026-02-26T00:00:00Z",
+    }
+
+    _persist_codex_auth_payload(target, payload)
+
+    assert json.loads(target.read_text()) == payload
+    leftover_temp_files = list(tmp_path.glob(".auth.json.*.tmp"))
+    assert leftover_temp_files == []
+
+
 def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch):
    monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home"))
    status = get_codex_auth_status()
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@ -0,0 +1,175 @@
+import asyncio
+import sys
+import types
+from types import SimpleNamespace
+
+
+# Register placeholder modules under these names, but only when no module of
+# that name is already present in ``sys.modules`` (``setdefault`` never
+# overwrites an existing entry).
+# NOTE(review): presumably this lets the project imports below succeed when
+# these optional packages are not installed -- confirm.
+sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
+sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
+sys.modules.setdefault("fal_client", types.SimpleNamespace())
+
+import cron.scheduler as cron_scheduler
+import gateway.run as gateway_run
+import run_agent
+from gateway.config import Platform
+from gateway.session import SessionSource
+
+
+def _patch_agent_bootstrap(monkeypatch):
+    """Stub ``run_agent`` tool discovery: one ``terminal`` tool, empty requirements check."""
+
+    def _single_terminal_tool(**kwargs):
+        # Built anew on every call so callers never share the returned objects.
+        terminal_spec = {
+            "name": "terminal",
+            "description": "Run shell commands.",
+            "parameters": {"type": "object", "properties": {}},
+        }
+        return [{"type": "function", "function": terminal_spec}]
+
+    def _no_requirements():
+        return {}
+
+    monkeypatch.setattr(run_agent, "get_tool_definitions", _single_terminal_tool)
+    monkeypatch.setattr(run_agent, "check_toolset_requirements", _no_requirements)
+
+
+def _codex_message_response(text: str):
+    """Return a fake completed Codex response holding a single ``output_text`` message."""
+    text_part = SimpleNamespace(type="output_text", text=text)
+    message_item = SimpleNamespace(type="message", content=[text_part])
+    token_usage = SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8)
+    return SimpleNamespace(
+        output=[message_item],
+        usage=token_usage,
+        status="completed",
+        model="gpt-5-codex",
+    )
+
+
+class _UnauthorizedError(RuntimeError):
+    """Test exception carrying a 401 message and a ``status_code`` attribute of 401."""
+
+    def __init__(self):
+        message = "Error code: 401 - unauthorized"
+        super().__init__(message)
+        self.status_code = 401
+
+
+class _FakeOpenAI:
+    """Stand-in client that only remembers its constructor keyword arguments."""
+
+    def __init__(self, **kwargs):
+        # Kept as-is so a test can inspect what the client was built with.
+        self.kwargs = kwargs
+
+    def close(self):
+        """Do nothing and return ``None``."""
+        return
+
+
+class _Codex401ThenSuccessAgent(run_agent.AIAgent):
+    """``AIAgent`` subclass whose first API call raises a 401 and whose later calls succeed.
+
+    Observations are stored on the class rather than the instance, so a test can
+    read them without holding a reference to the agent that was created.
+    """
+
+    # Number of times the credential-refresh override has run, across all instances.
+    refresh_attempts = 0
+    # Keyword arguments (after the defaults below are applied) of the latest construction.
+    last_init = {}
+
+    def __init__(self, *args, **kwargs):
+        """Apply default kwargs, record them on the class, then no-op four instance hooks."""
+        kwargs.setdefault("skip_context_files", True)
+        kwargs.setdefault("skip_memory", True)
+        kwargs.setdefault("max_iterations", 4)
+        # Rebinds the class attribute to a copy; the shared default dict is never mutated.
+        type(self).last_init = dict(kwargs)
+        super().__init__(*args, **kwargs)
+        # Replace these attributes on the instance with lambdas that return None.
+        self._cleanup_task_resources = lambda task_id: None
+        self._persist_session = lambda messages, history=None: None
+        self._save_trajectory = lambda messages, user_message, completed: None
+        self._save_session_log = lambda messages: None
+
+    def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
+        """Count the refresh attempt on the class and report success."""
+        type(self).refresh_attempts += 1
+        return True
+
+    def run_conversation(self, user_message: str, conversation_history=None):
+        """Install a fake API call that fails once with a 401, then delegate to the base class."""
+        # Counter is local to this conversation, so each run starts with a failing call.
+        calls = {"api": 0}
+
+        def _fake_api_call(api_kwargs):
+            calls["api"] += 1
+            # Only the first call is rejected; every later call returns a text response.
+            if calls["api"] == 1:
+                raise _UnauthorizedError()
+            return _codex_message_response("Recovered via refresh")
+
+        self._interruptible_api_call = _fake_api_call
+        return super().run_conversation(user_message, conversation_history=conversation_history)
+
+
+def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
+    """``cron_scheduler.run_job`` succeeds when the agent's first API call raises a 401."""
+    agent_cls = _Codex401ThenSuccessAgent
+
+    def _codex_runtime(requested=None):
+        return {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+        }
+
+    def _format_error(exc):
+        return str(exc)
+
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
+    monkeypatch.setattr(run_agent, "AIAgent", agent_cls)
+    monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _codex_runtime)
+    monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", _format_error)
+
+    # Class-level counters persist between tests, so start from a clean slate.
+    agent_cls.refresh_attempts = 0
+    agent_cls.last_init = {}
+
+    job = {"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
+    success, output, final_response, error = cron_scheduler.run_job(job)
+
+    assert success is True
+    assert error is None
+    assert final_response == "Recovered via refresh"
+    assert "Recovered via refresh" in output
+    assert agent_cls.refresh_attempts == 1
+    init_kwargs = agent_cls.last_init
+    assert init_kwargs["provider"] == "openai-codex"
+    assert init_kwargs["api_mode"] == "codex_responses"
+
+
+def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
+    """``GatewayRunner._run_agent`` returns the recovered reply after the agent's first call 401s."""
+    _patch_agent_bootstrap(monkeypatch)
+    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
+    monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
+    # Hand the gateway fixed Codex runtime settings instead of resolving real ones.
+    monkeypatch.setattr(
+        gateway_run,
+        "_resolve_runtime_agent_kwargs",
+        lambda: {
+            "provider": "openai-codex",
+            "api_mode": "codex_responses",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+            "api_key": "codex-token",
+        },
+    )
+    monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
+
+    # Class-level counters persist between tests, so reset them here.
+    _Codex401ThenSuccessAgent.refresh_attempts = 0
+    _Codex401ThenSuccessAgent.last_init = {}
+
+    # Create the runner without running ``__init__`` and set these attributes by hand.
+    # NOTE(review): presumably these are the only attributes ``_run_agent`` reads -- confirm.
+    runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
+    runner.adapters = {}
+    runner._ephemeral_system_prompt = ""
+    runner._prefill_messages = []
+    runner._reasoning_config = None
+    runner._running_agents = {}
+
+    source = SessionSource(
+        platform=Platform.LOCAL,
+        chat_id="cli",
+        chat_name="CLI",
+        chat_type="dm",
+        user_id="user-1",
+    )
+
+    # ``_run_agent`` is a coroutine function; drive it to completion synchronously.
+    result = asyncio.run(
+        runner._run_agent(
+            message="ping",
+            context_prompt="",
+            history=[],
+            source=source,
+            session_id="session-1",
+            session_key="agent:main:local:dm",
+        )
+    )
+
+    assert result["final_response"] == "Recovered via refresh"
+    assert _Codex401ThenSuccessAgent.refresh_attempts == 1
+    assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
+    assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
--- a/tests/test_codex_models.py
+++ b/tests/test_codex_models.py
@ -0,0 +1,40 @@
+import json
+
+from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
+
+
+def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch):
+    """The ``config.toml`` model comes first; cached codex slugs are kept, other entries dropped."""
+    home_dir = tmp_path / "codex-home"
+    home_dir.mkdir(parents=True, exist_ok=True)
+
+    config_file = home_dir / "config.toml"
+    config_file.write_text('model = "gpt-5.2-codex"\n')
+
+    cached_models = [
+        {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
+        {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
+        {"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
+        {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
+    ]
+    cache_file = home_dir / "models_cache.json"
+    cache_file.write_text(json.dumps({"models": cached_models}))
+
+    monkeypatch.setenv("CODEX_HOME", str(home_dir))
+
+    models = get_codex_model_ids()
+
+    assert models[0] == "gpt-5.2-codex"
+    for expected_slug in ("gpt-5.1-codex", "gpt-5.3-codex"):
+        assert expected_slug in models
+    for excluded_slug in ("gpt-4o", "gpt-5-hidden-codex"):
+        assert excluded_slug not in models
+
+
+def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch):
+    """With an empty ``CODEX_HOME`` directory the result starts with ``DEFAULT_CODEX_MODELS``."""
+    empty_home = tmp_path / "codex-home"
+    empty_home.mkdir(parents=True, exist_ok=True)
+    monkeypatch.setenv("CODEX_HOME", str(empty_home))
+
+    resolved_models = get_codex_model_ids()
+    default_count = len(DEFAULT_CODEX_MODELS)
+
+    assert resolved_models[:default_count] == DEFAULT_CODEX_MODELS
--- a/tests/test_run_agent_codex_responses.py
+++ b/tests/test_run_agent_codex_responses.py
@ -2,6 +2,8 @@ import sys
 import types
 from types import SimpleNamespace

+import pytest
+

 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@ -156,6 +158,16 @@ class _FakeCreateStream:
        self.closed = True


+def _codex_request_kwargs():
+    """Return a new minimal Codex request-kwargs dict on every call."""
+    user_turn = dict(role="user", content="Ping")
+    return dict(
+        model="gpt-5-codex",
+        instructions="You are Hermes.",
+        input=[user_turn],
+        tools=None,
+        store=False,
+    )
+
+
 def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
    _patch_agent_bootstrap(monkeypatch)
    agent = run_agent.AIAgent(
@ -222,6 +234,10 @@ def test_build_api_kwargs_codex(monkeypatch):
    assert kwargs["tools"][0]["name"] == "terminal"
    assert kwargs["tools"][0]["strict"] is False
    assert "function" not in kwargs["tools"][0]
+    assert kwargs["store"] is False
+    assert "timeout" not in kwargs
+    assert "max_tokens" not in kwargs
+    assert "extra_body" not in kwargs


 def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
@ -243,7 +259,7 @@ def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
        )
    )

-    response = agent._run_codex_stream({"model": "gpt-5-codex"})
+    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert response.output[0].content[0].text == "stream ok"

@ -269,7 +285,7 @@ def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(mon
        )
    )

-    response = agent._run_codex_stream({"model": "gpt-5-codex"})
+    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert response.output[0].content[0].text == "create fallback ok"
@ -304,7 +320,7 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
        )
    )

-    response = agent._run_codex_stream({"model": "gpt-5-codex"})
+    response = agent._run_codex_stream(_codex_request_kwargs())
    assert calls["stream"] == 2
    assert calls["create"] == 1
    assert create_stream.closed is True
@ -323,6 +339,72 @@ def test_run_conversation_codex_plain_text(monkeypatch):
    assert result["messages"][-1]["content"] == "OK"


+def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
+    """A 401 on the first API call leads to one forced refresh and one successful retry."""
+    agent = _build_agent(monkeypatch)
+    api_attempts = []
+    refresh_attempts = []
+
+    class _UnauthorizedError(RuntimeError):
+        def __init__(self):
+            super().__init__("Error code: 401 - unauthorized")
+            self.status_code = 401
+
+    def _fake_api_call(api_kwargs):
+        api_attempts.append(api_kwargs)
+        # Every call after the first one succeeds.
+        if len(api_attempts) != 1:
+            return _codex_message_response("Recovered after refresh")
+        raise _UnauthorizedError()
+
+    def _fake_refresh(*, force=True):
+        refresh_attempts.append(force)
+        assert force is True
+        return True
+
+    monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
+    monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)
+
+    outcome = agent.run_conversation("Say OK")
+
+    assert len(api_attempts) == 2
+    assert len(refresh_attempts) == 1
+    assert outcome["completed"] is True
+    assert outcome["final_response"] == "Recovered after refresh"
+
+
+def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
+    """Refreshing closes the current client and installs one built from the new credentials."""
+    agent = _build_agent(monkeypatch)
+    close_calls = []
+    build_calls = []
+
+    class _ExistingClient:
+        def close(self):
+            close_calls.append(self)
+
+    class _RebuiltClient:
+        pass
+
+    def _fake_openai(**kwargs):
+        build_calls.append(kwargs)
+        return _RebuiltClient()
+
+    def _fresh_credentials(force_refresh=True):
+        return {
+            "api_key": "new-codex-token",
+            "base_url": "https://chatgpt.com/backend-api/codex",
+        }
+
+    monkeypatch.setattr("hermes_cli.auth.resolve_codex_runtime_credentials", _fresh_credentials)
+    monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
+
+    agent.client = _ExistingClient()
+    ok = agent._try_refresh_codex_client_credentials(force=True)
+
+    assert ok is True
+    assert len(close_calls) >= 1
+    # Only the most recent construction matters.
+    latest_kwargs = build_calls[-1]
+    assert latest_kwargs["api_key"] == "new-codex-token"
+    assert latest_kwargs["base_url"] == "https://chatgpt.com/backend-api/codex"
+    assert isinstance(agent.client, _RebuiltClient)
+
+
 def test_run_conversation_codex_tool_round_trip(monkeypatch):
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
@ -404,6 +486,56 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
    assert function_output["call_id"] == "call_pair123"


+def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
+    """Preflight removes ``id`` from a ``function_call`` input item but keeps ``call_id``."""
+    agent = _build_agent(monkeypatch)
+    user_turn = {"role": "user", "content": "hi"}
+    tool_call = {
+        "type": "function_call",
+        "id": "call_bad",
+        "call_id": "call_good",
+        "name": "terminal",
+        "arguments": "{}",
+    }
+    request = {
+        "model": "gpt-5-codex",
+        "instructions": "You are Hermes.",
+        "input": [user_turn, tool_call],
+        "tools": [],
+        "store": False,
+    }
+
+    preflight = agent._preflight_codex_api_kwargs(request)
+
+    function_calls = (item for item in preflight["input"] if item.get("type") == "function_call")
+    fn_call = next(function_calls)
+    assert fn_call["call_id"] == "call_good"
+    assert "id" not in fn_call
+
+
+def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
+    """Preflight raises ``ValueError`` for a ``function_call_output`` item lacking ``call_id``."""
+    agent = _build_agent(monkeypatch)
+    orphan_output = {"type": "function_call_output", "output": "{}"}
+    request = {
+        "model": "gpt-5-codex",
+        "instructions": "You are Hermes.",
+        "input": [orphan_output],
+        "tools": [],
+        "store": False,
+    }
+
+    with pytest.raises(ValueError, match="function_call_output is missing call_id"):
+        agent._preflight_codex_api_kwargs(request)
+
+
+def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
+    """Preflight raises ``ValueError`` when the request also carries ``temperature``."""
+    agent = _build_agent(monkeypatch)
+    request = {**_codex_request_kwargs(), "temperature": 0}
+
+    with pytest.raises(ValueError, match="unsupported field"):
+        agent._preflight_codex_api_kwargs(request)
+
+
 def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]