Harden Codex auth refresh and responses compatibility

This commit is contained in:
George Pickett 2026-02-25 19:27:54 -08:00
parent 91bdb9eb2d
commit 74c662b63a
9 changed files with 996 additions and 22 deletions

View file

@ -1,4 +1,7 @@
import json
import time
import base64
from contextlib import contextmanager
from pathlib import Path
from types import SimpleNamespace
@ -9,6 +12,7 @@ from hermes_cli.auth import (
AuthError,
DEFAULT_CODEX_BASE_URL,
PROVIDER_REGISTRY,
_persist_codex_auth_payload,
_login_openai_codex,
login_command,
get_codex_auth_status,
@ -37,6 +41,12 @@ def _write_codex_auth(codex_home: Path, *, access_token: str = "access", refresh
return auth_file
def _jwt_with_exp(exp_epoch: int) -> str:
payload = {"exp": exp_epoch}
encoded = base64.urlsafe_b64encode(json.dumps(payload).encode("utf-8")).rstrip(b"=").decode("utf-8")
return f"h.{encoded}.s"
def test_read_codex_auth_file_success(tmp_path, monkeypatch):
codex_home = tmp_path / "codex-home"
auth_file = _write_codex_auth(codex_home)
@ -61,12 +71,107 @@ def test_resolve_codex_runtime_credentials_missing_access_token(tmp_path, monkey
assert exc.value.relogin_required is True
def test_resolve_codex_runtime_credentials_refreshes_expiring_token(tmp_path, monkeypatch):
codex_home = tmp_path / "codex-home"
expiring_token = _jwt_with_exp(int(time.time()) - 10)
_write_codex_auth(codex_home, access_token=expiring_token, refresh_token="refresh-old")
monkeypatch.setenv("CODEX_HOME", str(codex_home))
called = {"count": 0}
def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
called["count"] += 1
assert auth_path == codex_home / "auth.json"
assert lock_held is True
return {"access_token": "access-new", "refresh_token": "refresh-new"}
monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
resolved = resolve_codex_runtime_credentials()
assert called["count"] == 1
assert resolved["api_key"] == "access-new"
def test_resolve_codex_runtime_credentials_force_refresh(tmp_path, monkeypatch):
codex_home = tmp_path / "codex-home"
_write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old")
monkeypatch.setenv("CODEX_HOME", str(codex_home))
called = {"count": 0}
def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
called["count"] += 1
assert lock_held is True
return {"access_token": "access-forced", "refresh_token": "refresh-new"}
monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)
assert called["count"] == 1
assert resolved["api_key"] == "access-forced"
def test_resolve_codex_runtime_credentials_uses_file_lock_on_refresh(tmp_path, monkeypatch):
codex_home = tmp_path / "codex-home"
_write_codex_auth(codex_home, access_token="access-current", refresh_token="refresh-old")
monkeypatch.setenv("CODEX_HOME", str(codex_home))
lock_calls = {"enter": 0, "exit": 0}
@contextmanager
def _fake_lock(auth_path, timeout_seconds=15.0):
assert auth_path == codex_home / "auth.json"
lock_calls["enter"] += 1
try:
yield
finally:
lock_calls["exit"] += 1
refresh_calls = {"count": 0}
def _fake_refresh(*, payload, auth_path, timeout_seconds, lock_held=False):
refresh_calls["count"] += 1
assert lock_held is True
return {"access_token": "access-updated", "refresh_token": "refresh-updated"}
monkeypatch.setattr("hermes_cli.auth._codex_auth_file_lock", _fake_lock)
monkeypatch.setattr("hermes_cli.auth._refresh_codex_auth_tokens", _fake_refresh)
resolved = resolve_codex_runtime_credentials(force_refresh=True, refresh_if_expiring=False)
assert refresh_calls["count"] == 1
assert lock_calls["enter"] == 1
assert lock_calls["exit"] == 1
assert resolved["api_key"] == "access-updated"
def test_resolve_provider_explicit_codex_does_not_fallback(monkeypatch):
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
assert resolve_provider("openai-codex") == "openai-codex"
def test_persist_codex_auth_payload_writes_atomically(tmp_path):
auth_path = tmp_path / "auth.json"
auth_path.write_text('{"stale":true}\n')
payload = {
"auth_mode": "oauth",
"tokens": {
"access_token": "next-access",
"refresh_token": "next-refresh",
},
"last_refresh": "2026-02-26T00:00:00Z",
}
_persist_codex_auth_payload(auth_path, payload)
stored = json.loads(auth_path.read_text())
assert stored == payload
assert list(tmp_path.glob(".auth.json.*.tmp")) == []
def test_get_codex_auth_status_not_logged_in(tmp_path, monkeypatch):
monkeypatch.setenv("CODEX_HOME", str(tmp_path / "missing-codex-home"))
status = get_codex_auth_status()

View file

@ -0,0 +1,175 @@
import asyncio
import sys
import types
from types import SimpleNamespace
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import cron.scheduler as cron_scheduler
import gateway.run as gateway_run
import run_agent
from gateway.config import Platform
from gateway.session import SessionSource
def _patch_agent_bootstrap(monkeypatch):
monkeypatch.setattr(
run_agent,
"get_tool_definitions",
lambda **kwargs: [
{
"type": "function",
"function": {
"name": "terminal",
"description": "Run shell commands.",
"parameters": {"type": "object", "properties": {}},
},
}
],
)
monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
def _codex_message_response(text: str):
return SimpleNamespace(
output=[
SimpleNamespace(
type="message",
content=[SimpleNamespace(type="output_text", text=text)],
)
],
usage=SimpleNamespace(input_tokens=5, output_tokens=3, total_tokens=8),
status="completed",
model="gpt-5-codex",
)
class _UnauthorizedError(RuntimeError):
def __init__(self):
super().__init__("Error code: 401 - unauthorized")
self.status_code = 401
class _FakeOpenAI:
def __init__(self, **kwargs):
self.kwargs = kwargs
def close(self):
return None
class _Codex401ThenSuccessAgent(run_agent.AIAgent):
refresh_attempts = 0
last_init = {}
def __init__(self, *args, **kwargs):
kwargs.setdefault("skip_context_files", True)
kwargs.setdefault("skip_memory", True)
kwargs.setdefault("max_iterations", 4)
type(self).last_init = dict(kwargs)
super().__init__(*args, **kwargs)
self._cleanup_task_resources = lambda task_id: None
self._persist_session = lambda messages, history=None: None
self._save_trajectory = lambda messages, user_message, completed: None
self._save_session_log = lambda messages: None
def _try_refresh_codex_client_credentials(self, *, force: bool = True) -> bool:
type(self).refresh_attempts += 1
return True
def run_conversation(self, user_message: str, conversation_history=None):
calls = {"api": 0}
def _fake_api_call(api_kwargs):
calls["api"] += 1
if calls["api"] == 1:
raise _UnauthorizedError()
return _codex_message_response("Recovered via refresh")
self._interruptible_api_call = _fake_api_call
return super().run_conversation(user_message, conversation_history=conversation_history)
def test_cron_run_job_codex_path_handles_internal_401_refresh(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
monkeypatch.setattr(
"hermes_cli.runtime_provider.resolve_runtime_provider",
lambda requested=None: {
"provider": "openai-codex",
"api_mode": "codex_responses",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-token",
},
)
monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc))
_Codex401ThenSuccessAgent.refresh_attempts = 0
_Codex401ThenSuccessAgent.last_init = {}
success, output, final_response, error = cron_scheduler.run_job(
{"id": "job-1", "name": "Codex Refresh Test", "prompt": "ping"}
)
assert success is True
assert error is None
assert final_response == "Recovered via refresh"
assert "Recovered via refresh" in output
assert _Codex401ThenSuccessAgent.refresh_attempts == 1
assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"
def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
monkeypatch.setattr(run_agent, "AIAgent", _Codex401ThenSuccessAgent)
monkeypatch.setattr(
gateway_run,
"_resolve_runtime_agent_kwargs",
lambda: {
"provider": "openai-codex",
"api_mode": "codex_responses",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-token",
},
)
monkeypatch.setenv("HERMES_TOOL_PROGRESS", "false")
_Codex401ThenSuccessAgent.refresh_attempts = 0
_Codex401ThenSuccessAgent.last_init = {}
runner = gateway_run.GatewayRunner.__new__(gateway_run.GatewayRunner)
runner.adapters = {}
runner._ephemeral_system_prompt = ""
runner._prefill_messages = []
runner._reasoning_config = None
runner._running_agents = {}
source = SessionSource(
platform=Platform.LOCAL,
chat_id="cli",
chat_name="CLI",
chat_type="dm",
user_id="user-1",
)
result = asyncio.run(
runner._run_agent(
message="ping",
context_prompt="",
history=[],
source=source,
session_id="session-1",
session_key="agent:main:local:dm",
)
)
assert result["final_response"] == "Recovered via refresh"
assert _Codex401ThenSuccessAgent.refresh_attempts == 1
assert _Codex401ThenSuccessAgent.last_init["provider"] == "openai-codex"
assert _Codex401ThenSuccessAgent.last_init["api_mode"] == "codex_responses"

View file

@ -0,0 +1,40 @@
import json
from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch):
codex_home = tmp_path / "codex-home"
codex_home.mkdir(parents=True, exist_ok=True)
(codex_home / "config.toml").write_text('model = "gpt-5.2-codex"\n')
(codex_home / "models_cache.json").write_text(
json.dumps(
{
"models": [
{"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
{"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
{"slug": "gpt-4o", "priority": 1, "supported_in_api": True},
{"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
]
}
)
)
monkeypatch.setenv("CODEX_HOME", str(codex_home))
models = get_codex_model_ids()
assert models[0] == "gpt-5.2-codex"
assert "gpt-5.1-codex" in models
assert "gpt-5.3-codex" in models
assert "gpt-4o" not in models
assert "gpt-5-hidden-codex" not in models
def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch):
codex_home = tmp_path / "codex-home"
codex_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("CODEX_HOME", str(codex_home))
models = get_codex_model_ids()
assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS

View file

@ -2,6 +2,8 @@ import sys
import types
from types import SimpleNamespace
import pytest
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
@ -156,6 +158,16 @@ class _FakeCreateStream:
self.closed = True
def _codex_request_kwargs():
return {
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [{"role": "user", "content": "Ping"}],
"tools": None,
"store": False,
}
def test_api_mode_uses_explicit_provider_when_codex(monkeypatch):
_patch_agent_bootstrap(monkeypatch)
agent = run_agent.AIAgent(
@ -222,6 +234,10 @@ def test_build_api_kwargs_codex(monkeypatch):
assert kwargs["tools"][0]["name"] == "terminal"
assert kwargs["tools"][0]["strict"] is False
assert "function" not in kwargs["tools"][0]
assert kwargs["store"] is False
assert "timeout" not in kwargs
assert "max_tokens" not in kwargs
assert "extra_body" not in kwargs
def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
@ -243,7 +259,7 @@ def test_run_codex_stream_retries_when_completed_event_missing(monkeypatch):
)
)
response = agent._run_codex_stream({"model": "gpt-5-codex"})
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert response.output[0].content[0].text == "stream ok"
@ -269,7 +285,7 @@ def test_run_codex_stream_falls_back_to_create_after_stream_completion_error(mon
)
)
response = agent._run_codex_stream({"model": "gpt-5-codex"})
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert calls["create"] == 1
assert response.output[0].content[0].text == "create fallback ok"
@ -304,7 +320,7 @@ def test_run_codex_stream_fallback_parses_create_stream_events(monkeypatch):
)
)
response = agent._run_codex_stream({"model": "gpt-5-codex"})
response = agent._run_codex_stream(_codex_request_kwargs())
assert calls["stream"] == 2
assert calls["create"] == 1
assert create_stream.closed is True
@ -323,6 +339,72 @@ def test_run_conversation_codex_plain_text(monkeypatch):
assert result["messages"][-1]["content"] == "OK"
def test_run_conversation_codex_refreshes_after_401_and_retries(monkeypatch):
agent = _build_agent(monkeypatch)
calls = {"api": 0, "refresh": 0}
class _UnauthorizedError(RuntimeError):
def __init__(self):
super().__init__("Error code: 401 - unauthorized")
self.status_code = 401
def _fake_api_call(api_kwargs):
calls["api"] += 1
if calls["api"] == 1:
raise _UnauthorizedError()
return _codex_message_response("Recovered after refresh")
def _fake_refresh(*, force=True):
calls["refresh"] += 1
assert force is True
return True
monkeypatch.setattr(agent, "_interruptible_api_call", _fake_api_call)
monkeypatch.setattr(agent, "_try_refresh_codex_client_credentials", _fake_refresh)
result = agent.run_conversation("Say OK")
assert calls["api"] == 2
assert calls["refresh"] == 1
assert result["completed"] is True
assert result["final_response"] == "Recovered after refresh"
def test_try_refresh_codex_client_credentials_rebuilds_client(monkeypatch):
agent = _build_agent(monkeypatch)
closed = {"value": False}
rebuilt = {"kwargs": None}
class _ExistingClient:
def close(self):
closed["value"] = True
class _RebuiltClient:
pass
def _fake_openai(**kwargs):
rebuilt["kwargs"] = kwargs
return _RebuiltClient()
monkeypatch.setattr(
"hermes_cli.auth.resolve_codex_runtime_credentials",
lambda force_refresh=True: {
"api_key": "new-codex-token",
"base_url": "https://chatgpt.com/backend-api/codex",
},
)
monkeypatch.setattr(run_agent, "OpenAI", _fake_openai)
agent.client = _ExistingClient()
ok = agent._try_refresh_codex_client_credentials(force=True)
assert ok is True
assert closed["value"] is True
assert rebuilt["kwargs"]["api_key"] == "new-codex-token"
assert rebuilt["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
assert isinstance(agent.client, _RebuiltClient)
def test_run_conversation_codex_tool_round_trip(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]
@ -404,6 +486,56 @@ def test_chat_messages_to_responses_input_accepts_call_pipe_fc_ids(monkeypatch):
assert function_output["call_id"] == "call_pair123"
def test_preflight_codex_api_kwargs_strips_optional_function_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
preflight = agent._preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [
{"role": "user", "content": "hi"},
{
"type": "function_call",
"id": "call_bad",
"call_id": "call_good",
"name": "terminal",
"arguments": "{}",
},
],
"tools": [],
"store": False,
}
)
fn_call = next(item for item in preflight["input"] if item.get("type") == "function_call")
assert fn_call["call_id"] == "call_good"
assert "id" not in fn_call
def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
with pytest.raises(ValueError, match="function_call_output is missing call_id"):
agent._preflight_codex_api_kwargs(
{
"model": "gpt-5-codex",
"instructions": "You are Hermes.",
"input": [{"type": "function_call_output", "output": "{}"}],
"tools": [],
"store": False,
}
)
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
agent = _build_agent(monkeypatch)
kwargs = _codex_request_kwargs()
kwargs["temperature"] = 0
with pytest.raises(ValueError, match="unsupported field"):
agent._preflight_codex_api_kwargs(kwargs)
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]