From f2414bfd457def93f10c895999346b56e60ee239 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 17 Mar 2026 02:04:36 -0700 Subject: [PATCH] feat: allow custom endpoints to use responses API via api_mode override (#1651) Add HERMES_API_MODE env var and model.api_mode config field to let custom OpenAI-compatible endpoints opt into codex_responses mode without requiring the OpenAI Codex OAuth provider path. - _get_configured_api_mode() reads HERMES_API_MODE env (precedence) then model.api_mode from config.yaml; validates against whitelist - Applied in both _resolve_openrouter_runtime() and _resolve_named_custom_runtime() (original PR only covered openrouter) - Fix _dump_api_request_debug() to show /responses URL when in codex_responses mode instead of always showing /chat/completions - Tests for config override, env override, invalid values, named custom providers, and debug dump URL for both API modes Inspired by PR #1041 by @mxyhi. Co-authored-by: mxyhi --- hermes_cli/runtime_provider.py | 23 +++++++- run_agent.py | 4 +- tests/test_run_agent_codex_responses.py | 37 ++++++++++++ tests/test_runtime_provider_resolution.py | 71 +++++++++++++++++++++++ 4 files changed, 131 insertions(+), 4 deletions(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index e0535357..4dad57dd 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -33,6 +33,24 @@ def _get_model_config() -> Dict[str, Any]: return {} +def _get_configured_api_mode(model_cfg: Optional[Dict[str, Any]] = None) -> Optional[str]: + """Return an optional API mode override from env or config. + + Allows custom OpenAI-compatible endpoints to opt into codex_responses + mode via HERMES_API_MODE env var or model.api_mode in config.yaml, + without requiring the OpenAI Codex OAuth provider path. + """ + candidate = os.getenv("HERMES_API_MODE", "").strip().lower() + if not candidate: + cfg = model_cfg if isinstance(model_cfg, dict) else _get_model_config() + raw = cfg.get("api_mode") + if isinstance(raw, str): + candidate = raw.strip().lower() + if candidate in {"chat_completions", "codex_responses"}: + return candidate + return None + + def resolve_requested_provider(requested: Optional[str] = None) -> str: """Resolve provider request from explicit arg, config, then env.""" if requested and requested.strip(): @@ -121,7 +139,7 @@ def _resolve_named_custom_runtime( return { "provider": "openrouter", - "api_mode": "chat_completions", + "api_mode": _get_configured_api_mode() or "chat_completions", "base_url": base_url, "api_key": api_key, "source": f"custom_provider:{custom_provider.get('name', requested_provider)}", @@ -190,10 +208,11 @@ def _resolve_openrouter_runtime( ) source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config" + api_mode = _get_configured_api_mode(model_cfg) or "chat_completions" return { "provider": "openrouter", - "api_mode": "chat_completions", + "api_mode": api_mode, "base_url": base_url, "api_key": api_key, "source": source, diff --git a/run_agent.py b/run_agent.py index f5d1b299..6bad63dd 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1351,7 +1351,7 @@ class AIAgent: error: Optional[Exception] = None, ) -> Optional[Path]: """ - Dump a debug-friendly HTTP request record for chat.completions.create(). + Dump a debug-friendly HTTP request record for the active inference API. Captures the request body from api_kwargs (excluding transport-only keys like timeout). Intended for debugging provider-side 4xx failures where @@ -1374,7 +1374,7 @@ class AIAgent: "reason": reason, "request": { "method": "POST", - "url": f"{self.base_url.rstrip('/')}/chat/completions", + "url": f"{self.base_url.rstrip('/')}{'/responses' if self.api_mode == 'codex_responses' else '/chat/completions'}", "headers": { "Authorization": f"Bearer {self._mask_api_key_for_logs(api_key)}", "Content-Type": "application/json", diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index cf2694f0..715074d9 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -750,3 +750,40 @@ def test_run_conversation_codex_continues_after_ack_for_directory_listing_prompt for msg in result["messages"] ) assert any(msg.get("role") == "tool" and msg.get("tool_call_id") == "call_1" for msg in result["messages"]) + + +def test_dump_api_request_debug_uses_responses_url(monkeypatch, tmp_path): + """Debug dumps should show /responses URL when in codex_responses mode.""" + import json + agent = _build_agent(monkeypatch) + agent.base_url = "http://127.0.0.1:9208/v1" + agent.logs_dir = tmp_path + + dump_file = agent._dump_api_request_debug(_codex_request_kwargs(), reason="preflight") + + payload = json.loads(dump_file.read_text()) + assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/responses" + + +def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path): + """Debug dumps should show /chat/completions URL for chat_completions mode.""" + import json + _patch_agent_bootstrap(monkeypatch) + agent = run_agent.AIAgent( + model="gpt-4o", + base_url="http://127.0.0.1:9208/v1", + api_key="test-key", + quiet_mode=True, + max_iterations=1, + skip_context_files=True, + skip_memory=True, + ) + agent.logs_dir = tmp_path + + dump_file = agent._dump_api_request_debug( + {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]}, + reason="preflight", + ) + + payload = json.loads(dump_file.read_text()) + assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions" diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index c02fb3cd..e72b8308 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -326,3 +326,74 @@ def test_resolve_requested_provider_precedence(monkeypatch): monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False) assert rp.resolve_requested_provider() == "auto" + + +# ── api_mode override tests ───────────────────────────────────────────── + + +def test_custom_endpoint_api_mode_from_config(monkeypatch): + """model.api_mode in config.yaml should override the default chat_completions.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, "_get_model_config", + lambda: { + "provider": "custom", + "base_url": "http://127.0.0.1:9208/v1", + "api_mode": "codex_responses", + }, + ) + monkeypatch.setenv("OPENAI_BASE_URL", "http://127.0.0.1:9208/v1") + monkeypatch.setenv("OPENAI_API_KEY", "test-key") + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.delenv("HERMES_API_MODE", raising=False) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_mode"] == "codex_responses" + assert resolved["base_url"] == "http://127.0.0.1:9208/v1" + + +def test_env_api_mode_overrides_config(monkeypatch): + """HERMES_API_MODE env var takes precedence over config.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {"api_mode": "chat_completions"}) + monkeypatch.setenv("OPENAI_BASE_URL", "http://127.0.0.1:9208/v1") + monkeypatch.setenv("OPENAI_API_KEY", "test-key") + monkeypatch.setenv("HERMES_API_MODE", "codex_responses") + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_mode"] == "codex_responses" + + +def test_invalid_api_mode_ignored(monkeypatch): + """Invalid api_mode values should fall back to chat_completions.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr(rp, "_get_model_config", lambda: {"api_mode": "bogus_mode"}) + monkeypatch.setenv("OPENAI_BASE_URL", "http://127.0.0.1:9208/v1") + monkeypatch.setenv("OPENAI_API_KEY", "test-key") + monkeypatch.delenv("HERMES_API_MODE", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_mode"] == "chat_completions" + + +def test_named_custom_provider_respects_api_mode(monkeypatch): + """Named custom providers should also pick up api_mode overrides.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-server") + monkeypatch.setattr( + rp, "_get_named_custom_provider", + lambda p: {"name": "my-server", "base_url": "http://localhost:8000/v1", "api_key": "sk-test"}, + ) + monkeypatch.setenv("HERMES_API_MODE", "codex_responses") + + resolved = rp.resolve_runtime_provider(requested="my-server") + + assert resolved["api_mode"] == "codex_responses" + assert resolved["base_url"] == "http://localhost:8000/v1"