fix: sanitize chat payloads and provider precedence (#1253)

fix: sanitize chat payloads and provider precedence
This commit is contained in:
Teknium 2026-03-14 00:09:14 -07:00 committed by GitHub
commit 29176f302e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 159 additions and 11 deletions

2
cli.py
View file

@@ -1151,8 +1151,8 @@ class HermesCLI:
# Provider selection is resolved lazily at use-time via _ensure_runtime_credentials(). # Provider selection is resolved lazily at use-time via _ensure_runtime_credentials().
self.requested_provider = ( self.requested_provider = (
provider provider
or os.getenv("HERMES_INFERENCE_PROVIDER")
or CLI_CONFIG["model"].get("provider") or CLI_CONFIG["model"].get("provider")
or os.getenv("HERMES_INFERENCE_PROVIDER")
or "auto" or "auto"
) )
self._provider_source: Optional[str] = None self._provider_source: Optional[str] = None

View file

@@ -746,8 +746,8 @@ def cmd_model(args):
config_provider = model_cfg.get("provider") config_provider = model_cfg.get("provider")
effective_provider = ( effective_provider = (
os.getenv("HERMES_INFERENCE_PROVIDER") config_provider
or config_provider or os.getenv("HERMES_INFERENCE_PROVIDER")
or "auto" or "auto"
) )
try: try:

View file

@@ -29,19 +29,21 @@ def _get_model_config() -> Dict[str, Any]:
def resolve_requested_provider(requested: Optional[str] = None) -> str: def resolve_requested_provider(requested: Optional[str] = None) -> str:
"""Resolve provider request from explicit arg, env, then config.""" """Resolve provider request from explicit arg, config, then env."""
if requested and requested.strip(): if requested and requested.strip():
return requested.strip().lower() return requested.strip().lower()
env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
if env_provider:
return env_provider
model_cfg = _get_model_config() model_cfg = _get_model_config()
cfg_provider = model_cfg.get("provider") cfg_provider = model_cfg.get("provider")
if isinstance(cfg_provider, str) and cfg_provider.strip(): if isinstance(cfg_provider, str) and cfg_provider.strip():
return cfg_provider.strip().lower() return cfg_provider.strip().lower()
# Prefer the persisted config selection over any stale shell/.env
# provider override so chat uses the endpoint the user last saved.
env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
if env_provider:
return env_provider
return "auto" return "auto"

View file

@@ -2748,6 +2748,42 @@ class AIAgent:
return kwargs return kwargs
sanitized_messages = api_messages
needs_sanitization = False
for msg in api_messages:
if not isinstance(msg, dict):
continue
if "codex_reasoning_items" in msg:
needs_sanitization = True
break
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tool_call in tool_calls:
if not isinstance(tool_call, dict):
continue
if "call_id" in tool_call or "response_item_id" in tool_call:
needs_sanitization = True
break
if needs_sanitization:
break
if needs_sanitization:
sanitized_messages = copy.deepcopy(api_messages)
for msg in sanitized_messages:
if not isinstance(msg, dict):
continue
# Codex-only replay state must not leak into strict chat-completions APIs.
msg.pop("codex_reasoning_items", None)
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tool_call in tool_calls:
if isinstance(tool_call, dict):
tool_call.pop("call_id", None)
tool_call.pop("response_item_id", None)
provider_preferences = {} provider_preferences = {}
if self.providers_allowed: if self.providers_allowed:
provider_preferences["only"] = self.providers_allowed provider_preferences["only"] = self.providers_allowed
@@ -2764,7 +2800,7 @@ class AIAgent:
api_kwargs = { api_kwargs = {
"model": self.model, "model": self.model,
"messages": api_messages, "messages": sanitized_messages,
"tools": self.tools if self.tools else None, "tools": self.tools if self.tools else None,
"timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)), "timeout": float(os.getenv("HERMES_API_TIMEOUT", 900.0)),
} }

View file

@@ -3,7 +3,7 @@
import json import json
import os import os
from pathlib import Path from pathlib import Path
from multiprocessing import Lock from threading import Lock
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
import pytest import pytest

View file

@@ -162,6 +162,22 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch):
assert shell.api_mode == "codex_responses" assert shell.api_mode == "codex_responses"
def test_cli_prefers_config_provider_over_stale_env_override(monkeypatch):
cli = _import_cli()
monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "openrouter")
config_copy = dict(cli.CLI_CONFIG)
model_copy = dict(config_copy.get("model", {}))
model_copy["provider"] = "custom"
model_copy["base_url"] = "https://api.fireworks.ai/inference/v1"
config_copy["model"] = model_copy
monkeypatch.setattr(cli, "CLI_CONFIG", config_copy)
shell = cli.HermesCLI(model="fireworks/minimax-m2p5", compact=True, max_turns=1)
assert shell.requested_provider == "custom"
def test_codex_provider_replaces_incompatible_default_model(monkeypatch): def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
"""When provider resolves to openai-codex and no model was explicitly """When provider resolves to openai-codex and no model was explicitly
chosen, the global config default (e.g. anthropic/claude-opus-4.6) must chosen, the global config default (e.g. anthropic/claude-opus-4.6) must

View file

@@ -95,6 +95,47 @@ class TestBuildApiKwargsOpenRouter:
assert "instructions" not in kwargs assert "instructions" not in kwargs
assert "store" not in kwargs assert "store" not in kwargs
def test_strips_codex_only_tool_call_fields_from_chat_messages(self, monkeypatch):
agent = _make_agent(monkeypatch, "openrouter")
messages = [
{"role": "user", "content": "hi"},
{
"role": "assistant",
"content": "Checking now.",
"codex_reasoning_items": [
{"type": "reasoning", "id": "rs_1", "encrypted_content": "blob"},
],
"tool_calls": [
{
"id": "call_123",
"call_id": "call_123",
"response_item_id": "fc_123",
"type": "function",
"function": {"name": "terminal", "arguments": "{\"command\":\"pwd\"}"},
"extra_content": {"thought_signature": "opaque"},
}
],
},
{"role": "tool", "tool_call_id": "call_123", "content": "/tmp"},
]
kwargs = agent._build_api_kwargs(messages)
assistant_msg = kwargs["messages"][1]
tool_call = assistant_msg["tool_calls"][0]
assert "codex_reasoning_items" not in assistant_msg
assert tool_call["id"] == "call_123"
assert tool_call["function"]["name"] == "terminal"
assert tool_call["extra_content"] == {"thought_signature": "opaque"}
assert "call_id" not in tool_call
assert "response_item_id" not in tool_call
# Original stored history must remain unchanged for Responses replay mode.
assert messages[1]["tool_calls"][0]["call_id"] == "call_123"
assert messages[1]["tool_calls"][0]["response_item_id"] == "fc_123"
assert "codex_reasoning_items" in messages[1]
class TestBuildApiKwargsNousPortal: class TestBuildApiKwargsNousPortal:
def test_includes_nous_product_tags(self, monkeypatch): def test_includes_nous_product_tags(self, monkeypatch):
@@ -127,6 +168,52 @@ class TestBuildApiKwargsCustomEndpoint:
extra = kwargs.get("extra_body", {}) extra = kwargs.get("extra_body", {})
assert "reasoning" not in extra assert "reasoning" not in extra
def test_fireworks_tool_call_payload_strips_codex_only_fields(self, monkeypatch):
agent = _make_agent(
monkeypatch,
"custom",
base_url="https://api.fireworks.ai/inference/v1",
)
messages = [
{"role": "user", "content": "hi"},
{
"role": "assistant",
"content": "Checking now.",
"codex_reasoning_items": [
{"type": "reasoning", "id": "rs_1", "encrypted_content": "blob"},
],
"tool_calls": [
{
"id": "call_fw_123",
"call_id": "call_fw_123",
"response_item_id": "fc_fw_123",
"type": "function",
"function": {
"name": "terminal",
"arguments": "{\"command\":\"pwd\"}",
},
}
],
},
{"role": "tool", "tool_call_id": "call_fw_123", "content": "/tmp"},
]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["tools"][0]["function"]["name"] == "web_search"
assert "input" not in kwargs
assert kwargs.get("extra_body", {}) == {}
assistant_msg = kwargs["messages"][1]
tool_call = assistant_msg["tool_calls"][0]
assert "codex_reasoning_items" not in assistant_msg
assert tool_call["id"] == "call_fw_123"
assert tool_call["type"] == "function"
assert tool_call["function"]["name"] == "terminal"
assert "call_id" not in tool_call
assert "response_item_id" not in tool_call
class TestBuildApiKwargsCodex: class TestBuildApiKwargsCodex:
def test_uses_responses_api_format(self, monkeypatch): def test_uses_responses_api_format(self, monkeypatch):

View file

@@ -181,3 +181,10 @@ def test_resolve_requested_provider_precedence(monkeypatch):
monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous") monkeypatch.setenv("HERMES_INFERENCE_PROVIDER", "nous")
monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"}) monkeypatch.setattr(rp, "_get_model_config", lambda: {"provider": "openai-codex"})
assert rp.resolve_requested_provider("openrouter") == "openrouter" assert rp.resolve_requested_provider("openrouter") == "openrouter"
assert rp.resolve_requested_provider() == "openai-codex"
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
assert rp.resolve_requested_provider() == "nous"
monkeypatch.delenv("HERMES_INFERENCE_PROVIDER", raising=False)
assert rp.resolve_requested_provider() == "auto"