feat: add direct endpoint overrides for auxiliary and delegation

Add base_url/api_key overrides for auxiliary tasks and delegation so users can
route those flows straight to a custom OpenAI-compatible endpoint without
having to rely on provider=main or named custom providers.

Also clear gateway session env vars in test isolation so the full suite stays
deterministic when run from a messaging-backed agent session.
This commit is contained in:
teknium1 2026-03-14 20:48:29 -07:00
parent 6c24d76533
commit 9f6bccd76a
12 changed files with 526 additions and 99 deletions

View file

@ -24,9 +24,11 @@ def _clean_env(monkeypatch):
for key in (
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
# Per-task provider/model overrides
# Per-task provider/model/direct-endpoint overrides
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
):
monkeypatch.delenv(key, raising=False)
@ -142,6 +144,27 @@ class TestGetTextAuxiliaryClient:
call_kwargs = mock_openai.call_args
assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"
def test_task_direct_endpoint_override(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_API_KEY", "task-key")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client("web_extract")
assert model == "task-model"
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:2345/v1"
assert mock_openai.call_args.kwargs["api_key"] == "task-key"
def test_task_direct_endpoint_without_openai_key_does_not_fall_back(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_BASE_URL", "http://localhost:2345/v1")
monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_MODEL", "task-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client("web_extract")
assert client is None
assert model is None
mock_openai.assert_not_called()
def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
patch("agent.auxiliary_client.OpenAI") as mock_openai:
@ -194,6 +217,27 @@ class TestVisionClientFallback:
client, model = get_vision_auxiliary_client()
assert client is not None # Custom endpoint picked up as fallback
def test_vision_direct_endpoint_override(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
monkeypatch.setenv("AUXILIARY_VISION_API_KEY", "vision-key")
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_vision_auxiliary_client()
assert model == "vision-model"
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:4567/v1"
assert mock_openai.call_args.kwargs["api_key"] == "vision-key"
def test_vision_direct_endpoint_requires_openai_api_key(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("AUXILIARY_VISION_BASE_URL", "http://localhost:4567/v1")
monkeypatch.setenv("AUXILIARY_VISION_MODEL", "vision-model")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_vision_auxiliary_client()
assert client is None
assert model is None
mock_openai.assert_not_called()
def test_vision_uses_openrouter_when_available(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
@ -390,6 +434,24 @@ class TestTaskSpecificOverrides:
client, model = get_text_auxiliary_client("web_extract")
assert model == "google/gemini-3-flash-preview"
def test_task_direct_endpoint_from_config(self, monkeypatch, tmp_path):
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
(hermes_home / "config.yaml").write_text(
"""auxiliary:
web_extract:
base_url: http://localhost:3456/v1
api_key: config-key
model: config-model
"""
)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
with patch("agent.auxiliary_client.OpenAI") as mock_openai:
client, model = get_text_auxiliary_client("web_extract")
assert model == "config-model"
assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:3456/v1"
assert mock_openai.call_args.kwargs["api_key"] == "config-key"
def test_task_without_override_uses_auto(self, monkeypatch):
"""A task with no provider env var falls through to auto chain."""
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

View file

@ -26,6 +26,12 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
(fake_home / "memories").mkdir()
(fake_home / "skills").mkdir()
monkeypatch.setenv("HERMES_HOME", str(fake_home))
# Tests should not inherit the agent's current gateway/messaging surface.
# Individual tests that need gateway behavior set these explicitly.
monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
@pytest.fixture()

View file

@ -25,7 +25,9 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
# Clear env vars
for key in (
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_VISION_BASE_URL", "AUXILIARY_VISION_API_KEY",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
"AUXILIARY_WEB_EXTRACT_BASE_URL", "AUXILIARY_WEB_EXTRACT_API_KEY",
"CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL",
):
monkeypatch.delenv(key, raising=False)
@ -47,19 +49,35 @@ def _run_auxiliary_bridge(config_dict, monkeypatch):
auxiliary_cfg = config_dict.get("auxiliary", {})
if auxiliary_cfg and isinstance(auxiliary_cfg, dict):
aux_task_env = {
"vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"),
"web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"),
"vision": {
"provider": "AUXILIARY_VISION_PROVIDER",
"model": "AUXILIARY_VISION_MODEL",
"base_url": "AUXILIARY_VISION_BASE_URL",
"api_key": "AUXILIARY_VISION_API_KEY",
},
"web_extract": {
"provider": "AUXILIARY_WEB_EXTRACT_PROVIDER",
"model": "AUXILIARY_WEB_EXTRACT_MODEL",
"base_url": "AUXILIARY_WEB_EXTRACT_BASE_URL",
"api_key": "AUXILIARY_WEB_EXTRACT_API_KEY",
},
}
for task_key, (prov_env, model_env) in aux_task_env.items():
for task_key, env_map in aux_task_env.items():
task_cfg = auxiliary_cfg.get(task_key, {})
if not isinstance(task_cfg, dict):
continue
prov = str(task_cfg.get("provider", "")).strip()
model = str(task_cfg.get("model", "")).strip()
base_url = str(task_cfg.get("base_url", "")).strip()
api_key = str(task_cfg.get("api_key", "")).strip()
if prov and prov != "auto":
os.environ[prov_env] = prov
os.environ[env_map["provider"]] = prov
if model:
os.environ[model_env] = model
os.environ[env_map["model"]] = model
if base_url:
os.environ[env_map["base_url"]] = base_url
if api_key:
os.environ[env_map["api_key"]] = api_key
# ── Config bridging tests ────────────────────────────────────────────────────
@ -101,6 +119,21 @@ class TestAuxiliaryConfigBridge:
assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous"
assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash"
def test_direct_endpoint_bridged(self, monkeypatch):
config = {
"auxiliary": {
"vision": {
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"model": "qwen2.5-vl",
}
}
}
_run_auxiliary_bridge(config, monkeypatch)
assert os.environ.get("AUXILIARY_VISION_BASE_URL") == "http://localhost:1234/v1"
assert os.environ.get("AUXILIARY_VISION_API_KEY") == "local-key"
assert os.environ.get("AUXILIARY_VISION_MODEL") == "qwen2.5-vl"
def test_compression_provider_bridged(self, monkeypatch):
config = {
"compression": {
@ -200,8 +233,12 @@ class TestGatewayBridgeCodeParity:
# Check for key patterns that indicate the bridge is present
assert "AUXILIARY_VISION_PROVIDER" in content
assert "AUXILIARY_VISION_MODEL" in content
assert "AUXILIARY_VISION_BASE_URL" in content
assert "AUXILIARY_VISION_API_KEY" in content
assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content
assert "AUXILIARY_WEB_EXTRACT_MODEL" in content
assert "AUXILIARY_WEB_EXTRACT_BASE_URL" in content
assert "AUXILIARY_WEB_EXTRACT_API_KEY" in content
def test_gateway_has_compression_provider(self):
"""Gateway must bridge compression.summary_provider."""

View file

@ -10,6 +10,7 @@ Run with: python -m pytest tests/test_delegate.py -v
"""
import json
import os
import sys
import unittest
from unittest.mock import MagicMock, patch
@ -462,6 +463,43 @@ class TestDelegationCredentialResolution(unittest.TestCase):
self.assertEqual(creds["api_mode"], "chat_completions")
mock_resolve.assert_called_once_with(requested="openrouter")
def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"provider": "openrouter",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["model"], "qwen2.5-coder")
self.assertEqual(creds["provider"], "custom")
self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
self.assertEqual(creds["api_key"], "local-key")
self.assertEqual(creds["api_mode"], "chat_completions")
def test_direct_endpoint_falls_back_to_openai_api_key_env(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
creds = _resolve_delegation_credentials(cfg, parent)
self.assertEqual(creds["api_key"], "env-openai-key")
self.assertEqual(creds["provider"], "custom")
def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self):
parent = _make_mock_parent(depth=0)
cfg = {
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
}
with patch.dict(os.environ, {"OPENROUTER_API_KEY": "env-openrouter-key"}, clear=False):
with self.assertRaises(ValueError) as ctx:
_resolve_delegation_credentials(cfg, parent)
self.assertIn("OPENAI_API_KEY", str(ctx.exception))
@patch("hermes_cli.runtime_provider.resolve_runtime_provider")
def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
"""Nous provider resolves Nous Portal base_url and api_key."""
@ -589,6 +627,40 @@ class TestDelegationProviderIntegration(unittest.TestCase):
self.assertNotEqual(kwargs["base_url"], parent.base_url)
self.assertNotEqual(kwargs["api_key"], parent.api_key)
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
mock_cfg.return_value = {
"max_iterations": 45,
"model": "qwen2.5-coder",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
}
mock_creds.return_value = {
"model": "qwen2.5-coder",
"provider": "custom",
"base_url": "http://localhost:1234/v1",
"api_key": "local-key",
"api_mode": "chat_completions",
}
parent = _make_mock_parent(depth=0)
with patch("run_agent.AIAgent") as MockAgent:
mock_child = MagicMock()
mock_child.run_conversation.return_value = {
"final_response": "done", "completed": True, "api_calls": 1
}
MockAgent.return_value = mock_child
delegate_task(goal="Direct endpoint test", parent_agent=parent)
_, kwargs = MockAgent.call_args
self.assertEqual(kwargs["model"], "qwen2.5-coder")
self.assertEqual(kwargs["provider"], "custom")
self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
self.assertEqual(kwargs["api_key"], "local-key")
self.assertEqual(kwargs["api_mode"], "chat_completions")
@patch("tools.delegate_tool._load_config")
@patch("tools.delegate_tool._resolve_delegation_credentials")
def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):