feat: add native Anthropic auxiliary vision

This commit is contained in:
teknium1 2026-03-14 21:14:20 -07:00
parent dc11b86e4b
commit db362dbd4c
7 changed files with 386 additions and 30 deletions

View file

@ -10,6 +10,8 @@ import pytest
from agent.auxiliary_client import (
get_text_auxiliary_client,
get_vision_auxiliary_client,
get_available_vision_backends,
resolve_provider_client,
auxiliary_max_tokens_param,
_read_codex_access_token,
_get_auxiliary_provider,
@ -24,6 +26,7 @@ def _clean_env(monkeypatch):
for key in (
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
# Per-task provider/model overrides
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
@ -164,14 +167,74 @@ class TestGetTextAuxiliaryClient:
class TestVisionClientFallback:
"""Vision client auto mode only tries OpenRouter + Nous (multimodal-capable)."""
"""Vision client auto mode resolves known-good multimodal backends."""
def test_vision_returns_none_without_any_credentials(self):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.auxiliary_client._try_anthropic", return_value=(None, None)),
):
client, model = get_vision_auxiliary_client()
assert client is None
assert model is None
def test_vision_auto_includes_anthropic_when_configured(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
):
backends = get_available_vision_backends()
assert "anthropic" in backends
def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
):
client, model = resolve_provider_client("anthropic")
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_vision_auto_uses_anthropic_when_no_higher_priority_backend(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
):
client, model = get_vision_auxiliary_client()
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_selected_anthropic_provider_is_preferred_for_vision_auto(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
def fake_load_config():
return {"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}}
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
patch("hermes_cli.config.load_config", fake_load_config),
):
client, model = get_vision_auxiliary_client()
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_vision_auto_includes_codex(self, codex_auth_dir):
"""Codex supports vision (gpt-5.3-codex), so auto mode should use it."""
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \

View file

@ -111,6 +111,7 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
setup_model_provider(config)
save_config(config)
@ -149,6 +150,7 @@ def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_pa
monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
setup_model_provider(config)
env = _read_env(tmp_path)
@ -224,3 +226,17 @@ def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatc
assert "missing run 'hermes setup' to configure" not in output
assert "Mixture of Agents" in output
assert "missing OPENROUTER_API_KEY" in output
def test_setup_summary_marks_anthropic_auth_as_vision_available(tmp_path, monkeypatch, capsys):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_clear_provider_env(monkeypatch)
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
monkeypatch.setattr("shutil.which", lambda _name: None)
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: ["anthropic"])
_print_setup_summary(load_config(), tmp_path)
output = capsys.readouterr().out
assert "Vision (image analysis)" in output
assert "missing run 'hermes setup' to configure" not in output

View file

@ -567,6 +567,56 @@ class TestConvertMessages:
assert tool_block["content"] == "result"
assert tool_block["cache_control"] == {"type": "ephemeral"}
def test_converts_data_url_image_to_anthropic_image_block(self):
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image"},
{
"type": "image_url",
"image_url": {"url": "data:image/png;base64,ZmFrZQ=="},
},
],
}
]
_, result = convert_messages_to_anthropic(messages)
blocks = result[0]["content"]
assert blocks[0] == {"type": "text", "text": "Describe this image"}
assert blocks[1] == {
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": "ZmFrZQ==",
},
}
def test_converts_remote_image_url_to_anthropic_image_block(self):
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image"},
{
"type": "image_url",
"image_url": {"url": "https://example.com/cat.png"},
},
],
}
]
_, result = convert_messages_to_anthropic(messages)
blocks = result[0]["content"]
assert blocks[1] == {
"type": "image",
"source": {
"type": "url",
"url": "https://example.com/cat.png",
},
}
def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self):
messages = apply_anthropic_cache_control([
{"role": "system", "content": "System prompt"},