feat: add native Anthropic auxiliary vision

This commit is contained in:
teknium1 2026-03-14 21:14:20 -07:00
parent dc11b86e4b
commit db362dbd4c
7 changed files with 386 additions and 30 deletions

View file

@ -391,6 +391,68 @@ def _sanitize_tool_id(tool_id: str) -> str:
return sanitized or "tool_0" return sanitized or "tool_0"
def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Convert an OpenAI-style image block to Anthropic's image source format."""
image_data = part.get("image_url", {})
url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
if url.startswith("data:"):
header, sep, data = url.partition(",")
if sep and ";base64" in header:
media_type = header[5:].split(";", 1)[0] or "image/png"
return {
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": data,
},
}
if url.startswith("http://") or url.startswith("https://"):
return {
"type": "image",
"source": {
"type": "url",
"url": url,
},
}
return None
def _convert_user_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
if isinstance(part, dict):
ptype = part.get("type")
if ptype == "text":
block = {"type": "text", "text": part.get("text", "")}
if isinstance(part.get("cache_control"), dict):
block["cache_control"] = dict(part["cache_control"])
return block
if ptype == "image_url":
return _convert_openai_image_part_to_anthropic(part)
if ptype == "image" and part.get("source"):
return dict(part)
if ptype == "image" and part.get("data"):
media_type = part.get("mimeType") or part.get("media_type") or "image/png"
return {
"type": "image",
"source": {
"type": "base64",
"media_type": media_type,
"data": part.get("data", ""),
},
}
if ptype == "tool_result":
return dict(part)
elif part is not None:
return {"type": "text", "text": str(part)}
return None
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]: def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
"""Convert OpenAI tool definitions to Anthropic format.""" """Convert OpenAI tool definitions to Anthropic format."""
if not tools: if not tools:
@ -495,7 +557,15 @@ def convert_messages_to_anthropic(
continue continue
# Regular user message # Regular user message
result.append({"role": "user", "content": content}) if isinstance(content, list):
converted_blocks = []
for part in content:
converted = _convert_user_content_part_to_anthropic(part)
if converted is not None:
converted_blocks.append(converted)
result.append({"role": "user", "content": converted_blocks or [{"type": "text", "text": ""}]})
else:
result.append({"role": "user", "content": content})
# Strip orphaned tool_use blocks (no matching tool_result follows) # Strip orphaned tool_use blocks (no matching tool_result follows)
tool_result_ids = set() tool_result_ids = set()

View file

@ -1,4 +1,4 @@
"""Shared auxiliary OpenAI client for cheap/fast side tasks. """Shared auxiliary client router for side tasks.
Provides a single resolution chain so every consumer (context compression, Provides a single resolution chain so every consumer (context compression,
session search, web extraction, vision analysis, browser vision) picks up session search, web extraction, vision analysis, browser vision) picks up
@ -10,21 +10,21 @@ Resolution order for text tasks (auto mode):
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex, 4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
wrapped to look like a chat.completions client) wrapped to look like a chat.completions client)
5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN) 5. Native Anthropic
checked via PROVIDER_REGISTRY entries with auth_type='api_key' 6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
6. None 7. None
Resolution order for vision/multimodal tasks (auto mode): Resolution order for vision/multimodal tasks (auto mode):
1. OpenRouter 1. Selected main provider, if it is one of the supported vision backends below
2. Nous Portal 2. OpenRouter
3. Codex OAuth (gpt-5.3-codex supports vision via Responses API) 3. Nous Portal
4. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.) 4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
5. None (API-key providers like z.ai/Kimi/MiniMax are skipped 5. Native Anthropic
they may not support multimodal) 6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
7. None
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER, Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task: CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task.
"openrouter", "nous", "codex", or "main" (= steps 3-5).
Default "auto" follows the chains above. Default "auto" follows the chains above.
Per-task model overrides (e.g. AUXILIARY_VISION_MODEL, Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
@ -74,6 +74,7 @@ auxiliary_is_nous: bool = False
_OPENROUTER_MODEL = "google/gemini-3-flash-preview" _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_NOUS_MODEL = "gemini-3-flash" _NOUS_MODEL = "gemini-3-flash"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
_AUTH_JSON_PATH = get_hermes_home() / "auth.json" _AUTH_JSON_PATH = get_hermes_home() / "auth.json"
# Codex fallback: uses the Responses API (the only endpoint the Codex # Codex fallback: uses the Responses API (the only endpoint the Codex
@ -309,6 +310,114 @@ class AsyncCodexAuxiliaryClient:
self.base_url = sync_wrapper.base_url self.base_url = sync_wrapper.base_url
class _AnthropicCompletionsAdapter:
"""OpenAI-client-compatible adapter for Anthropic Messages API."""
def __init__(self, real_client: Any, model: str):
self._client = real_client
self._model = model
def create(self, **kwargs) -> Any:
from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
messages = kwargs.get("messages", [])
model = kwargs.get("model", self._model)
tools = kwargs.get("tools")
tool_choice = kwargs.get("tool_choice")
max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
temperature = kwargs.get("temperature")
normalized_tool_choice = None
if isinstance(tool_choice, str):
normalized_tool_choice = tool_choice
elif isinstance(tool_choice, dict):
choice_type = str(tool_choice.get("type", "")).lower()
if choice_type == "function":
normalized_tool_choice = tool_choice.get("function", {}).get("name")
elif choice_type in {"auto", "required", "none"}:
normalized_tool_choice = choice_type
anthropic_kwargs = build_anthropic_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=max_tokens,
reasoning_config=None,
tool_choice=normalized_tool_choice,
)
if temperature is not None:
anthropic_kwargs["temperature"] = temperature
response = self._client.messages.create(**anthropic_kwargs)
assistant_message, finish_reason = normalize_anthropic_response(response)
usage = None
if hasattr(response, "usage") and response.usage:
prompt_tokens = getattr(response.usage, "input_tokens", 0) or 0
completion_tokens = getattr(response.usage, "output_tokens", 0) or 0
total_tokens = getattr(response.usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
usage = SimpleNamespace(
prompt_tokens=prompt_tokens,
completion_tokens=completion_tokens,
total_tokens=total_tokens,
)
choice = SimpleNamespace(
index=0,
message=assistant_message,
finish_reason=finish_reason,
)
return SimpleNamespace(
choices=[choice],
model=model,
usage=usage,
)
class _AnthropicChatShim:
def __init__(self, adapter: _AnthropicCompletionsAdapter):
self.completions = adapter
class AnthropicAuxiliaryClient:
"""OpenAI-client-compatible wrapper over a native Anthropic client."""
def __init__(self, real_client: Any, model: str, api_key: str, base_url: str):
self._real_client = real_client
adapter = _AnthropicCompletionsAdapter(real_client, model)
self.chat = _AnthropicChatShim(adapter)
self.api_key = api_key
self.base_url = base_url
def close(self):
close_fn = getattr(self._real_client, "close", None)
if callable(close_fn):
close_fn()
class _AsyncAnthropicCompletionsAdapter:
def __init__(self, sync_adapter: _AnthropicCompletionsAdapter):
self._sync = sync_adapter
async def create(self, **kwargs) -> Any:
import asyncio
return await asyncio.to_thread(self._sync.create, **kwargs)
class _AsyncAnthropicChatShim:
def __init__(self, adapter: _AsyncAnthropicCompletionsAdapter):
self.completions = adapter
class AsyncAnthropicAuxiliaryClient:
def __init__(self, sync_wrapper: "AnthropicAuxiliaryClient"):
sync_adapter = sync_wrapper.chat.completions
async_adapter = _AsyncAnthropicCompletionsAdapter(sync_adapter)
self.chat = _AsyncAnthropicChatShim(async_adapter)
self.api_key = sync_wrapper.api_key
self.base_url = sync_wrapper.base_url
def _read_nous_auth() -> Optional[dict]: def _read_nous_auth() -> Optional[dict]:
"""Read and validate ~/.hermes/auth.json for an active Nous provider. """Read and validate ~/.hermes/auth.json for an active Nous provider.
@ -380,6 +489,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
break break
if not api_key: if not api_key:
continue continue
if provider_id == "anthropic":
return _try_anthropic()
# Resolve base URL (with optional env-var override) # Resolve base URL (with optional env-var override)
# Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1 # Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1
env_url = "" env_url = ""
@ -484,6 +596,22 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
try:
from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
except ImportError:
return None, None
token = resolve_anthropic_token()
if not token:
return None, None
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
logger.debug("Auxiliary client: Anthropic native (%s)", model)
real_client = build_anthropic_client(token, _ANTHROPIC_DEFAULT_BASE_URL)
return AnthropicAuxiliaryClient(real_client, model, token, _ANTHROPIC_DEFAULT_BASE_URL), model
def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]: def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
"""Resolve a specific forced provider. Returns (None, None) if creds missing.""" """Resolve a specific forced provider. Returns (None, None) if creds missing."""
if forced == "openrouter": if forced == "openrouter":
@ -546,6 +674,8 @@ def _to_async_client(sync_client, model: str):
if isinstance(sync_client, CodexAuxiliaryClient): if isinstance(sync_client, CodexAuxiliaryClient):
return AsyncCodexAuxiliaryClient(sync_client), model return AsyncCodexAuxiliaryClient(sync_client), model
if isinstance(sync_client, AnthropicAuxiliaryClient):
return AsyncAnthropicAuxiliaryClient(sync_client), model
async_kwargs = { async_kwargs = {
"api_key": sync_client.api_key, "api_key": sync_client.api_key,
@ -686,6 +816,14 @@ def resolve_provider_client(
return None, None return None, None
if pconfig.auth_type == "api_key": if pconfig.auth_type == "api_key":
if provider == "anthropic":
client, default_model = _try_anthropic()
if client is None:
logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
return None, None
final_model = model or default_model
return (_to_async_client(client, final_model) if async_mode else (client, final_model))
# Find the first configured API key # Find the first configured API key
api_key = "" api_key = ""
for env_var in pconfig.api_key_env_vars: for env_var in pconfig.api_key_env_vars:
@ -772,6 +910,7 @@ _VISION_AUTO_PROVIDER_ORDER = (
"openrouter", "openrouter",
"nous", "nous",
"openai-codex", "openai-codex",
"anthropic",
"custom", "custom",
) )
@ -793,6 +932,8 @@ def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Option
return _try_nous() return _try_nous()
if provider == "openai-codex": if provider == "openai-codex":
return _try_codex() return _try_codex()
if provider == "anthropic":
return _try_anthropic()
if provider == "custom": if provider == "custom":
return _try_custom_endpoint() return _try_custom_endpoint()
return None, None return None, None
@ -802,19 +943,36 @@ def _strict_vision_backend_available(provider: str) -> bool:
return _resolve_strict_vision_backend(provider)[0] is not None return _resolve_strict_vision_backend(provider)[0] is not None
def _preferred_main_vision_provider() -> Optional[str]:
"""Return the selected main provider when it is also a supported vision backend."""
try:
from hermes_cli.config import load_config
config = load_config()
model_cfg = config.get("model", {})
if isinstance(model_cfg, dict):
provider = _normalize_vision_provider(model_cfg.get("provider", ""))
if provider in _VISION_AUTO_PROVIDER_ORDER:
return provider
except Exception:
pass
return None
def get_available_vision_backends() -> List[str]: def get_available_vision_backends() -> List[str]:
"""Return the currently available vision backends in auto-selection order. """Return the currently available vision backends in auto-selection order.
This is the single source of truth for setup, tool gating, and runtime This is the single source of truth for setup, tool gating, and runtime
auto-routing of vision tasks. Phase 1 keeps the auto list conservative: auto-routing of vision tasks. The selected main provider is preferred when
OpenRouter, Nous Portal, Codex OAuth, then custom OpenAI-compatible it is also a known-good vision backend; otherwise Hermes falls back through
endpoints. Explicit provider overrides can still route elsewhere. the standard conservative order.
""" """
return [ ordered = list(_VISION_AUTO_PROVIDER_ORDER)
provider preferred = _preferred_main_vision_provider()
for provider in _VISION_AUTO_PROVIDER_ORDER if preferred in ordered:
if _strict_vision_backend_available(provider) ordered.remove(preferred)
] ordered.insert(0, preferred)
return [provider for provider in ordered if _strict_vision_backend_available(provider)]
def resolve_vision_provider_client( def resolve_vision_provider_client(

View file

@ -1268,11 +1268,9 @@ def setup_model_provider(config: dict):
_vision_needs_setup = not bool(_vision_backends) _vision_needs_setup = not bool(_vision_backends)
if selected_provider in {"openrouter", "nous", "openai-codex"}: if selected_provider in _vision_backends:
# If the user just selected one of our known-good vision backends during # If the user just selected a backend Hermes can already use for
# setup, treat vision as covered. Auth/setup failure returns earlier. # vision, treat it as covered. Auth/setup failure returns earlier.
_vision_needs_setup = False
elif selected_provider == "custom" and "custom" in _vision_backends:
_vision_needs_setup = False _vision_needs_setup = False
if _vision_needs_setup: if _vision_needs_setup:

View file

@ -10,6 +10,8 @@ import pytest
from agent.auxiliary_client import ( from agent.auxiliary_client import (
get_text_auxiliary_client, get_text_auxiliary_client,
get_vision_auxiliary_client, get_vision_auxiliary_client,
get_available_vision_backends,
resolve_provider_client,
auxiliary_max_tokens_param, auxiliary_max_tokens_param,
_read_codex_access_token, _read_codex_access_token,
_get_auxiliary_provider, _get_auxiliary_provider,
@ -24,6 +26,7 @@ def _clean_env(monkeypatch):
for key in ( for key in (
"OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY",
"OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL",
"ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN",
# Per-task provider/model overrides # Per-task provider/model overrides
"AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL",
"AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL",
@ -164,14 +167,74 @@ class TestGetTextAuxiliaryClient:
class TestVisionClientFallback: class TestVisionClientFallback:
"""Vision client auto mode only tries OpenRouter + Nous (multimodal-capable).""" """Vision client auto mode resolves known-good multimodal backends."""
def test_vision_returns_none_without_any_credentials(self): def test_vision_returns_none_without_any_credentials(self):
with patch("agent.auxiliary_client._read_nous_auth", return_value=None): with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.auxiliary_client._try_anthropic", return_value=(None, None)),
):
client, model = get_vision_auxiliary_client() client, model = get_vision_auxiliary_client()
assert client is None assert client is None
assert model is None assert model is None
def test_vision_auto_includes_anthropic_when_configured(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
):
backends = get_available_vision_backends()
assert "anthropic" in backends
def test_resolve_provider_client_returns_native_anthropic_wrapper(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
):
client, model = resolve_provider_client("anthropic")
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_vision_auto_uses_anthropic_when_no_higher_priority_backend(self, monkeypatch):
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
):
client, model = get_vision_auxiliary_client()
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_selected_anthropic_provider_is_preferred_for_vision_auto(self, monkeypatch):
monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
def fake_load_config():
return {"model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}}
with (
patch("agent.auxiliary_client._read_nous_auth", return_value=None),
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()),
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-key"),
patch("agent.auxiliary_client.OpenAI") as mock_openai,
patch("hermes_cli.config.load_config", fake_load_config),
):
client, model = get_vision_auxiliary_client()
assert client is not None
assert client.__class__.__name__ == "AnthropicAuxiliaryClient"
assert model == "claude-haiku-4-5-20251001"
def test_vision_auto_includes_codex(self, codex_auth_dir): def test_vision_auto_includes_codex(self, codex_auth_dir):
"""Codex supports vision (gpt-5.3-codex), so auto mode should use it.""" """Codex supports vision (gpt-5.3-codex), so auto mode should use it."""
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \

View file

@ -111,6 +111,7 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: []) monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
setup_model_provider(config) setup_model_provider(config)
save_config(config) save_config(config)
@ -149,6 +150,7 @@ def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_pa
monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None) monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: []) monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: []) monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
setup_model_provider(config) setup_model_provider(config)
env = _read_env(tmp_path) env = _read_env(tmp_path)
@ -224,3 +226,17 @@ def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatc
assert "missing run 'hermes setup' to configure" not in output assert "missing run 'hermes setup' to configure" not in output
assert "Mixture of Agents" in output assert "Mixture of Agents" in output
assert "missing OPENROUTER_API_KEY" in output assert "missing OPENROUTER_API_KEY" in output
def test_setup_summary_marks_anthropic_auth_as_vision_available(tmp_path, monkeypatch, capsys):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
_clear_provider_env(monkeypatch)
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-key")
monkeypatch.setattr("shutil.which", lambda _name: None)
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: ["anthropic"])
_print_setup_summary(load_config(), tmp_path)
output = capsys.readouterr().out
assert "Vision (image analysis)" in output
assert "missing run 'hermes setup' to configure" not in output

View file

@ -567,6 +567,56 @@ class TestConvertMessages:
assert tool_block["content"] == "result" assert tool_block["content"] == "result"
assert tool_block["cache_control"] == {"type": "ephemeral"} assert tool_block["cache_control"] == {"type": "ephemeral"}
def test_converts_data_url_image_to_anthropic_image_block(self):
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image"},
{
"type": "image_url",
"image_url": {"url": "data:image/png;base64,ZmFrZQ=="},
},
],
}
]
_, result = convert_messages_to_anthropic(messages)
blocks = result[0]["content"]
assert blocks[0] == {"type": "text", "text": "Describe this image"}
assert blocks[1] == {
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": "ZmFrZQ==",
},
}
def test_converts_remote_image_url_to_anthropic_image_block(self):
messages = [
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image"},
{
"type": "image_url",
"image_url": {"url": "https://example.com/cat.png"},
},
],
}
]
_, result = convert_messages_to_anthropic(messages)
blocks = result[0]["content"]
assert blocks[1] == {
"type": "image",
"source": {
"type": "url",
"url": "https://example.com/cat.png",
},
}
def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self): def test_empty_cached_assistant_tool_turn_converts_without_empty_text_block(self):
messages = apply_anthropic_cache_control([ messages = apply_anthropic_cache_control([
{"role": "system", "content": "System prompt"}, {"role": "system", "content": "System prompt"},

View file

@ -3,7 +3,8 @@
Vision Tools Module Vision Tools Module
This module provides vision analysis tools that work with image URLs. This module provides vision analysis tools that work with image URLs.
Uses Gemini 3 Flash Preview via OpenRouter API for intelligent image understanding. Uses the centralized auxiliary vision router, which can select OpenRouter,
Nous, Codex, native Anthropic, or a custom OpenAI-compatible endpoint.
Available tools: Available tools:
- vision_analyze_tool: Analyze images from URLs with custom prompts - vision_analyze_tool: Analyze images from URLs with custom prompts
@ -409,7 +410,7 @@ if __name__ == "__main__":
if not api_available: if not api_available:
print("❌ No auxiliary vision model available") print("❌ No auxiliary vision model available")
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.") print("Configure a supported multimodal backend (OpenRouter, Nous, Codex, Anthropic, or a custom OpenAI-compatible endpoint).")
exit(1) exit(1)
else: else:
print("✅ Vision model available") print("✅ Vision model available")