fix: convert anthropic image content blocks
This commit is contained in:
parent
4f4e2671ac
commit
735a6e7651
7 changed files with 343 additions and 15 deletions
|
|
@ -169,7 +169,7 @@ class TestGetTextAuxiliaryClient:
|
|||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
patch("agent.auxiliary_client.OpenAI") as mock_openai:
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert model == "gpt-5.2-codex"
|
||||
# Returns a CodexAuxiliaryClient wrapper, not a raw OpenAI client
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
|
|
@ -202,7 +202,7 @@ class TestVisionClientFallback:
|
|||
client, model = get_vision_auxiliary_client()
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
def test_vision_auto_falls_back_to_custom_endpoint(self, monkeypatch):
|
||||
"""Custom endpoint is used as fallback in vision auto mode.
|
||||
|
|
@ -264,7 +264,7 @@ class TestVisionClientFallback:
|
|||
client, model = get_vision_auxiliary_client()
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
|
||||
class TestGetAuxiliaryProvider:
|
||||
|
|
@ -382,7 +382,7 @@ class TestResolveForcedProvider:
|
|||
client, model = _resolve_forced_provider("main")
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
def test_forced_codex(self, codex_auth_dir, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
|
||||
|
|
@ -390,7 +390,7 @@ class TestResolveForcedProvider:
|
|||
client, model = _resolve_forced_provider("codex")
|
||||
from agent.auxiliary_client import CodexAuxiliaryClient
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert model == "gpt-5.2-codex"
|
||||
|
||||
def test_forced_codex_no_token(self, monkeypatch):
|
||||
with patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
|
||||
|
|
|
|||
|
|
@ -467,6 +467,59 @@ class TestConvertMessages:
|
|||
assert len(result) == 1
|
||||
assert result[0]["role"] == "user"
|
||||
|
||||
def test_converts_user_image_url_blocks_to_anthropic_image_blocks(self):
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "Can you see this?"},
|
||||
{"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
_, result = convert_messages_to_anthropic(messages)
|
||||
|
||||
assert result == [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "Can you see this?"},
|
||||
{"type": "image", "source": {"type": "url", "url": "https://example.com/cat.png"}},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
def test_converts_data_url_image_blocks_to_base64_anthropic_image_blocks(self):
|
||||
messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "input_text", "text": "What is in this screenshot?"},
|
||||
{"type": "input_image", "image_url": "data:image/png;base64,AAAA"},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
_, result = convert_messages_to_anthropic(messages)
|
||||
|
||||
assert result == [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "What is in this screenshot?"},
|
||||
{
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": "image/png",
|
||||
"data": "AAAA",
|
||||
},
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
|
||||
def test_converts_tool_calls(self):
|
||||
messages = [
|
||||
{
|
||||
|
|
|
|||
|
|
@ -543,7 +543,7 @@ class TestAuxiliaryClientProviderPriority:
|
|||
patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \
|
||||
patch("agent.auxiliary_client.OpenAI"):
|
||||
client, model = get_text_auxiliary_client()
|
||||
assert model == "gpt-5.3-codex"
|
||||
assert model == "gpt-5.2-codex"
|
||||
assert isinstance(client, CodexAuxiliaryClient)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import uuid
|
|||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
|
@ -1986,6 +1986,69 @@ class TestBuildApiKwargsAnthropicMaxTokens:
|
|||
assert call_args[0][3] is None
|
||||
|
||||
|
||||
class TestAnthropicImageFallback:
|
||||
def test_build_api_kwargs_converts_multimodal_user_image_to_text(self, agent):
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.reasoning_config = None
|
||||
|
||||
api_messages = [{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "Can you see this now?"},
|
||||
{"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
|
||||
],
|
||||
}]
|
||||
|
||||
with (
|
||||
patch("tools.vision_tools.vision_analyze_tool", new=AsyncMock(return_value=json.dumps({"success": True, "analysis": "A cat sitting on a chair."}))),
|
||||
patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build,
|
||||
):
|
||||
mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
|
||||
agent._build_api_kwargs(api_messages)
|
||||
|
||||
kwargs = mock_build.call_args.kwargs or dict(zip(
|
||||
["model", "messages", "tools", "max_tokens", "reasoning_config"],
|
||||
mock_build.call_args.args,
|
||||
))
|
||||
transformed = kwargs["messages"]
|
||||
assert isinstance(transformed[0]["content"], str)
|
||||
assert "A cat sitting on a chair." in transformed[0]["content"]
|
||||
assert "Can you see this now?" in transformed[0]["content"]
|
||||
assert "vision_analyze with image_url: https://example.com/cat.png" in transformed[0]["content"]
|
||||
|
||||
def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images(self, agent):
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.reasoning_config = None
|
||||
data_url = "data:image/png;base64,QUFBQQ=="
|
||||
|
||||
api_messages = [
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "first"},
|
||||
{"type": "input_image", "image_url": data_url},
|
||||
],
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{"type": "text", "text": "second"},
|
||||
{"type": "input_image", "image_url": data_url},
|
||||
],
|
||||
},
|
||||
]
|
||||
|
||||
mock_vision = AsyncMock(return_value=json.dumps({"success": True, "analysis": "A small test image."}))
|
||||
with (
|
||||
patch("tools.vision_tools.vision_analyze_tool", new=mock_vision),
|
||||
patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build,
|
||||
):
|
||||
mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
|
||||
agent._build_api_kwargs(api_messages)
|
||||
|
||||
assert mock_vision.await_count == 1
|
||||
|
||||
|
||||
class TestFallbackAnthropicProvider:
|
||||
"""Bug fix: _try_activate_fallback had no case for anthropic provider."""
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue