fix(anthropic): address gaps found in deep-dive audit
After studying clawdbot (OpenClaw) and OpenCode implementations:

## Beta headers
- Add interleaved-thinking-2025-05-14 and fine-grained-tool-streaming-2025-05-14 as common betas (sent with ALL auth types, not just OAuth)
- OAuth tokens additionally get oauth-2025-04-20
- API keys now also get the common betas (previously got none)

## Vision/image support
- Add _convert_vision_content() to convert OpenAI multimodal format (image_url blocks) to Anthropic format (image blocks with base64/url source)
- Handles both data: URIs (base64) and regular URLs

## Role alternation enforcement
- Anthropic strictly rejects consecutive same-role messages (400 error)
- Add post-processing step that merges consecutive user/assistant messages
- Handles string, list, and mixed content types during merge

## Tool choice support
- Add tool_choice parameter to build_anthropic_kwargs()
- Maps OpenAI values: auto→auto, required→any, none→omit, name→tool

## Cache metrics tracking
- Anthropic uses cache_read_input_tokens / cache_creation_input_tokens (different from OpenRouter's prompt_tokens_details.cached_tokens)
- Add api_mode-aware branch in run_agent.py cache stats logging

## Credential refresh on 401
- On 401 error during anthropic_messages mode, re-read credentials via resolve_anthropic_token() (picks up refreshed Claude Code tokens)
- Rebuild client if new token differs from current one
- Follows same pattern as Codex/Nous 401 refresh handlers

## Tests
- 44 adapter tests (8 new: vision conversion, role alternation, tool choice)
- Updated beta header tests to verify new structure
- Full suite: 3198 passed, 0 regressions
This commit is contained in:
parent
5e12442b4b
commit
d7adfe8f61
3 changed files with 262 additions and 11 deletions
|
|
@ -26,8 +26,16 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
|
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
|
||||||
|
|
||||||
# Beta headers required for OAuth/subscription auth
|
# Beta headers for enhanced features (sent with ALL auth types)
|
||||||
_OAUTH_BETAS = ["oauth-2025-04-20"]
|
_COMMON_BETAS = [
|
||||||
|
"interleaved-thinking-2025-05-14",
|
||||||
|
"fine-grained-tool-streaming-2025-05-14",
|
||||||
|
]
|
||||||
|
|
||||||
|
# Additional beta headers required for OAuth/subscription auth
|
||||||
|
_OAUTH_ONLY_BETAS = [
|
||||||
|
"oauth-2025-04-20",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def _is_oauth_token(key: str) -> bool:
|
def _is_oauth_token(key: str) -> bool:
|
||||||
|
|
@ -54,12 +62,15 @@ def build_anthropic_client(api_key: str, base_url: str = None):
|
||||||
kwargs["base_url"] = base_url
|
kwargs["base_url"] = base_url
|
||||||
|
|
||||||
if _is_oauth_token(api_key):
|
if _is_oauth_token(api_key):
|
||||||
# OAuth access token / setup-token → Bearer auth + beta header
|
# OAuth access token / setup-token → Bearer auth + beta headers
|
||||||
|
all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS
|
||||||
kwargs["auth_token"] = api_key
|
kwargs["auth_token"] = api_key
|
||||||
kwargs["default_headers"] = {"anthropic-beta": ",".join(_OAUTH_BETAS)}
|
kwargs["default_headers"] = {"anthropic-beta": ",".join(all_betas)}
|
||||||
else:
|
else:
|
||||||
# Regular API key → x-api-key header
|
# Regular API key → x-api-key header + common betas
|
||||||
kwargs["api_key"] = api_key
|
kwargs["api_key"] = api_key
|
||||||
|
if _COMMON_BETAS:
|
||||||
|
kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}
|
||||||
|
|
||||||
return _anthropic_sdk.Anthropic(**kwargs)
|
return _anthropic_sdk.Anthropic(**kwargs)
|
||||||
|
|
||||||
|
|
@ -173,6 +184,58 @@ def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_vision_content(content: Any) -> Any:
|
||||||
|
"""Convert OpenAI multimodal content blocks to Anthropic format.
|
||||||
|
|
||||||
|
OpenAI format: [{"type": "image_url", "image_url": {"url": "data:...;base64,..."}}]
|
||||||
|
Anthropic format: [{"type": "image", "source": {"type": "base64", ...}}]
|
||||||
|
"""
|
||||||
|
if not isinstance(content, list):
|
||||||
|
return content
|
||||||
|
|
||||||
|
result = []
|
||||||
|
for block in content:
|
||||||
|
if not isinstance(block, dict):
|
||||||
|
result.append(block)
|
||||||
|
continue
|
||||||
|
|
||||||
|
if block.get("type") == "image_url":
|
||||||
|
image_url = block.get("image_url", {})
|
||||||
|
url = image_url.get("url", "") if isinstance(image_url, dict) else ""
|
||||||
|
|
||||||
|
if url.startswith("data:"):
|
||||||
|
# data:image/png;base64,iVBOR...
|
||||||
|
try:
|
||||||
|
header, b64_data = url.split(",", 1)
|
||||||
|
media_type = header.split(":")[1].split(";")[0]
|
||||||
|
result.append({
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "base64",
|
||||||
|
"media_type": media_type,
|
||||||
|
"data": b64_data,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
logger.warning("Could not parse data URL for image, skipping")
|
||||||
|
else:
|
||||||
|
# Regular URL — Anthropic supports url source type
|
||||||
|
result.append({
|
||||||
|
"type": "image",
|
||||||
|
"source": {
|
||||||
|
"type": "url",
|
||||||
|
"url": url,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
elif block.get("type") == "text":
|
||||||
|
result.append({"type": "text", "text": block.get("text", "")})
|
||||||
|
else:
|
||||||
|
# Pass through unknown block types
|
||||||
|
result.append(block)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def convert_messages_to_anthropic(
|
def convert_messages_to_anthropic(
|
||||||
messages: List[Dict],
|
messages: List[Dict],
|
||||||
) -> Tuple[Optional[Any], List[Dict]]:
|
) -> Tuple[Optional[Any], List[Dict]]:
|
||||||
|
|
@ -241,8 +304,9 @@ def convert_messages_to_anthropic(
|
||||||
result.append({"role": "user", "content": [tool_result]})
|
result.append({"role": "user", "content": [tool_result]})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Regular user message
|
# Regular user message — convert vision content if multimodal
|
||||||
result.append({"role": "user", "content": content})
|
converted = _convert_vision_content(content) if isinstance(content, list) else content
|
||||||
|
result.append({"role": "user", "content": converted})
|
||||||
|
|
||||||
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
# Strip orphaned tool_use blocks (no matching tool_result follows)
|
||||||
tool_result_ids = set()
|
tool_result_ids = set()
|
||||||
|
|
@ -261,6 +325,40 @@ def convert_messages_to_anthropic(
|
||||||
if not m["content"]:
|
if not m["content"]:
|
||||||
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
m["content"] = [{"type": "text", "text": "(tool call removed)"}]
|
||||||
|
|
||||||
|
# Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
|
||||||
|
fixed = []
|
||||||
|
for m in result:
|
||||||
|
if fixed and fixed[-1]["role"] == m["role"]:
|
||||||
|
if m["role"] == "user":
|
||||||
|
# Merge consecutive user messages
|
||||||
|
prev_content = fixed[-1]["content"]
|
||||||
|
curr_content = m["content"]
|
||||||
|
if isinstance(prev_content, str) and isinstance(curr_content, str):
|
||||||
|
fixed[-1]["content"] = prev_content + "\n" + curr_content
|
||||||
|
elif isinstance(prev_content, list) and isinstance(curr_content, list):
|
||||||
|
fixed[-1]["content"] = prev_content + curr_content
|
||||||
|
else:
|
||||||
|
# Mixed types — wrap string in list
|
||||||
|
if isinstance(prev_content, str):
|
||||||
|
prev_content = [{"type": "text", "text": prev_content}]
|
||||||
|
if isinstance(curr_content, str):
|
||||||
|
curr_content = [{"type": "text", "text": curr_content}]
|
||||||
|
fixed[-1]["content"] = prev_content + curr_content
|
||||||
|
else:
|
||||||
|
# Consecutive assistant messages — merge text content
|
||||||
|
prev_blocks = fixed[-1]["content"]
|
||||||
|
curr_blocks = m["content"]
|
||||||
|
if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
|
||||||
|
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||||
|
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||||
|
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||||
|
else:
|
||||||
|
# Keep the later message
|
||||||
|
fixed[-1] = m
|
||||||
|
else:
|
||||||
|
fixed.append(m)
|
||||||
|
result = fixed
|
||||||
|
|
||||||
return system, result
|
return system, result
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -270,6 +368,7 @@ def build_anthropic_kwargs(
|
||||||
tools: Optional[List[Dict]],
|
tools: Optional[List[Dict]],
|
||||||
max_tokens: Optional[int],
|
max_tokens: Optional[int],
|
||||||
reasoning_config: Optional[Dict[str, Any]],
|
reasoning_config: Optional[Dict[str, Any]],
|
||||||
|
tool_choice: Optional[str] = None,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Build kwargs for anthropic.messages.create()."""
|
"""Build kwargs for anthropic.messages.create()."""
|
||||||
system, anthropic_messages = convert_messages_to_anthropic(messages)
|
system, anthropic_messages = convert_messages_to_anthropic(messages)
|
||||||
|
|
@ -289,6 +388,16 @@ def build_anthropic_kwargs(
|
||||||
|
|
||||||
if anthropic_tools:
|
if anthropic_tools:
|
||||||
kwargs["tools"] = anthropic_tools
|
kwargs["tools"] = anthropic_tools
|
||||||
|
# Map OpenAI tool_choice to Anthropic format
|
||||||
|
if tool_choice == "auto" or tool_choice is None:
|
||||||
|
kwargs["tool_choice"] = {"type": "auto"}
|
||||||
|
elif tool_choice == "required":
|
||||||
|
kwargs["tool_choice"] = {"type": "any"}
|
||||||
|
elif tool_choice == "none":
|
||||||
|
pass # Don't send tool_choice — Anthropic will use tools if needed
|
||||||
|
elif isinstance(tool_choice, str):
|
||||||
|
# Specific tool name
|
||||||
|
kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}
|
||||||
|
|
||||||
# Map reasoning_config to Anthropic's thinking parameter
|
# Map reasoning_config to Anthropic's thinking parameter
|
||||||
if reasoning_config and isinstance(reasoning_config, dict):
|
if reasoning_config and isinstance(reasoning_config, dict):
|
||||||
|
|
|
||||||
25
run_agent.py
25
run_agent.py
|
|
@ -3830,9 +3830,15 @@ class AIAgent:
|
||||||
|
|
||||||
# Log cache hit stats when prompt caching is active
|
# Log cache hit stats when prompt caching is active
|
||||||
if self._use_prompt_caching:
|
if self._use_prompt_caching:
|
||||||
details = getattr(response.usage, 'prompt_tokens_details', None)
|
if self.api_mode == "anthropic_messages":
|
||||||
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
|
# Anthropic uses cache_read_input_tokens / cache_creation_input_tokens
|
||||||
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
|
cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0
|
||||||
|
written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
|
||||||
|
else:
|
||||||
|
# OpenRouter uses prompt_tokens_details.cached_tokens
|
||||||
|
details = getattr(response.usage, 'prompt_tokens_details', None)
|
||||||
|
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
|
||||||
|
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
|
||||||
prompt = usage_dict["prompt_tokens"]
|
prompt = usage_dict["prompt_tokens"]
|
||||||
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
|
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
|
||||||
if not self.quiet_mode:
|
if not self.quiet_mode:
|
||||||
|
|
@ -3882,6 +3888,19 @@ class AIAgent:
|
||||||
if self._try_refresh_nous_client_credentials(force=True):
|
if self._try_refresh_nous_client_credentials(force=True):
|
||||||
print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
|
print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
|
||||||
continue
|
continue
|
||||||
|
if (
|
||||||
|
self.api_mode == "anthropic_messages"
|
||||||
|
and status_code == 401
|
||||||
|
and hasattr(self, '_anthropic_api_key')
|
||||||
|
):
|
||||||
|
# Try re-reading Claude Code credentials (they may have been refreshed)
|
||||||
|
from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
|
||||||
|
new_token = resolve_anthropic_token()
|
||||||
|
if new_token and new_token != self._anthropic_api_key:
|
||||||
|
self._anthropic_api_key = new_token
|
||||||
|
self._anthropic_client = build_anthropic_client(new_token)
|
||||||
|
print(f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
|
||||||
|
continue
|
||||||
|
|
||||||
retry_count += 1
|
retry_count += 1
|
||||||
elapsed_time = time.time() - api_start_time
|
elapsed_time = time.time() - api_start_time
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,10 @@ class TestBuildAnthropicClient:
|
||||||
build_anthropic_client("sk-ant-oat01-" + "x" * 60)
|
build_anthropic_client("sk-ant-oat01-" + "x" * 60)
|
||||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||||
assert "auth_token" in kwargs
|
assert "auth_token" in kwargs
|
||||||
assert "oauth-2025-04-20" in kwargs["default_headers"]["anthropic-beta"]
|
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||||
|
assert "oauth-2025-04-20" in betas
|
||||||
|
assert "interleaved-thinking-2025-05-14" in betas
|
||||||
|
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
||||||
assert "api_key" not in kwargs
|
assert "api_key" not in kwargs
|
||||||
|
|
||||||
def test_api_key_uses_api_key(self):
|
def test_api_key_uses_api_key(self):
|
||||||
|
|
@ -52,6 +55,10 @@ class TestBuildAnthropicClient:
|
||||||
kwargs = mock_sdk.Anthropic.call_args[1]
|
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||||
assert kwargs["api_key"] == "sk-ant-api03-something"
|
assert kwargs["api_key"] == "sk-ant-api03-something"
|
||||||
assert "auth_token" not in kwargs
|
assert "auth_token" not in kwargs
|
||||||
|
# API key auth should still get common betas
|
||||||
|
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||||
|
assert "interleaved-thinking-2025-05-14" in betas
|
||||||
|
assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present
|
||||||
|
|
||||||
def test_custom_base_url(self):
|
def test_custom_base_url(self):
|
||||||
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||||
|
|
@ -404,3 +411,119 @@ class TestNormalizeResponse:
|
||||||
)
|
)
|
||||||
assert msg.content is None
|
assert msg.content is None
|
||||||
assert len(msg.tool_calls) == 1
|
assert len(msg.tool_calls) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Vision content conversion
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestVisionContentConversion:
    """Checks that OpenAI-style content blocks come out in Anthropic's shape."""

    def test_base64_image(self):
        """A data: URI becomes a base64 image source; text is kept as text."""
        from agent.anthropic_adapter import _convert_vision_content

        openai_blocks = [
            {"type": "text", "text": "What's in this image?"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR"}},
        ]
        converted = _convert_vision_content(openai_blocks)

        assert converted[0] == {"type": "text", "text": "What's in this image?"}

        image_block = converted[1]
        assert image_block["type"] == "image"
        assert image_block["source"]["type"] == "base64"
        assert image_block["source"]["media_type"] == "image/png"
        assert image_block["source"]["data"] == "iVBOR"

    def test_url_image(self):
        """A regular http(s) URL becomes a url image source."""
        from agent.anthropic_adapter import _convert_vision_content

        remote_image = {
            "type": "image_url",
            "image_url": {"url": "https://example.com/img.png"},
        }
        converted = _convert_vision_content([remote_image])

        image_block = converted[0]
        assert image_block["type"] == "image"
        assert image_block["source"]["type"] == "url"
        assert image_block["source"]["url"] == "https://example.com/img.png"

    def test_passthrough_non_list(self):
        """Non-list content (a plain string here) is returned untouched."""
        from agent.anthropic_adapter import _convert_vision_content

        plain = _convert_vision_content("plain text")
        assert plain == "plain text"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Role alternation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRoleAlternation:
    """Checks the same-role merging done by convert_messages_to_anthropic."""

    def test_merges_consecutive_user_messages(self):
        """Two back-to-back user turns collapse into a single user turn."""
        back_to_back = [
            {"role": "user", "content": "Hello"},
            {"role": "user", "content": "World"},
        ]

        _system, converted = convert_messages_to_anthropic(back_to_back)

        assert len(converted) == 1
        merged_turn = converted[0]
        assert merged_turn["role"] == "user"
        assert "Hello" in merged_turn["content"]
        assert "World" in merged_turn["content"]

    def test_preserves_proper_alternation(self):
        """An already-alternating conversation is left with the same turns."""
        conversation = [
            {"role": "user", "content": "Hi"},
            {"role": "assistant", "content": "Hello!"},
            {"role": "user", "content": "How are you?"},
        ]

        _system, converted = convert_messages_to_anthropic(conversation)

        assert len(converted) == 3
        role_sequence = [turn["role"] for turn in converted]
        assert role_sequence == ["user", "assistant", "user"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tool choice
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestToolChoice:
    """Checks how OpenAI tool_choice strings map onto Anthropic's tool_choice."""

    # Minimal OpenAI-format tool definition so the tools branch is exercised.
    _DUMMY_TOOL = [
        {
            "type": "function",
            "function": {
                "name": "test",
                "description": "x",
                "parameters": {"type": "object", "properties": {}},
            },
        }
    ]

    def _kwargs_for(self, tool_choice):
        """Build request kwargs for a one-message chat with the given tool_choice."""
        return build_anthropic_kwargs(
            model="claude-sonnet-4-20250514",
            messages=[{"role": "user", "content": "Hi"}],
            tools=self._DUMMY_TOOL,
            max_tokens=4096,
            reasoning_config=None,
            tool_choice=tool_choice,
        )

    def test_auto_tool_choice(self):
        """"auto" maps to Anthropic's auto type."""
        request = self._kwargs_for("auto")
        assert request["tool_choice"] == {"type": "auto"}

    def test_required_tool_choice(self):
        """"required" maps to Anthropic's any type."""
        request = self._kwargs_for("required")
        assert request["tool_choice"] == {"type": "any"}

    def test_specific_tool_choice(self):
        """Any other string is treated as the name of a specific tool."""
        request = self._kwargs_for("search")
        assert request["tool_choice"] == {"type": "tool", "name": "search"}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue