Merge pull request #1097 from NousResearch/hermes/hermes-c877bdeb
feat: native Anthropic provider with Claude Code credential auto-discovery
This commit is contained in:
commit
0219abfeed
15 changed files with 1578 additions and 75 deletions
466
agent/anthropic_adapter.py
Normal file
466
agent/anthropic_adapter.py
Normal file
|
|
@ -0,0 +1,466 @@
|
||||||
|
"""Anthropic Messages API adapter for Hermes Agent.
|
||||||
|
|
||||||
|
Translates between Hermes's internal OpenAI-style message format and
|
||||||
|
Anthropic's Messages API. Follows the same pattern as the codex_responses
|
||||||
|
adapter — all provider-specific logic is isolated here.
|
||||||
|
|
||||||
|
Auth supports:
|
||||||
|
- Regular API keys (sk-ant-api*) → x-api-key header
|
||||||
|
- OAuth setup-tokens (sk-ant-oat*) → Bearer auth + beta header
|
||||||
|
- Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
try:
|
||||||
|
import anthropic as _anthropic_sdk
|
||||||
|
except ImportError:
|
||||||
|
_anthropic_sdk = None # type: ignore[assignment]
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)

# Thinking-token budget per reasoning-effort level; consumed by
# build_anthropic_kwargs() when mapping reasoning_config to the API's
# "thinking" parameter (unknown efforts fall back to 8000 there).
THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}

# Beta headers for enhanced features (sent with ALL auth types)
_COMMON_BETAS = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
]

# Additional beta headers required for OAuth/subscription auth
# (joined with _COMMON_BETAS when the key is a Bearer/setup token)
_OAUTH_ONLY_BETAS = [
    "oauth-2025-04-20",
]
|
||||||
|
|
||||||
|
|
||||||
|
def _is_oauth_token(key: str) -> bool:
|
||||||
|
"""Check if the key is an OAuth/setup token (not a regular Console API key).
|
||||||
|
|
||||||
|
Regular API keys start with 'sk-ant-api'. Everything else (setup-tokens
|
||||||
|
starting with 'sk-ant-oat', managed keys, JWTs, etc.) needs Bearer auth.
|
||||||
|
"""
|
||||||
|
if not key:
|
||||||
|
return False
|
||||||
|
# Regular Console API keys use x-api-key header
|
||||||
|
if key.startswith("sk-ant-api"):
|
||||||
|
return False
|
||||||
|
# Everything else (setup-tokens, managed keys, JWTs) uses Bearer auth
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def build_anthropic_client(api_key: str, base_url: Optional[str] = None):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    Args:
        api_key: Either a regular Console key (``sk-ant-api*``) or an
            OAuth/setup token; the auth style is chosen via _is_oauth_token().
        base_url: Optional override for the API endpoint.
            (Fix: annotated ``Optional[str]`` — a ``None`` default on a bare
            ``str`` annotation is the implicit-Optional pattern PEP 484 forbids.)

    Returns:
        An ``anthropic.Anthropic`` instance with the appropriate auth
        kwarg and beta headers installed.

    Raises:
        ImportError: when the optional ``anthropic`` package is missing.
    """
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
    # httpx ships with the anthropic SDK, so this import is safe here.
    from httpx import Timeout

    kwargs: Dict[str, Any] = {
        # Long read timeout: responses (especially with thinking) can stream
        # for many minutes; connection establishment stays short.
        "timeout": Timeout(timeout=900.0, connect=10.0),
    }
    if base_url:
        kwargs["base_url"] = base_url

    if _is_oauth_token(api_key):
        # OAuth access token / setup-token → Bearer auth + OAuth beta headers
        all_betas = _COMMON_BETAS + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {"anthropic-beta": ",".join(all_betas)}
    else:
        # Regular API key → x-api-key header + common betas only
        kwargs["api_key"] = api_key
        if _COMMON_BETAS:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(_COMMON_BETAS)}

    return _anthropic_sdk.Anthropic(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    """Read credentials from Claude Code's config files.

    Two locations are probed, in order:

    1. ``~/.claude.json`` — top-level ``primaryApiKey`` (native binary, v2.x)
    2. ``~/.claude/.credentials.json`` — ``claudeAiOauth`` block (npm/legacy)

    Returns:
        A dict with ``accessToken``, ``refreshToken`` and ``expiresAt``
        keys, or ``None`` when neither file yields a usable token.
    """
    home = Path.home()

    # 1. Native binary (v2.x): ~/.claude.json with top-level primaryApiKey
    native_path = home / ".claude.json"
    if native_path.exists():
        try:
            payload = json.loads(native_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError, IOError) as exc:
            logger.debug("Failed to read ~/.claude.json: %s", exc)
        else:
            primary = payload.get("primaryApiKey", "")
            if primary:
                return {
                    "accessToken": primary,
                    "refreshToken": "",
                    # Managed keys don't have a user-visible expiry
                    "expiresAt": 0,
                }

    # 2. Legacy/npm installs: ~/.claude/.credentials.json
    legacy_path = home / ".claude" / ".credentials.json"
    if legacy_path.exists():
        try:
            payload = json.loads(legacy_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, OSError, IOError) as exc:
            logger.debug("Failed to read ~/.claude/.credentials.json: %s", exc)
        else:
            oauth = payload.get("claudeAiOauth")
            if isinstance(oauth, dict) and oauth.get("accessToken", ""):
                return {
                    "accessToken": oauth.get("accessToken", ""),
                    "refreshToken": oauth.get("refreshToken", ""),
                    "expiresAt": oauth.get("expiresAt", 0),
                }

    return None
|
||||||
|
|
||||||
|
|
||||||
|
def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
    """Return True when *creds* holds a non-expired access token."""
    import time

    expiry_ms = creds.get("expiresAt", 0)
    if not expiry_ms:
        # Managed keys carry no expiry — any non-empty token counts as valid.
        return bool(creds.get("accessToken"))

    # expiresAt is milliseconds since the epoch; keep a 60-second margin
    # so a token about to lapse is treated as already expired.
    return int(time.time() * 1000) < expiry_ms - 60_000
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_anthropic_token() -> Optional[str]:
    """Locate an Anthropic token, checking sources in priority order.

    1. ``ANTHROPIC_API_KEY`` env var (regular API key)
    2. ``ANTHROPIC_TOKEN`` env var (OAuth/setup token)
    3. ``CLAUDE_CODE_OAUTH_TOKEN`` env var (Claude Code setup-token slot)
    4. Claude Code credential files (auto-discovered)

    Returns:
        The first non-empty token found, or ``None``.
    """
    # Environment variables win, in fixed priority order.
    for var in ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"):
        value = os.getenv(var, "").strip()
        if value:
            return value

    # Fall back to Claude Code's on-disk credentials.
    creds = read_claude_code_credentials()
    if creds and is_claude_code_token_valid(creds):
        logger.debug("Using Claude Code credentials (auto-detected)")
        return creds["accessToken"]
    if creds:
        logger.debug("Claude Code credentials expired — run 'claude' to refresh")

    return None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Message / tool / response format conversion
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_model_name(model: str) -> str:
    """Return *model* with any leading 'anthropic/' prefix removed.

    The prefix check is case-insensitive (OpenRouter-style IDs), but the
    remainder of the name is returned with its original casing intact.
    """
    prefix = "anthropic/"
    if model.lower().startswith(prefix):
        return model[len(prefix):]
    return model
|
||||||
|
|
||||||
|
|
||||||
|
def _sanitize_tool_id(tool_id: str) -> str:
|
||||||
|
"""Sanitize a tool call ID for the Anthropic API.
|
||||||
|
|
||||||
|
Anthropic requires IDs matching [a-zA-Z0-9_-]. Replace invalid
|
||||||
|
characters with underscores and ensure non-empty.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
if not tool_id:
|
||||||
|
return "tool_0"
|
||||||
|
sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
|
||||||
|
return sanitized or "tool_0"
|
||||||
|
|
||||||
|
|
||||||
|
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Translate OpenAI-style tool definitions into Anthropic's schema.

    Each OpenAI entry carries the spec under ``"function"``; Anthropic
    wants ``name`` / ``description`` / ``input_schema`` at the top level.
    Missing pieces default to empty strings / an empty object schema.
    """
    if not tools:
        return []
    specs = (tool.get("function", {}) for tool in tools)
    return [
        {
            "name": spec.get("name", ""),
            "description": spec.get("description", ""),
            "input_schema": spec.get("parameters", {"type": "object", "properties": {}}),
        }
        for spec in specs
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def convert_messages_to_anthropic(
    messages: List[Dict],
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    Returns (system_prompt, anthropic_messages).
    System messages are extracted since Anthropic takes them as a separate param.
    system_prompt is a string or list of content blocks (when cache_control present).

    Three fix-up passes run after the per-message conversion:
    1. tool_use blocks with no matching tool_result are stripped;
    2. consecutive same-role messages are merged (Anthropic requires
       strict user/assistant alternation);
    3. empty assistant content is replaced with placeholder text.
    """
    system = None
    result = []

    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            # A later system message overwrites an earlier one (last wins).
            if isinstance(content, list):
                # Preserve cache_control markers on content blocks
                has_cache = any(
                    p.get("cache_control") for p in content if isinstance(p, dict)
                )
                if has_cache:
                    system = [p for p in content if isinstance(p, dict)]
                else:
                    # NOTE(review): assumes every part is a dict here — a bare
                    # string element would raise on .get(); confirm callers.
                    system = "\n".join(
                        p["text"] for p in content if p.get("type") == "text"
                    )
            else:
                system = content
            continue

        if role == "assistant":
            # Assistant turns become a list of text + tool_use blocks.
            blocks = []
            if content:
                text = content if isinstance(content, str) else json.dumps(content)
                blocks.append({"type": "text", "text": text})
            for tc in m.get("tool_calls", []):
                fn = tc.get("function", {})
                args = fn.get("arguments", "{}")
                try:
                    parsed_args = json.loads(args) if isinstance(args, str) else args
                except (json.JSONDecodeError, ValueError):
                    # Malformed arguments degrade to an empty input object.
                    parsed_args = {}
                blocks.append({
                    "type": "tool_use",
                    "id": _sanitize_tool_id(tc.get("id", "")),
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
                effective = [{"type": "text", "text": "(empty)"}]
            result.append({"role": "assistant", "content": effective})
            continue

        if role == "tool":
            # Sanitize tool_use_id and ensure non-empty content
            result_content = content if isinstance(content, str) else json.dumps(content)
            if not result_content:
                result_content = "(no output)"
            tool_result = {
                "type": "tool_result",
                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
                "content": result_content,
            }
            # Merge consecutive tool results into one user message
            # (Anthropic expects all results for one assistant turn together).
            if (
                result
                and result[-1]["role"] == "user"
                and isinstance(result[-1]["content"], list)
                and result[-1]["content"]
                and result[-1]["content"][0].get("type") == "tool_result"
            ):
                result[-1]["content"].append(tool_result)
            else:
                result.append({"role": "user", "content": [tool_result]})
            continue

        # Regular user message
        result.append({"role": "user", "content": content})

    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
        if m["role"] == "user" and isinstance(m["content"], list):
            for block in m["content"]:
                if block.get("type") == "tool_result":
                    tool_result_ids.add(block.get("tool_use_id"))
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
            # Keep non-tool_use blocks, plus tool_use blocks that were answered.
            m["content"] = [
                b
                for b in m["content"]
                if b.get("type") != "tool_use" or b.get("id") in tool_result_ids
            ]
            if not m["content"]:
                # Everything was stripped — leave a placeholder so the
                # message is still non-empty.
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
                    fixed[-1]["content"] = prev_content + "\n" + curr_content
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
                        curr_content = [{"type": "text", "text": curr_content}]
                    fixed[-1]["content"] = prev_content + curr_content
            else:
                # Consecutive assistant messages — merge text content
                prev_blocks = fixed[-1]["content"]
                curr_blocks = m["content"]
                if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
                    fixed[-1]["content"] = prev_blocks + curr_blocks
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
                    # Keep the later message
                    # NOTE(review): mixed str/list assistant merge drops the
                    # earlier message entirely — confirm this is intended.
                    fixed[-1] = m
        else:
            fixed.append(m)
    result = fixed

    return system, result
|
||||||
|
|
||||||
|
|
||||||
|
def build_anthropic_kwargs(
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    max_tokens: Optional[int],
    reasoning_config: Optional[Dict[str, Any]],
    tool_choice: Optional[str] = None,
) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

    Converts messages/tools to Anthropic format, normalizes the model
    name, maps OpenAI-style tool_choice values, and translates
    reasoning_config into the "thinking" parameter with a budget from
    THINKING_BUDGET.
    """
    system, anthropic_messages = convert_messages_to_anthropic(messages)
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model)
    # Default output cap when the caller doesn't specify one.
    effective_max_tokens = max_tokens or 16384

    kwargs: Dict[str, Any] = {
        "model": model,
        "messages": anthropic_messages,
        "max_tokens": effective_max_tokens,
    }

    if system:
        kwargs["system"] = system

    if anthropic_tools:
        kwargs["tools"] = anthropic_tools
        # Map OpenAI tool_choice to Anthropic format
        if tool_choice == "auto" or tool_choice is None:
            kwargs["tool_choice"] = {"type": "auto"}
        elif tool_choice == "required":
            # OpenAI "required" ≙ Anthropic "any" (must call some tool)
            kwargs["tool_choice"] = {"type": "any"}
        elif tool_choice == "none":
            pass  # Don't send tool_choice — Anthropic will use tools if needed
        elif isinstance(tool_choice, str):
            # Specific tool name
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}

    # Map reasoning_config to Anthropic's thinking parameter
    # Newer models (4.6+) prefer "adaptive" thinking; older models use "enabled"
    if reasoning_config and isinstance(reasoning_config, dict):
        # Thinking is on unless explicitly disabled (enabled=False).
        if reasoning_config.get("enabled") is not False:
            effort = reasoning_config.get("effort", "medium")
            budget = THINKING_BUDGET.get(effort, 8000)
            # Use adaptive thinking for 4.5+ models (they deprecate type=enabled)
            # NOTE(review): version detection is substring-based on the model
            # id — verify it matches all intended model-name spellings.
            if any(v in model for v in ("4-6", "4-5", "4.6", "4.5")):
                kwargs["thinking"] = {"type": "adaptive", "budget_tokens": budget}
            else:
                kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
                # Anthropic requires temperature=1 when thinking is enabled on older models
                kwargs["temperature"] = 1
            # Ensure the output cap leaves headroom beyond the thinking budget.
            kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)

    return kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_anthropic_response(
    response,
) -> Tuple[SimpleNamespace, str]:
    """Normalize an Anthropic response into the shape AIAgent expects.

    Returns ``(assistant_message, finish_reason)``; the message namespace
    exposes ``.content``, ``.tool_calls`` and ``.reasoning`` (plus the
    ``reasoning_content`` / ``reasoning_details`` placeholders).
    """
    texts: List[str] = []
    thoughts: List[str] = []
    calls: List[SimpleNamespace] = []

    # Split the response's content blocks by kind.
    for part in response.content:
        kind = part.type
        if kind == "text":
            texts.append(part.text)
        elif kind == "thinking":
            thoughts.append(part.thinking)
        elif kind == "tool_use":
            fn_ns = SimpleNamespace(
                name=part.name,
                arguments=json.dumps(part.input),
            )
            calls.append(
                SimpleNamespace(id=part.id, type="function", function=fn_ns)
            )

    # Translate Anthropic stop_reason values to OpenAI finish_reason names;
    # anything unrecognized degrades to plain "stop".
    finish_reason = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
    }.get(response.stop_reason, "stop")

    message = SimpleNamespace(
        content="\n".join(texts) if texts else None,
        tool_calls=calls or None,
        reasoning="\n\n".join(thoughts) if thoughts else None,
        reasoning_content=None,
        reasoning_details=None,
    )
    return message, finish_reason
|
||||||
|
|
@ -51,6 +51,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||||
"kimi-coding": "kimi-k2-turbo-preview",
|
"kimi-coding": "kimi-k2-turbo-preview",
|
||||||
"minimax": "MiniMax-M2.5-highspeed",
|
"minimax": "MiniMax-M2.5-highspeed",
|
||||||
"minimax-cn": "MiniMax-M2.5-highspeed",
|
"minimax-cn": "MiniMax-M2.5-highspeed",
|
||||||
|
"anthropic": "claude-haiku-4-5-20251001",
|
||||||
}
|
}
|
||||||
|
|
||||||
# OpenRouter app attribution headers
|
# OpenRouter app attribution headers
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,15 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||||
"anthropic/claude-sonnet-4": 200000,
|
"anthropic/claude-sonnet-4": 200000,
|
||||||
"anthropic/claude-sonnet-4-20250514": 200000,
|
"anthropic/claude-sonnet-4-20250514": 200000,
|
||||||
"anthropic/claude-haiku-4.5": 200000,
|
"anthropic/claude-haiku-4.5": 200000,
|
||||||
|
# Bare Anthropic model IDs (for native API provider)
|
||||||
|
"claude-opus-4-6": 200000,
|
||||||
|
"claude-sonnet-4-6": 200000,
|
||||||
|
"claude-opus-4-5-20251101": 200000,
|
||||||
|
"claude-sonnet-4-5-20250929": 200000,
|
||||||
|
"claude-opus-4-1-20250805": 200000,
|
||||||
|
"claude-opus-4-20250514": 200000,
|
||||||
|
"claude-sonnet-4-20250514": 200000,
|
||||||
|
"claude-haiku-4-5-20251001": 200000,
|
||||||
"openai/gpt-4o": 128000,
|
"openai/gpt-4o": 128000,
|
||||||
"openai/gpt-4-turbo": 128000,
|
"openai/gpt-4-turbo": 128000,
|
||||||
"openai/gpt-4o-mini": 128000,
|
"openai/gpt-4o-mini": 128000,
|
||||||
|
|
|
||||||
|
|
@ -132,6 +132,13 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||||
api_key_env_vars=("MINIMAX_API_KEY",),
|
api_key_env_vars=("MINIMAX_API_KEY",),
|
||||||
base_url_env_var="MINIMAX_BASE_URL",
|
base_url_env_var="MINIMAX_BASE_URL",
|
||||||
),
|
),
|
||||||
|
"anthropic": ProviderConfig(
|
||||||
|
id="anthropic",
|
||||||
|
name="Anthropic",
|
||||||
|
auth_type="api_key",
|
||||||
|
inference_base_url="https://api.anthropic.com",
|
||||||
|
api_key_env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
|
||||||
|
),
|
||||||
"minimax-cn": ProviderConfig(
|
"minimax-cn": ProviderConfig(
|
||||||
id="minimax-cn",
|
id="minimax-cn",
|
||||||
name="MiniMax (China)",
|
name="MiniMax (China)",
|
||||||
|
|
@ -516,6 +523,7 @@ def resolve_provider(
|
||||||
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
||||||
"kimi": "kimi-coding", "moonshot": "kimi-coding",
|
"kimi": "kimi-coding", "moonshot": "kimi-coding",
|
||||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||||
|
"claude": "anthropic", "claude-code": "anthropic",
|
||||||
}
|
}
|
||||||
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -748,6 +748,7 @@ def cmd_model(args):
|
||||||
"openrouter": "OpenRouter",
|
"openrouter": "OpenRouter",
|
||||||
"nous": "Nous Portal",
|
"nous": "Nous Portal",
|
||||||
"openai-codex": "OpenAI Codex",
|
"openai-codex": "OpenAI Codex",
|
||||||
|
"anthropic": "Anthropic",
|
||||||
"zai": "Z.AI / GLM",
|
"zai": "Z.AI / GLM",
|
||||||
"kimi-coding": "Kimi / Moonshot",
|
"kimi-coding": "Kimi / Moonshot",
|
||||||
"minimax": "MiniMax",
|
"minimax": "MiniMax",
|
||||||
|
|
@ -766,6 +767,7 @@ def cmd_model(args):
|
||||||
("openrouter", "OpenRouter (100+ models, pay-per-use)"),
|
("openrouter", "OpenRouter (100+ models, pay-per-use)"),
|
||||||
("nous", "Nous Portal (Nous Research subscription)"),
|
("nous", "Nous Portal (Nous Research subscription)"),
|
||||||
("openai-codex", "OpenAI Codex"),
|
("openai-codex", "OpenAI Codex"),
|
||||||
|
("anthropic", "Anthropic (Claude models — API key or Claude Code)"),
|
||||||
("zai", "Z.AI / GLM (Zhipu AI direct API)"),
|
("zai", "Z.AI / GLM (Zhipu AI direct API)"),
|
||||||
("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
|
("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
|
||||||
("minimax", "MiniMax (global direct API)"),
|
("minimax", "MiniMax (global direct API)"),
|
||||||
|
|
@ -834,6 +836,8 @@ def cmd_model(args):
|
||||||
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
||||||
elif selected_provider == "remove-custom":
|
elif selected_provider == "remove-custom":
|
||||||
_remove_custom_provider(config)
|
_remove_custom_provider(config)
|
||||||
|
elif selected_provider == "anthropic":
|
||||||
|
_model_flow_anthropic(config, current_model)
|
||||||
elif selected_provider == "kimi-coding":
|
elif selected_provider == "kimi-coding":
|
||||||
_model_flow_kimi(config, current_model)
|
_model_flow_kimi(config, current_model)
|
||||||
elif selected_provider in ("zai", "minimax", "minimax-cn"):
|
elif selected_provider in ("zai", "minimax", "minimax-cn"):
|
||||||
|
|
@ -1570,6 +1574,140 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||||
print("No change.")
|
print("No change.")
|
||||||
|
|
||||||
|
|
||||||
|
def _model_flow_anthropic(config, current_model=""):
    """Flow for Anthropic provider — setup-token, API key, or Claude Code creds.

    Interactive CLI flow: discovers existing credentials (env/.env or
    Claude Code's files), optionally prompts for new ones, then walks the
    user through model selection and persists the choice to the config.
    """
    import os
    # NOTE(review): _update_config_for_provider is imported but unused below.
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
        _update_config_for_provider, deactivate_provider,
    )
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
    from hermes_cli.models import _PROVIDER_MODELS

    pconfig = PROVIDER_REGISTRY["anthropic"]

    # Check for existing credentials (saved .env values take priority
    # over the live process environment).
    existing_key = (
        get_env_value("ANTHROPIC_API_KEY")
        or os.getenv("ANTHROPIC_API_KEY", "")
        or get_env_value("ANTHROPIC_TOKEN")
        or os.getenv("ANTHROPIC_TOKEN", "")
    )
    cc_available = False
    try:
        from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid
        cc_creds = read_claude_code_credentials()
        if cc_creds and is_claude_code_token_valid(cc_creds):
            cc_available = True
    except Exception:
        # Best-effort discovery — any failure just means "not available".
        pass

    if existing_key:
        print(f" Anthropic credentials: {existing_key[:12]}... ✓")
        print()
        try:
            update = input("Update credentials? [y/N]: ").strip().lower()
        except (KeyboardInterrupt, EOFError):
            update = ""
        if update != "y":
            pass  # skip to model selection
        else:
            existing_key = ""  # fall through to auth choice below
    elif cc_available:
        print(" Claude Code credentials: ✓ (auto-detected)")
        print()

    if not existing_key and not cc_available:
        # No credentials — show auth method choice
        print()
        print(" Choose authentication method:")
        print()
        print(" 1. Claude Pro/Max subscription (setup-token)")
        print(" 2. Anthropic API key (pay-per-token)")
        print(" 3. Cancel")
        print()
        try:
            choice = input(" Choice [1/2/3]: ").strip()
        except (KeyboardInterrupt, EOFError):
            print()
            return

        if choice == "1":
            print()
            print(" To get a setup-token from your Claude subscription:")
            print()
            print(" 1. Install Claude Code: npm install -g @anthropic-ai/claude-code")
            print(" 2. Run: claude setup-token")
            print(" 3. Open the URL it prints in your browser")
            print(" 4. Log in and click \"Authorize\"")
            print(" 5. Paste the auth code back into Claude Code")
            print(" 6. Copy the resulting sk-ant-oat01-... token")
            print()
            try:
                token = input(" Paste setup-token here: ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not token:
                print(" Cancelled.")
                return
            # Setup-tokens share the ANTHROPIC_API_KEY slot; the adapter
            # distinguishes them by their sk-ant-oat prefix at request time.
            save_env_value("ANTHROPIC_API_KEY", token)
            print(" ✓ Setup-token saved.")

        elif choice == "2":
            print()
            print(" Get an API key at: https://console.anthropic.com/settings/keys")
            print()
            try:
                api_key = input(" API key (sk-ant-api03-...): ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not api_key:
                print(" Cancelled.")
                return
            save_env_value("ANTHROPIC_API_KEY", api_key)
            print(" ✓ API key saved.")

        else:
            print(" No change.")
            return
        print()

    # Model selection
    model_list = _PROVIDER_MODELS.get("anthropic", [])
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input("Model name (e.g., claude-sonnet-4-20250514): ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if selected:
        # Clear custom endpoint if set
        if get_env_value("OPENAI_BASE_URL"):
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")

        _save_model_choice(selected)

        # Update config with provider; normalize legacy scalar "model"
        # entries into the dict form before adding provider fields.
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            model = {"default": model} if model else {}
        cfg["model"] = model
        model["provider"] = "anthropic"
        model["base_url"] = pconfig.inference_base_url
        save_config(cfg)
        deactivate_provider()

        print(f"Default model set to: {selected} (via Anthropic)")
    else:
        print("No change.")
|
||||||
|
|
||||||
|
|
||||||
def cmd_login(args):
|
def cmd_login(args):
|
||||||
"""Authenticate Hermes CLI with a provider."""
|
"""Authenticate Hermes CLI with a provider."""
|
||||||
from hermes_cli.auth import login_command
|
from hermes_cli.auth import login_command
|
||||||
|
|
@ -2050,7 +2188,7 @@ For more help on a command:
|
||||||
)
|
)
|
||||||
chat_parser.add_argument(
|
chat_parser.add_argument(
|
||||||
"--provider",
|
"--provider",
|
||||||
choices=["auto", "openrouter", "nous", "openai-codex", "zai", "kimi-coding", "minimax", "minimax-cn"],
|
choices=["auto", "openrouter", "nous", "openai-codex", "anthropic", "zai", "kimi-coding", "minimax", "minimax-cn"],
|
||||||
default=None,
|
default=None,
|
||||||
help="Inference provider (default: auto)"
|
help="Inference provider (default: auto)"
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
"MiniMax-M2.5-highspeed",
|
"MiniMax-M2.5-highspeed",
|
||||||
"MiniMax-M2.1",
|
"MiniMax-M2.1",
|
||||||
],
|
],
|
||||||
|
"anthropic": [
|
||||||
|
"claude-opus-4-6",
|
||||||
|
"claude-sonnet-4-6",
|
||||||
|
"claude-opus-4-5-20251101",
|
||||||
|
"claude-sonnet-4-5-20250929",
|
||||||
|
"claude-opus-4-20250514",
|
||||||
|
"claude-sonnet-4-20250514",
|
||||||
|
"claude-haiku-4-5-20251001",
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
_PROVIDER_LABELS = {
|
_PROVIDER_LABELS = {
|
||||||
|
|
@ -78,6 +87,7 @@ _PROVIDER_LABELS = {
|
||||||
"kimi-coding": "Kimi / Moonshot",
|
"kimi-coding": "Kimi / Moonshot",
|
||||||
"minimax": "MiniMax",
|
"minimax": "MiniMax",
|
||||||
"minimax-cn": "MiniMax (China)",
|
"minimax-cn": "MiniMax (China)",
|
||||||
|
"anthropic": "Anthropic",
|
||||||
"custom": "Custom endpoint",
|
"custom": "Custom endpoint",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -90,6 +100,8 @@ _PROVIDER_ALIASES = {
|
||||||
"moonshot": "kimi-coding",
|
"moonshot": "kimi-coding",
|
||||||
"minimax-china": "minimax-cn",
|
"minimax-china": "minimax-cn",
|
||||||
"minimax_cn": "minimax-cn",
|
"minimax_cn": "minimax-cn",
|
||||||
|
"claude": "anthropic",
|
||||||
|
"claude-code": "anthropic",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -123,7 +135,7 @@ def list_available_providers() -> list[dict[str, str]]:
|
||||||
# Canonical providers in display order
|
# Canonical providers in display order
|
||||||
_PROVIDER_ORDER = [
|
_PROVIDER_ORDER = [
|
||||||
"openrouter", "nous", "openai-codex",
|
"openrouter", "nous", "openai-codex",
|
||||||
"zai", "kimi-coding", "minimax", "minimax-cn",
|
"zai", "kimi-coding", "minimax", "minimax-cn", "anthropic",
|
||||||
]
|
]
|
||||||
# Build reverse alias map
|
# Build reverse alias map
|
||||||
aliases_for: dict[str, list[str]] = {}
|
aliases_for: dict[str, list[str]] = {}
|
||||||
|
|
@ -234,9 +246,56 @@ def provider_model_ids(provider: Optional[str]) -> list[str]:
|
||||||
return live
|
return live
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
if normalized == "anthropic":
|
||||||
|
live = _fetch_anthropic_models()
|
||||||
|
if live:
|
||||||
|
return live
|
||||||
return list(_PROVIDER_MODELS.get(normalized, []))
|
return list(_PROVIDER_MODELS.get(normalized, []))
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_anthropic_models(timeout: float = 5.0) -> Optional[list[str]]:
|
||||||
|
"""Fetch available models from the Anthropic /v1/models endpoint.
|
||||||
|
|
||||||
|
Uses resolve_anthropic_token() to find credentials (env vars or
|
||||||
|
Claude Code auto-discovery). Returns sorted model IDs or None.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
from agent.anthropic_adapter import resolve_anthropic_token, _is_oauth_token
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
token = resolve_anthropic_token()
|
||||||
|
if not token:
|
||||||
|
return None
|
||||||
|
|
||||||
|
headers: dict[str, str] = {"anthropic-version": "2023-06-01"}
|
||||||
|
if _is_oauth_token(token):
|
||||||
|
headers["Authorization"] = f"Bearer {token}"
|
||||||
|
headers["anthropic-beta"] = "oauth-2025-04-20"
|
||||||
|
else:
|
||||||
|
headers["x-api-key"] = token
|
||||||
|
|
||||||
|
req = urllib.request.Request(
|
||||||
|
"https://api.anthropic.com/v1/models",
|
||||||
|
headers=headers,
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||||
|
data = json.loads(resp.read().decode())
|
||||||
|
models = [m["id"] for m in data.get("data", []) if m.get("id")]
|
||||||
|
# Sort: latest/largest first (opus > sonnet > haiku, higher version first)
|
||||||
|
return sorted(models, key=lambda m: (
|
||||||
|
"opus" not in m, # opus first
|
||||||
|
"sonnet" not in m, # then sonnet
|
||||||
|
"haiku" not in m, # then haiku
|
||||||
|
m, # alphabetical within tier
|
||||||
|
))
|
||||||
|
except Exception as e:
|
||||||
|
import logging
|
||||||
|
logging.getLogger(__name__).debug("Failed to fetch Anthropic models: %s", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def fetch_api_models(
|
def fetch_api_models(
|
||||||
api_key: Optional[str],
|
api_key: Optional[str],
|
||||||
base_url: Optional[str],
|
base_url: Optional[str],
|
||||||
|
|
|
||||||
|
|
@ -153,6 +153,24 @@ def resolve_runtime_provider(
|
||||||
"requested_provider": requested_provider,
|
"requested_provider": requested_provider,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Anthropic (native Messages API)
|
||||||
|
if provider == "anthropic":
|
||||||
|
from agent.anthropic_adapter import resolve_anthropic_token
|
||||||
|
token = resolve_anthropic_token()
|
||||||
|
if not token:
|
||||||
|
raise AuthError(
|
||||||
|
"No Anthropic credentials found. Set ANTHROPIC_API_KEY, "
|
||||||
|
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"provider": "anthropic",
|
||||||
|
"api_mode": "anthropic_messages",
|
||||||
|
"base_url": "https://api.anthropic.com",
|
||||||
|
"api_key": token,
|
||||||
|
"source": "env",
|
||||||
|
"requested_provider": requested_provider,
|
||||||
|
}
|
||||||
|
|
||||||
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
|
# API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
|
||||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||||
if pconfig and pconfig.auth_type == "api_key":
|
if pconfig and pconfig.auth_type == "api_key":
|
||||||
|
|
|
||||||
|
|
@ -689,6 +689,7 @@ def setup_model_provider(config: dict):
|
||||||
"Kimi / Moonshot (Kimi coding models)",
|
"Kimi / Moonshot (Kimi coding models)",
|
||||||
"MiniMax (global endpoint)",
|
"MiniMax (global endpoint)",
|
||||||
"MiniMax China (mainland China endpoint)",
|
"MiniMax China (mainland China endpoint)",
|
||||||
|
"Anthropic (Claude models — API key or Claude Code subscription)",
|
||||||
]
|
]
|
||||||
if keep_label:
|
if keep_label:
|
||||||
provider_choices.append(keep_label)
|
provider_choices.append(keep_label)
|
||||||
|
|
@ -1068,7 +1069,74 @@ def setup_model_provider(config: dict):
|
||||||
_update_config_for_provider("minimax-cn", pconfig.inference_base_url)
|
_update_config_for_provider("minimax-cn", pconfig.inference_base_url)
|
||||||
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
|
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
|
||||||
|
|
||||||
# else: provider_idx == 8 (Keep current) — only shown when a provider already exists
|
elif provider_idx == 8: # Anthropic
|
||||||
|
selected_provider = "anthropic"
|
||||||
|
print()
|
||||||
|
print_header("Anthropic Authentication")
|
||||||
|
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||||
|
pconfig = PROVIDER_REGISTRY["anthropic"]
|
||||||
|
|
||||||
|
# Check for Claude Code credential auto-discovery
|
||||||
|
from agent.anthropic_adapter import read_claude_code_credentials, is_claude_code_token_valid
|
||||||
|
cc_creds = read_claude_code_credentials()
|
||||||
|
if cc_creds and is_claude_code_token_valid(cc_creds):
|
||||||
|
print_success("Found valid Claude Code credentials (~/.claude/.credentials.json)")
|
||||||
|
if prompt_yes_no("Use these credentials?", True):
|
||||||
|
print_success("Using Claude Code subscription credentials")
|
||||||
|
else:
|
||||||
|
cc_creds = None
|
||||||
|
|
||||||
|
existing_key = get_env_value("ANTHROPIC_API_KEY") or get_env_value("ANTHROPIC_TOKEN")
|
||||||
|
|
||||||
|
if not (cc_creds and is_claude_code_token_valid(cc_creds)):
|
||||||
|
if existing_key:
|
||||||
|
print_info(f"Current credentials: {existing_key[:12]}...")
|
||||||
|
if not prompt_yes_no("Update credentials?", False):
|
||||||
|
# User wants to keep existing — skip auth prompt entirely
|
||||||
|
existing_key = "KEEP" # truthy sentinel to skip auth choice
|
||||||
|
|
||||||
|
if not existing_key and not (cc_creds and is_claude_code_token_valid(cc_creds)):
|
||||||
|
auth_choices = [
|
||||||
|
"Claude Pro/Max subscription (setup-token)",
|
||||||
|
"Anthropic API key (pay-per-token)",
|
||||||
|
]
|
||||||
|
auth_idx = prompt_choice("Choose authentication method:", auth_choices, 0)
|
||||||
|
|
||||||
|
if auth_idx == 0:
|
||||||
|
print()
|
||||||
|
print_info("To get a setup-token from your Claude subscription:")
|
||||||
|
print_info(" 1. Install Claude Code: npm install -g @anthropic-ai/claude-code")
|
||||||
|
print_info(" 2. Run: claude setup-token")
|
||||||
|
print_info(" 3. Open the URL it prints in your browser")
|
||||||
|
print_info(" 4. Log in and click \"Authorize\"")
|
||||||
|
print_info(" 5. Paste the auth code back into Claude Code")
|
||||||
|
print_info(" 6. Copy the resulting sk-ant-oat01-... token")
|
||||||
|
print()
|
||||||
|
token = prompt("Paste setup-token here", password=True)
|
||||||
|
if token:
|
||||||
|
save_env_value("ANTHROPIC_API_KEY", token)
|
||||||
|
print_success("Setup-token saved")
|
||||||
|
else:
|
||||||
|
print_warning("Skipped — agent won't work without credentials")
|
||||||
|
else:
|
||||||
|
print()
|
||||||
|
print_info("Get an API key at: https://console.anthropic.com/settings/keys")
|
||||||
|
print()
|
||||||
|
api_key = prompt("API key (sk-ant-api03-...)", password=True)
|
||||||
|
if api_key:
|
||||||
|
save_env_value("ANTHROPIC_API_KEY", api_key)
|
||||||
|
print_success("API key saved")
|
||||||
|
else:
|
||||||
|
print_warning("Skipped — agent won't work without credentials")
|
||||||
|
|
||||||
|
# Clear custom endpoint vars if switching
|
||||||
|
if existing_custom:
|
||||||
|
save_env_value("OPENAI_BASE_URL", "")
|
||||||
|
save_env_value("OPENAI_API_KEY", "")
|
||||||
|
_update_config_for_provider("anthropic", pconfig.inference_base_url)
|
||||||
|
_set_model_provider(config, "anthropic", pconfig.inference_base_url)
|
||||||
|
|
||||||
|
# else: provider_idx == 9 (Keep current) — only shown when a provider already exists
|
||||||
|
|
||||||
# ── OpenRouter API Key for tools (if not already set) ──
|
# ── OpenRouter API Key for tools (if not already set) ──
|
||||||
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
|
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
|
||||||
|
|
@ -1081,6 +1149,7 @@ def setup_model_provider(config: dict):
|
||||||
"kimi-coding",
|
"kimi-coding",
|
||||||
"minimax",
|
"minimax",
|
||||||
"minimax-cn",
|
"minimax-cn",
|
||||||
|
"anthropic",
|
||||||
) and not get_env_value("OPENROUTER_API_KEY"):
|
) and not get_env_value("OPENROUTER_API_KEY"):
|
||||||
print()
|
print()
|
||||||
print_header("OpenRouter API Key (for tools)")
|
print_header("OpenRouter API Key (for tools)")
|
||||||
|
|
@ -1174,6 +1243,79 @@ def setup_model_provider(config: dict):
|
||||||
config, selected_provider, current_model,
|
config, selected_provider, current_model,
|
||||||
prompt_choice, prompt,
|
prompt_choice, prompt,
|
||||||
)
|
)
|
||||||
|
if is_coding_plan:
|
||||||
|
zai_models = ["glm-4.7", "glm-4.5", "glm-4.5-flash"]
|
||||||
|
else:
|
||||||
|
zai_models = ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]
|
||||||
|
model_choices = list(zai_models)
|
||||||
|
model_choices.append("Custom model")
|
||||||
|
model_choices.append(f"Keep current ({current_model})")
|
||||||
|
|
||||||
|
keep_idx = len(model_choices) - 1
|
||||||
|
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
|
||||||
|
|
||||||
|
if model_idx < len(zai_models):
|
||||||
|
_set_default_model(config, zai_models[model_idx])
|
||||||
|
elif model_idx == len(zai_models):
|
||||||
|
custom = prompt("Enter model name")
|
||||||
|
if custom:
|
||||||
|
_set_default_model(config, custom)
|
||||||
|
# else: keep current
|
||||||
|
elif selected_provider == "kimi-coding":
|
||||||
|
kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]
|
||||||
|
model_choices = list(kimi_models)
|
||||||
|
model_choices.append("Custom model")
|
||||||
|
model_choices.append(f"Keep current ({current_model})")
|
||||||
|
|
||||||
|
keep_idx = len(model_choices) - 1
|
||||||
|
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
|
||||||
|
|
||||||
|
if model_idx < len(kimi_models):
|
||||||
|
_set_default_model(config, kimi_models[model_idx])
|
||||||
|
elif model_idx == len(kimi_models):
|
||||||
|
custom = prompt("Enter model name")
|
||||||
|
if custom:
|
||||||
|
_set_default_model(config, custom)
|
||||||
|
# else: keep current
|
||||||
|
elif selected_provider in ("minimax", "minimax-cn"):
|
||||||
|
minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]
|
||||||
|
model_choices = list(minimax_models)
|
||||||
|
model_choices.append("Custom model")
|
||||||
|
model_choices.append(f"Keep current ({current_model})")
|
||||||
|
|
||||||
|
keep_idx = len(model_choices) - 1
|
||||||
|
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
|
||||||
|
|
||||||
|
if model_idx < len(minimax_models):
|
||||||
|
_set_default_model(config, minimax_models[model_idx])
|
||||||
|
elif model_idx == len(minimax_models):
|
||||||
|
custom = prompt("Enter model name")
|
||||||
|
if custom:
|
||||||
|
_set_default_model(config, custom)
|
||||||
|
# else: keep current
|
||||||
|
elif selected_provider == "anthropic":
|
||||||
|
# Try live model list first, fall back to static
|
||||||
|
from hermes_cli.models import provider_model_ids
|
||||||
|
live_models = provider_model_ids("anthropic")
|
||||||
|
anthropic_models = live_models if live_models else [
|
||||||
|
"claude-opus-4-6",
|
||||||
|
"claude-sonnet-4-6",
|
||||||
|
"claude-haiku-4-5-20251001",
|
||||||
|
]
|
||||||
|
model_choices = list(anthropic_models)
|
||||||
|
model_choices.append("Custom model")
|
||||||
|
model_choices.append(f"Keep current ({current_model})")
|
||||||
|
|
||||||
|
keep_idx = len(model_choices) - 1
|
||||||
|
model_idx = prompt_choice("Select default model:", model_choices, keep_idx)
|
||||||
|
|
||||||
|
if model_idx < len(anthropic_models):
|
||||||
|
_set_default_model(config, anthropic_models[model_idx])
|
||||||
|
elif model_idx == len(anthropic_models):
|
||||||
|
custom = prompt("Enter model name (e.g., claude-sonnet-4-20250514)")
|
||||||
|
if custom:
|
||||||
|
_set_default_model(config, custom)
|
||||||
|
# else: keep current
|
||||||
else:
|
else:
|
||||||
# Static list for OpenRouter / fallback (from canonical list)
|
# Static list for OpenRouter / fallback (from canonical list)
|
||||||
from hermes_cli.models import model_ids, menu_labels
|
from hermes_cli.models import model_ids, menu_labels
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ license = { text = "MIT" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
# Core
|
# Core
|
||||||
"openai",
|
"openai",
|
||||||
|
"anthropic>=0.39.0",
|
||||||
"python-dotenv",
|
"python-dotenv",
|
||||||
"fire",
|
"fire",
|
||||||
"httpx",
|
"httpx",
|
||||||
|
|
|
||||||
244
run_agent.py
244
run_agent.py
|
|
@ -296,13 +296,16 @@ class AIAgent:
|
||||||
self.base_url = base_url or OPENROUTER_BASE_URL
|
self.base_url = base_url or OPENROUTER_BASE_URL
|
||||||
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
|
provider_name = provider.strip().lower() if isinstance(provider, str) and provider.strip() else None
|
||||||
self.provider = provider_name or "openrouter"
|
self.provider = provider_name or "openrouter"
|
||||||
if api_mode in {"chat_completions", "codex_responses"}:
|
if api_mode in {"chat_completions", "codex_responses", "anthropic_messages"}:
|
||||||
self.api_mode = api_mode
|
self.api_mode = api_mode
|
||||||
elif self.provider == "openai-codex":
|
elif self.provider == "openai-codex":
|
||||||
self.api_mode = "codex_responses"
|
self.api_mode = "codex_responses"
|
||||||
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
|
elif (provider_name is None) and "chatgpt.com/backend-api/codex" in self.base_url.lower():
|
||||||
self.api_mode = "codex_responses"
|
self.api_mode = "codex_responses"
|
||||||
self.provider = "openai-codex"
|
self.provider = "openai-codex"
|
||||||
|
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self.base_url.lower()):
|
||||||
|
self.api_mode = "anthropic_messages"
|
||||||
|
self.provider = "anthropic"
|
||||||
else:
|
else:
|
||||||
self.api_mode = "chat_completions"
|
self.api_mode = "chat_completions"
|
||||||
|
|
||||||
|
|
@ -343,7 +346,8 @@ class AIAgent:
|
||||||
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
|
# conversation prefix. Uses system_and_3 strategy (4 breakpoints).
|
||||||
is_openrouter = "openrouter" in self.base_url.lower()
|
is_openrouter = "openrouter" in self.base_url.lower()
|
||||||
is_claude = "claude" in self.model.lower()
|
is_claude = "claude" in self.model.lower()
|
||||||
self._use_prompt_caching = is_openrouter and is_claude
|
is_native_anthropic = self.api_mode == "anthropic_messages"
|
||||||
|
self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic
|
||||||
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost)
|
||||||
|
|
||||||
# Iteration budget pressure: warn the LLM as it approaches max_iterations.
|
# Iteration budget pressure: warn the LLM as it approaches max_iterations.
|
||||||
|
|
@ -420,66 +424,84 @@ class AIAgent:
|
||||||
]:
|
]:
|
||||||
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
||||||
|
|
||||||
# Initialize OpenAI client via centralized provider router.
|
# Initialize LLM client via centralized provider router.
|
||||||
# The router handles auth resolution, base URL, headers, and
|
# The router handles auth resolution, base URL, headers, and
|
||||||
# Codex wrapping for all known providers.
|
# Codex/Anthropic wrapping for all known providers.
|
||||||
# raw_codex=True because the main agent needs direct responses.stream()
|
# raw_codex=True because the main agent needs direct responses.stream()
|
||||||
# access for Codex Responses API streaming.
|
# access for Codex Responses API streaming.
|
||||||
if api_key and base_url:
|
self._anthropic_client = None
|
||||||
# Explicit credentials from CLI/gateway — construct directly.
|
|
||||||
# The runtime provider resolver already handled auth for us.
|
if self.api_mode == "anthropic_messages":
|
||||||
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
from agent.anthropic_adapter import build_anthropic_client
|
||||||
effective_base = base_url
|
effective_key = api_key or os.getenv("ANTHROPIC_API_KEY", "") or os.getenv("ANTHROPIC_TOKEN", "")
|
||||||
if "openrouter" in effective_base.lower():
|
if not effective_key:
|
||||||
client_kwargs["default_headers"] = {
|
from agent.anthropic_adapter import resolve_anthropic_token
|
||||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
effective_key = resolve_anthropic_token() or ""
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
self._anthropic_api_key = effective_key
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
self._anthropic_client = build_anthropic_client(effective_key, base_url if base_url and "anthropic" in base_url else None)
|
||||||
}
|
# No OpenAI client needed for Anthropic mode
|
||||||
elif "api.kimi.com" in effective_base.lower():
|
self.client = None
|
||||||
client_kwargs["default_headers"] = {
|
self._client_kwargs = {}
|
||||||
"User-Agent": "KimiCLI/1.3",
|
if not self.quiet_mode:
|
||||||
}
|
print(f"🤖 AI Agent initialized with model: {self.model} (Anthropic native)")
|
||||||
|
if effective_key and len(effective_key) > 12:
|
||||||
|
print(f"🔑 Using token: {effective_key[:8]}...{effective_key[-4:]}")
|
||||||
else:
|
else:
|
||||||
# No explicit creds — use the centralized provider router
|
if api_key and base_url:
|
||||||
from agent.auxiliary_client import resolve_provider_client
|
# Explicit credentials from CLI/gateway — construct directly.
|
||||||
_routed_client, _ = resolve_provider_client(
|
# The runtime provider resolver already handled auth for us.
|
||||||
self.provider or "auto", model=self.model, raw_codex=True)
|
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||||
if _routed_client is not None:
|
effective_base = base_url
|
||||||
client_kwargs = {
|
if "openrouter" in effective_base.lower():
|
||||||
"api_key": _routed_client.api_key,
|
client_kwargs["default_headers"] = {
|
||||||
"base_url": str(_routed_client.base_url),
|
|
||||||
}
|
|
||||||
# Preserve any default_headers the router set
|
|
||||||
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
|
|
||||||
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
|
|
||||||
else:
|
|
||||||
# Final fallback: try raw OpenRouter key
|
|
||||||
client_kwargs = {
|
|
||||||
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
|
|
||||||
"base_url": OPENROUTER_BASE_URL,
|
|
||||||
"default_headers": {
|
|
||||||
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
"X-OpenRouter-Title": "Hermes Agent",
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||||
},
|
}
|
||||||
}
|
elif "api.kimi.com" in effective_base.lower():
|
||||||
|
client_kwargs["default_headers"] = {
|
||||||
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
"User-Agent": "KimiCLI/1.3",
|
||||||
try:
|
}
|
||||||
self.client = OpenAI(**client_kwargs)
|
else:
|
||||||
if not self.quiet_mode:
|
# No explicit creds — use the centralized provider router
|
||||||
print(f"🤖 AI Agent initialized with model: {self.model}")
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
if base_url:
|
_routed_client, _ = resolve_provider_client(
|
||||||
print(f"🔗 Using custom base URL: {base_url}")
|
self.provider or "auto", model=self.model, raw_codex=True)
|
||||||
# Always show API key info (masked) for debugging auth issues
|
if _routed_client is not None:
|
||||||
key_used = client_kwargs.get("api_key", "none")
|
client_kwargs = {
|
||||||
if key_used and key_used != "dummy-key" and len(key_used) > 12:
|
"api_key": _routed_client.api_key,
|
||||||
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
|
"base_url": str(_routed_client.base_url),
|
||||||
|
}
|
||||||
|
# Preserve any default_headers the router set
|
||||||
|
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
|
||||||
|
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
|
||||||
else:
|
else:
|
||||||
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
|
# Final fallback: try raw OpenRouter key
|
||||||
except Exception as e:
|
client_kwargs = {
|
||||||
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
|
||||||
|
"base_url": OPENROUTER_BASE_URL,
|
||||||
|
"default_headers": {
|
||||||
|
"HTTP-Referer": "https://hermes-agent.nousresearch.com",
|
||||||
|
"X-OpenRouter-Title": "Hermes Agent",
|
||||||
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
||||||
|
try:
|
||||||
|
self.client = OpenAI(**client_kwargs)
|
||||||
|
if not self.quiet_mode:
|
||||||
|
print(f"🤖 AI Agent initialized with model: {self.model}")
|
||||||
|
if base_url:
|
||||||
|
print(f"🔗 Using custom base URL: {base_url}")
|
||||||
|
# Always show API key info (masked) for debugging auth issues
|
||||||
|
key_used = client_kwargs.get("api_key", "none")
|
||||||
|
if key_used and key_used != "dummy-key" and len(key_used) > 12:
|
||||||
|
print(f"🔑 Using API key: {key_used[:8]}...{key_used[-4:]}")
|
||||||
|
else:
|
||||||
|
print(f"⚠️ Warning: API key appears invalid or missing (got: '{key_used[:20] if key_used else 'none'}...')")
|
||||||
|
except Exception as e:
|
||||||
|
raise RuntimeError(f"Failed to initialize OpenAI client: {e}")
|
||||||
|
|
||||||
# Provider fallback — a single backup model/provider tried when the
|
# Provider fallback — a single backup model/provider tried when the
|
||||||
# primary is exhausted (rate-limit, overload, connection failure).
|
# primary is exhausted (rate-limit, overload, connection failure).
|
||||||
|
|
@ -533,7 +555,8 @@ class AIAgent:
|
||||||
|
|
||||||
# Show prompt caching status
|
# Show prompt caching status
|
||||||
if self._use_prompt_caching and not self.quiet_mode:
|
if self._use_prompt_caching and not self.quiet_mode:
|
||||||
print(f"💾 Prompt caching: ENABLED (Claude via OpenRouter, {self._cache_ttl} TTL)")
|
source = "native Anthropic" if is_native_anthropic else "Claude via OpenRouter"
|
||||||
|
print(f"💾 Prompt caching: ENABLED ({source}, {self._cache_ttl} TTL)")
|
||||||
|
|
||||||
# Session logging setup - auto-save conversation trajectories for debugging
|
# Session logging setup - auto-save conversation trajectories for debugging
|
||||||
self.session_start = datetime.now()
|
self.session_start = datetime.now()
|
||||||
|
|
@ -2233,6 +2256,8 @@ class AIAgent:
|
||||||
try:
|
try:
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode == "codex_responses":
|
||||||
result["response"] = self._run_codex_stream(api_kwargs)
|
result["response"] = self._run_codex_stream(api_kwargs)
|
||||||
|
elif self.api_mode == "anthropic_messages":
|
||||||
|
result["response"] = self._anthropic_client.messages.create(**api_kwargs)
|
||||||
else:
|
else:
|
||||||
result["response"] = self.client.chat.completions.create(**api_kwargs)
|
result["response"] = self.client.chat.completions.create(**api_kwargs)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
@ -2245,12 +2270,19 @@ class AIAgent:
|
||||||
if self._interrupt_requested:
|
if self._interrupt_requested:
|
||||||
# Force-close the HTTP connection to stop token generation
|
# Force-close the HTTP connection to stop token generation
|
||||||
try:
|
try:
|
||||||
self.client.close()
|
if self.api_mode == "anthropic_messages":
|
||||||
|
self._anthropic_client.close()
|
||||||
|
else:
|
||||||
|
self.client.close()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
# Rebuild the client for future calls (cheap, no network)
|
# Rebuild the client for future calls (cheap, no network)
|
||||||
try:
|
try:
|
||||||
self.client = OpenAI(**self._client_kwargs)
|
if self.api_mode == "anthropic_messages":
|
||||||
|
from agent.anthropic_adapter import build_anthropic_client
|
||||||
|
self._anthropic_client = build_anthropic_client(self._anthropic_api_key)
|
||||||
|
else:
|
||||||
|
self.client = OpenAI(**self._client_kwargs)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
raise InterruptedError("Agent interrupted during API call")
|
raise InterruptedError("Agent interrupted during API call")
|
||||||
|
|
@ -2336,6 +2368,16 @@ class AIAgent:
|
||||||
|
|
||||||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||||
"""Build the keyword arguments dict for the active API mode."""
|
"""Build the keyword arguments dict for the active API mode."""
|
||||||
|
if self.api_mode == "anthropic_messages":
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||||
|
return build_anthropic_kwargs(
|
||||||
|
model=self.model,
|
||||||
|
messages=api_messages,
|
||||||
|
tools=self.tools,
|
||||||
|
max_tokens=None,
|
||||||
|
reasoning_config=self.reasoning_config,
|
||||||
|
)
|
||||||
|
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode == "codex_responses":
|
||||||
instructions = ""
|
instructions = ""
|
||||||
payload_messages = api_messages
|
payload_messages = api_messages
|
||||||
|
|
@ -2659,6 +2701,15 @@ class AIAgent:
|
||||||
if "max_output_tokens" in codex_kwargs:
|
if "max_output_tokens" in codex_kwargs:
|
||||||
codex_kwargs["max_output_tokens"] = 5120
|
codex_kwargs["max_output_tokens"] = 5120
|
||||||
response = self._run_codex_stream(codex_kwargs)
|
response = self._run_codex_stream(codex_kwargs)
|
||||||
|
elif not _aux_available and self.api_mode == "anthropic_messages":
|
||||||
|
# Native Anthropic — use the Anthropic client directly
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs
|
||||||
|
ant_kwargs = _build_ant_kwargs(
|
||||||
|
model=self.model, messages=api_messages,
|
||||||
|
tools=[memory_tool_def], max_tokens=5120,
|
||||||
|
reasoning_config=None,
|
||||||
|
)
|
||||||
|
response = self._anthropic_client.messages.create(**ant_kwargs)
|
||||||
elif not _aux_available:
|
elif not _aux_available:
|
||||||
api_kwargs = {
|
api_kwargs = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
|
|
@ -2669,12 +2720,17 @@ class AIAgent:
|
||||||
}
|
}
|
||||||
response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
|
response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
|
||||||
|
|
||||||
# Extract tool calls from the response, handling both API formats
|
# Extract tool calls from the response, handling all API formats
|
||||||
tool_calls = []
|
tool_calls = []
|
||||||
if self.api_mode == "codex_responses" and not _aux_available:
|
if self.api_mode == "codex_responses" and not _aux_available:
|
||||||
assistant_msg, _ = self._normalize_codex_response(response)
|
assistant_msg, _ = self._normalize_codex_response(response)
|
||||||
if assistant_msg and assistant_msg.tool_calls:
|
if assistant_msg and assistant_msg.tool_calls:
|
||||||
tool_calls = assistant_msg.tool_calls
|
tool_calls = assistant_msg.tool_calls
|
||||||
|
elif self.api_mode == "anthropic_messages" and not _aux_available:
|
||||||
|
from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush
|
||||||
|
_flush_msg, _ = _nar_flush(response)
|
||||||
|
if _flush_msg and _flush_msg.tool_calls:
|
||||||
|
tool_calls = _flush_msg.tool_calls
|
||||||
elif hasattr(response, "choices") and response.choices:
|
elif hasattr(response, "choices") and response.choices:
|
||||||
assistant_message = response.choices[0].message
|
assistant_message = response.choices[0].message
|
||||||
if assistant_message.tool_calls:
|
if assistant_message.tool_calls:
|
||||||
|
|
@ -3147,12 +3203,20 @@ class AIAgent:
|
||||||
if summary_extra_body:
|
if summary_extra_body:
|
||||||
summary_kwargs["extra_body"] = summary_extra_body
|
summary_kwargs["extra_body"] = summary_extra_body
|
||||||
|
|
||||||
summary_response = self.client.chat.completions.create(**summary_kwargs)
|
if self.api_mode == "anthropic_messages":
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar
|
||||||
if summary_response.choices and summary_response.choices[0].message.content:
|
_ant_kw = _bak(model=self.model, messages=api_messages, tools=None,
|
||||||
final_response = summary_response.choices[0].message.content
|
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config)
|
||||||
|
summary_response = self._anthropic_client.messages.create(**_ant_kw)
|
||||||
|
_msg, _ = _nar(summary_response)
|
||||||
|
final_response = (_msg.content or "").strip()
|
||||||
else:
|
else:
|
||||||
final_response = ""
|
summary_response = self.client.chat.completions.create(**summary_kwargs)
|
||||||
|
|
||||||
|
if summary_response.choices and summary_response.choices[0].message.content:
|
||||||
|
final_response = summary_response.choices[0].message.content
|
||||||
|
else:
|
||||||
|
final_response = ""
|
||||||
|
|
||||||
if final_response:
|
if final_response:
|
||||||
if "<think>" in final_response:
|
if "<think>" in final_response:
|
||||||
|
|
@ -3169,6 +3233,13 @@ class AIAgent:
|
||||||
retry_response = self._run_codex_stream(codex_kwargs)
|
retry_response = self._run_codex_stream(codex_kwargs)
|
||||||
retry_msg, _ = self._normalize_codex_response(retry_response)
|
retry_msg, _ = self._normalize_codex_response(retry_response)
|
||||||
final_response = (retry_msg.content or "").strip() if retry_msg else ""
|
final_response = (retry_msg.content or "").strip() if retry_msg else ""
|
||||||
|
elif self.api_mode == "anthropic_messages":
|
||||||
|
from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2
|
||||||
|
_ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None,
|
||||||
|
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config)
|
||||||
|
retry_response = self._anthropic_client.messages.create(**_ant_kw2)
|
||||||
|
_retry_msg, _ = _nar2(retry_response)
|
||||||
|
final_response = (_retry_msg.content or "").strip()
|
||||||
else:
|
else:
|
||||||
summary_kwargs = {
|
summary_kwargs = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
|
|
@ -3548,6 +3619,7 @@ class AIAgent:
|
||||||
compression_attempts = 0
|
compression_attempts = 0
|
||||||
max_compression_attempts = 3
|
max_compression_attempts = 3
|
||||||
codex_auth_retry_attempted = False
|
codex_auth_retry_attempted = False
|
||||||
|
anthropic_auth_retry_attempted = False
|
||||||
nous_auth_retry_attempted = False
|
nous_auth_retry_attempted = False
|
||||||
restart_with_compressed_messages = False
|
restart_with_compressed_messages = False
|
||||||
restart_with_length_continuation = False
|
restart_with_length_continuation = False
|
||||||
|
|
@ -3598,6 +3670,17 @@ class AIAgent:
|
||||||
elif len(output_items) == 0:
|
elif len(output_items) == 0:
|
||||||
response_invalid = True
|
response_invalid = True
|
||||||
error_details.append("response.output is empty")
|
error_details.append("response.output is empty")
|
||||||
|
elif self.api_mode == "anthropic_messages":
|
||||||
|
content_blocks = getattr(response, "content", None) if response is not None else None
|
||||||
|
if response is None:
|
||||||
|
response_invalid = True
|
||||||
|
error_details.append("response is None")
|
||||||
|
elif not isinstance(content_blocks, list):
|
||||||
|
response_invalid = True
|
||||||
|
error_details.append("response.content is not a list")
|
||||||
|
elif len(content_blocks) == 0:
|
||||||
|
response_invalid = True
|
||||||
|
error_details.append("response.content is empty")
|
||||||
else:
|
else:
|
||||||
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
|
if response is None or not hasattr(response, 'choices') or response.choices is None or len(response.choices) == 0:
|
||||||
response_invalid = True
|
response_invalid = True
|
||||||
|
|
@ -3699,6 +3782,9 @@ class AIAgent:
|
||||||
finish_reason = "length"
|
finish_reason = "length"
|
||||||
else:
|
else:
|
||||||
finish_reason = "stop"
|
finish_reason = "stop"
|
||||||
|
elif self.api_mode == "anthropic_messages":
|
||||||
|
stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"}
|
||||||
|
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
|
||||||
else:
|
else:
|
||||||
finish_reason = response.choices[0].finish_reason
|
finish_reason = response.choices[0].finish_reason
|
||||||
|
|
||||||
|
|
@ -3776,7 +3862,7 @@ class AIAgent:
|
||||||
|
|
||||||
# Track actual token usage from response for context management
|
# Track actual token usage from response for context management
|
||||||
if hasattr(response, 'usage') and response.usage:
|
if hasattr(response, 'usage') and response.usage:
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode in ("codex_responses", "anthropic_messages"):
|
||||||
prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
|
prompt_tokens = getattr(response.usage, 'input_tokens', 0) or 0
|
||||||
completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
|
completion_tokens = getattr(response.usage, 'output_tokens', 0) or 0
|
||||||
total_tokens = (
|
total_tokens = (
|
||||||
|
|
@ -3811,9 +3897,15 @@ class AIAgent:
|
||||||
|
|
||||||
# Log cache hit stats when prompt caching is active
|
# Log cache hit stats when prompt caching is active
|
||||||
if self._use_prompt_caching:
|
if self._use_prompt_caching:
|
||||||
details = getattr(response.usage, 'prompt_tokens_details', None)
|
if self.api_mode == "anthropic_messages":
|
||||||
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
|
# Anthropic uses cache_read_input_tokens / cache_creation_input_tokens
|
||||||
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
|
cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0
|
||||||
|
written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
|
||||||
|
else:
|
||||||
|
# OpenRouter uses prompt_tokens_details.cached_tokens
|
||||||
|
details = getattr(response.usage, 'prompt_tokens_details', None)
|
||||||
|
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
|
||||||
|
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
|
||||||
prompt = usage_dict["prompt_tokens"]
|
prompt = usage_dict["prompt_tokens"]
|
||||||
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
|
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
|
||||||
if not self.quiet_mode:
|
if not self.quiet_mode:
|
||||||
|
|
@ -3863,6 +3955,21 @@ class AIAgent:
|
||||||
if self._try_refresh_nous_client_credentials(force=True):
|
if self._try_refresh_nous_client_credentials(force=True):
|
||||||
print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
|
print(f"{self.log_prefix}🔐 Nous agent key refreshed after 401. Retrying request...")
|
||||||
continue
|
continue
|
||||||
|
if (
|
||||||
|
self.api_mode == "anthropic_messages"
|
||||||
|
and status_code == 401
|
||||||
|
and hasattr(self, '_anthropic_api_key')
|
||||||
|
and not anthropic_auth_retry_attempted
|
||||||
|
):
|
||||||
|
anthropic_auth_retry_attempted = True
|
||||||
|
# Try re-reading Claude Code credentials (they may have been refreshed)
|
||||||
|
from agent.anthropic_adapter import resolve_anthropic_token, build_anthropic_client
|
||||||
|
new_token = resolve_anthropic_token()
|
||||||
|
if new_token and new_token != self._anthropic_api_key:
|
||||||
|
self._anthropic_api_key = new_token
|
||||||
|
self._anthropic_client = build_anthropic_client(new_token)
|
||||||
|
print(f"{self.log_prefix}🔐 Anthropic credentials refreshed after 401. Retrying request...")
|
||||||
|
continue
|
||||||
|
|
||||||
retry_count += 1
|
retry_count += 1
|
||||||
elapsed_time = time.time() - api_start_time
|
elapsed_time = time.time() - api_start_time
|
||||||
|
|
@ -4105,6 +4212,9 @@ class AIAgent:
|
||||||
try:
|
try:
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode == "codex_responses":
|
||||||
assistant_message, finish_reason = self._normalize_codex_response(response)
|
assistant_message, finish_reason = self._normalize_codex_response(response)
|
||||||
|
elif self.api_mode == "anthropic_messages":
|
||||||
|
from agent.anthropic_adapter import normalize_anthropic_response
|
||||||
|
assistant_message, finish_reason = normalize_anthropic_response(response)
|
||||||
else:
|
else:
|
||||||
assistant_message = response.choices[0].message
|
assistant_message = response.choices[0].message
|
||||||
|
|
||||||
|
|
|
||||||
500
tests/test_anthropic_adapter.py
Normal file
500
tests/test_anthropic_adapter.py
Normal file
|
|
@ -0,0 +1,500 @@
|
||||||
|
"""Tests for agent/anthropic_adapter.py — Anthropic Messages API adapter."""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from types import SimpleNamespace
|
||||||
|
from unittest.mock import patch, MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from agent.anthropic_adapter import (
|
||||||
|
_is_oauth_token,
|
||||||
|
build_anthropic_client,
|
||||||
|
build_anthropic_kwargs,
|
||||||
|
convert_messages_to_anthropic,
|
||||||
|
convert_tools_to_anthropic,
|
||||||
|
is_claude_code_token_valid,
|
||||||
|
normalize_anthropic_response,
|
||||||
|
normalize_model_name,
|
||||||
|
read_claude_code_credentials,
|
||||||
|
resolve_anthropic_token,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Auth helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsOAuthToken:
|
||||||
|
def test_setup_token(self):
|
||||||
|
assert _is_oauth_token("sk-ant-oat01-abcdef1234567890") is True
|
||||||
|
|
||||||
|
def test_api_key(self):
|
||||||
|
assert _is_oauth_token("sk-ant-api03-abcdef1234567890") is False
|
||||||
|
|
||||||
|
def test_managed_key(self):
|
||||||
|
# Managed keys from ~/.claude.json are NOT regular API keys
|
||||||
|
assert _is_oauth_token("ou1R1z-ft0A-bDeZ9wAA") is True
|
||||||
|
|
||||||
|
def test_jwt_token(self):
|
||||||
|
# JWTs from OAuth flow
|
||||||
|
assert _is_oauth_token("eyJhbGciOiJSUzI1NiJ9.test") is True
|
||||||
|
|
||||||
|
def test_empty(self):
|
||||||
|
assert _is_oauth_token("") is False
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildAnthropicClient:
|
||||||
|
def test_setup_token_uses_auth_token(self):
|
||||||
|
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||||
|
build_anthropic_client("sk-ant-oat01-" + "x" * 60)
|
||||||
|
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||||
|
assert "auth_token" in kwargs
|
||||||
|
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||||
|
assert "oauth-2025-04-20" in betas
|
||||||
|
assert "interleaved-thinking-2025-05-14" in betas
|
||||||
|
assert "fine-grained-tool-streaming-2025-05-14" in betas
|
||||||
|
assert "api_key" not in kwargs
|
||||||
|
|
||||||
|
def test_api_key_uses_api_key(self):
|
||||||
|
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||||
|
build_anthropic_client("sk-ant-api03-something")
|
||||||
|
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||||
|
assert kwargs["api_key"] == "sk-ant-api03-something"
|
||||||
|
assert "auth_token" not in kwargs
|
||||||
|
# API key auth should still get common betas
|
||||||
|
betas = kwargs["default_headers"]["anthropic-beta"]
|
||||||
|
assert "interleaved-thinking-2025-05-14" in betas
|
||||||
|
assert "oauth-2025-04-20" not in betas # OAuth-only beta NOT present
|
||||||
|
|
||||||
|
def test_custom_base_url(self):
|
||||||
|
with patch("agent.anthropic_adapter._anthropic_sdk") as mock_sdk:
|
||||||
|
build_anthropic_client("sk-ant-api03-x", base_url="https://custom.api.com")
|
||||||
|
kwargs = mock_sdk.Anthropic.call_args[1]
|
||||||
|
assert kwargs["base_url"] == "https://custom.api.com"
|
||||||
|
|
||||||
|
|
||||||
|
class TestReadClaudeCodeCredentials:
|
||||||
|
def test_reads_valid_credentials(self, tmp_path, monkeypatch):
|
||||||
|
cred_file = tmp_path / ".claude" / ".credentials.json"
|
||||||
|
cred_file.parent.mkdir(parents=True)
|
||||||
|
cred_file.write_text(json.dumps({
|
||||||
|
"claudeAiOauth": {
|
||||||
|
"accessToken": "sk-ant-oat01-test-token",
|
||||||
|
"refreshToken": "sk-ant-ort01-refresh",
|
||||||
|
"expiresAt": int(time.time() * 1000) + 3600_000,
|
||||||
|
}
|
||||||
|
}))
|
||||||
|
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||||
|
creds = read_claude_code_credentials()
|
||||||
|
assert creds is not None
|
||||||
|
assert creds["accessToken"] == "sk-ant-oat01-test-token"
|
||||||
|
assert creds["refreshToken"] == "sk-ant-ort01-refresh"
|
||||||
|
|
||||||
|
def test_returns_none_for_missing_file(self, tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||||
|
assert read_claude_code_credentials() is None
|
||||||
|
|
||||||
|
def test_returns_none_for_missing_oauth_key(self, tmp_path, monkeypatch):
|
||||||
|
cred_file = tmp_path / ".claude" / ".credentials.json"
|
||||||
|
cred_file.parent.mkdir(parents=True)
|
||||||
|
cred_file.write_text(json.dumps({"someOtherKey": {}}))
|
||||||
|
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||||
|
assert read_claude_code_credentials() is None
|
||||||
|
|
||||||
|
def test_returns_none_for_empty_access_token(self, tmp_path, monkeypatch):
|
||||||
|
cred_file = tmp_path / ".claude" / ".credentials.json"
|
||||||
|
cred_file.parent.mkdir(parents=True)
|
||||||
|
cred_file.write_text(json.dumps({
|
||||||
|
"claudeAiOauth": {"accessToken": "", "refreshToken": "x"}
|
||||||
|
}))
|
||||||
|
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||||
|
assert read_claude_code_credentials() is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestIsClaudeCodeTokenValid:
|
||||||
|
def test_valid_token(self):
|
||||||
|
creds = {"accessToken": "tok", "expiresAt": int(time.time() * 1000) + 3600_000}
|
||||||
|
assert is_claude_code_token_valid(creds) is True
|
||||||
|
|
||||||
|
def test_expired_token(self):
|
||||||
|
creds = {"accessToken": "tok", "expiresAt": int(time.time() * 1000) - 3600_000}
|
||||||
|
assert is_claude_code_token_valid(creds) is False
|
||||||
|
|
||||||
|
def test_no_expiry_but_has_token(self):
|
||||||
|
creds = {"accessToken": "tok", "expiresAt": 0}
|
||||||
|
assert is_claude_code_token_valid(creds) is True
|
||||||
|
|
||||||
|
|
||||||
|
class TestResolveAnthropicToken:
|
||||||
|
def test_prefers_api_key(self, monkeypatch):
|
||||||
|
monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-mykey")
|
||||||
|
monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-mytoken")
|
||||||
|
assert resolve_anthropic_token() == "sk-ant-api03-mykey"
|
||||||
|
|
||||||
|
def test_falls_back_to_token(self, monkeypatch):
|
||||||
|
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||||
|
monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-mytoken")
|
||||||
|
assert resolve_anthropic_token() == "sk-ant-oat01-mytoken"
|
||||||
|
|
||||||
|
def test_returns_none_with_no_creds(self, monkeypatch, tmp_path):
|
||||||
|
monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
|
||||||
|
monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)
|
||||||
|
monkeypatch.delenv("CLAUDE_CODE_OAUTH_TOKEN", raising=False)
|
||||||
|
monkeypatch.setattr("agent.anthropic_adapter.Path.home", lambda: tmp_path)
|
||||||
|
assert resolve_anthropic_token() is None
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Model name normalization
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeModelName:
|
||||||
|
def test_strips_anthropic_prefix(self):
|
||||||
|
assert normalize_model_name("anthropic/claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
|
||||||
|
|
||||||
|
def test_leaves_bare_name(self):
|
||||||
|
assert normalize_model_name("claude-sonnet-4-20250514") == "claude-sonnet-4-20250514"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tool conversion
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestConvertTools:
|
||||||
|
def test_converts_openai_to_anthropic_format(self):
|
||||||
|
tools = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "search",
|
||||||
|
"description": "Search the web",
|
||||||
|
"parameters": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"query": {"type": "string"}},
|
||||||
|
"required": ["query"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
result = convert_tools_to_anthropic(tools)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]["name"] == "search"
|
||||||
|
assert result[0]["description"] == "Search the web"
|
||||||
|
assert result[0]["input_schema"]["properties"]["query"]["type"] == "string"
|
||||||
|
|
||||||
|
def test_empty_tools(self):
|
||||||
|
assert convert_tools_to_anthropic([]) == []
|
||||||
|
assert convert_tools_to_anthropic(None) == []
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Message conversion
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestConvertMessages:
|
||||||
|
def test_extracts_system_prompt(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "You are helpful."},
|
||||||
|
{"role": "user", "content": "Hello"},
|
||||||
|
]
|
||||||
|
system, result = convert_messages_to_anthropic(messages)
|
||||||
|
assert system == "You are helpful."
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]["role"] == "user"
|
||||||
|
|
||||||
|
def test_converts_tool_calls(self):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "Let me search.",
|
||||||
|
"tool_calls": [
|
||||||
|
{
|
||||||
|
"id": "tc_1",
|
||||||
|
"function": {
|
||||||
|
"name": "search",
|
||||||
|
"arguments": '{"query": "test"}',
|
||||||
|
},
|
||||||
|
}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{"role": "tool", "tool_call_id": "tc_1", "content": "search results"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
blocks = result[0]["content"]
|
||||||
|
assert blocks[0] == {"type": "text", "text": "Let me search."}
|
||||||
|
assert blocks[1]["type"] == "tool_use"
|
||||||
|
assert blocks[1]["id"] == "tc_1"
|
||||||
|
assert blocks[1]["input"] == {"query": "test"}
|
||||||
|
|
||||||
|
def test_converts_tool_results(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "tool", "tool_call_id": "tc_1", "content": "result data"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
assert result[0]["role"] == "user"
|
||||||
|
assert result[0]["content"][0]["type"] == "tool_result"
|
||||||
|
assert result[0]["content"][0]["tool_use_id"] == "tc_1"
|
||||||
|
|
||||||
|
def test_merges_consecutive_tool_results(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "tool", "tool_call_id": "tc_1", "content": "result 1"},
|
||||||
|
{"role": "tool", "tool_call_id": "tc_2", "content": "result 2"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert len(result[0]["content"]) == 2
|
||||||
|
|
||||||
|
def test_strips_orphaned_tool_use(self):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [
|
||||||
|
{"id": "tc_orphan", "function": {"name": "x", "arguments": "{}"}}
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "never mind"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
# tc_orphan has no matching tool_result, should be stripped
|
||||||
|
assistant_blocks = result[0]["content"]
|
||||||
|
assert all(b.get("type") != "tool_use" for b in assistant_blocks)
|
||||||
|
|
||||||
|
def test_system_with_cache_control(self):
|
||||||
|
messages = [
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": [
|
||||||
|
{"type": "text", "text": "System prompt", "cache_control": {"type": "ephemeral"}},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{"role": "user", "content": "Hi"},
|
||||||
|
]
|
||||||
|
system, result = convert_messages_to_anthropic(messages)
|
||||||
|
# When cache_control is present, system should be a list of blocks
|
||||||
|
assert isinstance(system, list)
|
||||||
|
assert system[0]["cache_control"] == {"type": "ephemeral"}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Build kwargs
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildAnthropicKwargs:
|
||||||
|
def test_basic_kwargs(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "system", "content": "Be helpful."},
|
||||||
|
{"role": "user", "content": "Hi"},
|
||||||
|
]
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=messages,
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config=None,
|
||||||
|
)
|
||||||
|
assert kwargs["model"] == "claude-sonnet-4-20250514"
|
||||||
|
assert kwargs["system"] == "Be helpful."
|
||||||
|
assert kwargs["max_tokens"] == 4096
|
||||||
|
assert "tools" not in kwargs
|
||||||
|
|
||||||
|
def test_strips_anthropic_prefix(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="anthropic/claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config=None,
|
||||||
|
)
|
||||||
|
assert kwargs["model"] == "claude-sonnet-4-20250514"
|
||||||
|
|
||||||
|
def test_reasoning_config_maps_to_thinking(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "think hard"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": True, "effort": "high"},
|
||||||
|
)
|
||||||
|
assert kwargs["thinking"]["type"] == "enabled"
|
||||||
|
assert kwargs["thinking"]["budget_tokens"] == 16000
|
||||||
|
assert kwargs["max_tokens"] >= 16000 + 4096
|
||||||
|
|
||||||
|
def test_reasoning_disabled(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "quick"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config={"enabled": False},
|
||||||
|
)
|
||||||
|
assert "thinking" not in kwargs
|
||||||
|
|
||||||
|
def test_default_max_tokens(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
tools=None,
|
||||||
|
max_tokens=None,
|
||||||
|
reasoning_config=None,
|
||||||
|
)
|
||||||
|
assert kwargs["max_tokens"] == 16384
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Response normalization
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeResponse:
|
||||||
|
def _make_response(self, content_blocks, stop_reason="end_turn"):
|
||||||
|
resp = SimpleNamespace()
|
||||||
|
resp.content = content_blocks
|
||||||
|
resp.stop_reason = stop_reason
|
||||||
|
resp.usage = SimpleNamespace(input_tokens=100, output_tokens=50)
|
||||||
|
return resp
|
||||||
|
|
||||||
|
def test_text_response(self):
|
||||||
|
block = SimpleNamespace(type="text", text="Hello world")
|
||||||
|
msg, reason = normalize_anthropic_response(self._make_response([block]))
|
||||||
|
assert msg.content == "Hello world"
|
||||||
|
assert reason == "stop"
|
||||||
|
assert msg.tool_calls is None
|
||||||
|
|
||||||
|
def test_tool_use_response(self):
|
||||||
|
blocks = [
|
||||||
|
SimpleNamespace(type="text", text="Searching..."),
|
||||||
|
SimpleNamespace(
|
||||||
|
type="tool_use",
|
||||||
|
id="tc_1",
|
||||||
|
name="search",
|
||||||
|
input={"query": "test"},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
msg, reason = normalize_anthropic_response(
|
||||||
|
self._make_response(blocks, "tool_use")
|
||||||
|
)
|
||||||
|
assert msg.content == "Searching..."
|
||||||
|
assert reason == "tool_calls"
|
||||||
|
assert len(msg.tool_calls) == 1
|
||||||
|
assert msg.tool_calls[0].function.name == "search"
|
||||||
|
assert json.loads(msg.tool_calls[0].function.arguments) == {"query": "test"}
|
||||||
|
|
||||||
|
def test_thinking_response(self):
|
||||||
|
blocks = [
|
||||||
|
SimpleNamespace(type="thinking", thinking="Let me reason about this..."),
|
||||||
|
SimpleNamespace(type="text", text="The answer is 42."),
|
||||||
|
]
|
||||||
|
msg, reason = normalize_anthropic_response(self._make_response(blocks))
|
||||||
|
assert msg.content == "The answer is 42."
|
||||||
|
assert msg.reasoning == "Let me reason about this..."
|
||||||
|
|
||||||
|
def test_stop_reason_mapping(self):
|
||||||
|
block = SimpleNamespace(type="text", text="x")
|
||||||
|
_, r1 = normalize_anthropic_response(
|
||||||
|
self._make_response([block], "end_turn")
|
||||||
|
)
|
||||||
|
_, r2 = normalize_anthropic_response(
|
||||||
|
self._make_response([block], "tool_use")
|
||||||
|
)
|
||||||
|
_, r3 = normalize_anthropic_response(
|
||||||
|
self._make_response([block], "max_tokens")
|
||||||
|
)
|
||||||
|
assert r1 == "stop"
|
||||||
|
assert r2 == "tool_calls"
|
||||||
|
assert r3 == "length"
|
||||||
|
|
||||||
|
def test_no_text_content(self):
|
||||||
|
block = SimpleNamespace(
|
||||||
|
type="tool_use", id="tc_1", name="search", input={"q": "hi"}
|
||||||
|
)
|
||||||
|
msg, reason = normalize_anthropic_response(
|
||||||
|
self._make_response([block], "tool_use")
|
||||||
|
)
|
||||||
|
assert msg.content is None
|
||||||
|
assert len(msg.tool_calls) == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Role alternation
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestRoleAlternation:
|
||||||
|
def test_merges_consecutive_user_messages(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "Hello"},
|
||||||
|
{"role": "user", "content": "World"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
assert len(result) == 1
|
||||||
|
assert result[0]["role"] == "user"
|
||||||
|
assert "Hello" in result[0]["content"]
|
||||||
|
assert "World" in result[0]["content"]
|
||||||
|
|
||||||
|
def test_preserves_proper_alternation(self):
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "Hi"},
|
||||||
|
{"role": "assistant", "content": "Hello!"},
|
||||||
|
{"role": "user", "content": "How are you?"},
|
||||||
|
]
|
||||||
|
_, result = convert_messages_to_anthropic(messages)
|
||||||
|
assert len(result) == 3
|
||||||
|
assert [m["role"] for m in result] == ["user", "assistant", "user"]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Tool choice
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestToolChoice:
|
||||||
|
_DUMMY_TOOL = [
|
||||||
|
{
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": "test",
|
||||||
|
"description": "x",
|
||||||
|
"parameters": {"type": "object", "properties": {}},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def test_auto_tool_choice(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
tools=self._DUMMY_TOOL,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config=None,
|
||||||
|
tool_choice="auto",
|
||||||
|
)
|
||||||
|
assert kwargs["tool_choice"] == {"type": "auto"}
|
||||||
|
|
||||||
|
def test_required_tool_choice(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
tools=self._DUMMY_TOOL,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config=None,
|
||||||
|
tool_choice="required",
|
||||||
|
)
|
||||||
|
assert kwargs["tool_choice"] == {"type": "any"}
|
||||||
|
|
||||||
|
def test_specific_tool_choice(self):
|
||||||
|
kwargs = build_anthropic_kwargs(
|
||||||
|
model="claude-sonnet-4-20250514",
|
||||||
|
messages=[{"role": "user", "content": "Hi"}],
|
||||||
|
tools=self._DUMMY_TOOL,
|
||||||
|
max_tokens=4096,
|
||||||
|
reasoning_config=None,
|
||||||
|
tool_choice="search",
|
||||||
|
)
|
||||||
|
assert kwargs["tool_choice"] == {"type": "tool", "name": "search"}
|
||||||
|
|
@ -281,20 +281,21 @@ class TestMaskApiKey:
|
||||||
|
|
||||||
class TestInit:
|
class TestInit:
|
||||||
def test_anthropic_base_url_accepted(self):
|
def test_anthropic_base_url_accepted(self):
|
||||||
"""Anthropic base URLs should be accepted (OpenAI-compatible endpoint)."""
|
"""Anthropic base URLs should route to native Anthropic client."""
|
||||||
with (
|
with (
|
||||||
patch("run_agent.get_tool_definitions", return_value=[]),
|
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
patch("agent.anthropic_adapter._anthropic_sdk") as mock_anthropic,
|
||||||
):
|
):
|
||||||
AIAgent(
|
agent = AIAgent(
|
||||||
api_key="test-key-1234567890",
|
api_key="test-key-1234567890",
|
||||||
base_url="https://api.anthropic.com/v1/",
|
base_url="https://api.anthropic.com/v1/",
|
||||||
quiet_mode=True,
|
quiet_mode=True,
|
||||||
skip_context_files=True,
|
skip_context_files=True,
|
||||||
skip_memory=True,
|
skip_memory=True,
|
||||||
)
|
)
|
||||||
mock_openai.assert_called_once()
|
assert agent.api_mode == "anthropic_messages"
|
||||||
|
mock_anthropic.Anthropic.assert_called_once()
|
||||||
|
|
||||||
def test_prompt_caching_claude_openrouter(self):
|
def test_prompt_caching_claude_openrouter(self):
|
||||||
"""Claude model via OpenRouter should enable prompt caching."""
|
"""Claude model via OpenRouter should enable prompt caching."""
|
||||||
|
|
@ -345,6 +346,23 @@ class TestInit:
|
||||||
)
|
)
|
||||||
assert a._use_prompt_caching is False
|
assert a._use_prompt_caching is False
|
||||||
|
|
||||||
|
def test_prompt_caching_native_anthropic(self):
|
||||||
|
"""Native Anthropic provider should enable prompt caching."""
|
||||||
|
with (
|
||||||
|
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||||
|
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||||
|
patch("agent.anthropic_adapter._anthropic_sdk"),
|
||||||
|
):
|
||||||
|
a = AIAgent(
|
||||||
|
api_key="test-key-1234567890",
|
||||||
|
base_url="https://api.anthropic.com/v1/",
|
||||||
|
quiet_mode=True,
|
||||||
|
skip_context_files=True,
|
||||||
|
skip_memory=True,
|
||||||
|
)
|
||||||
|
assert a.api_mode == "anthropic_messages"
|
||||||
|
assert a._use_prompt_caching is True
|
||||||
|
|
||||||
def test_valid_tool_names_populated(self):
|
def test_valid_tool_names_populated(self):
|
||||||
"""valid_tool_names should contain names from loaded tools."""
|
"""valid_tool_names should contain names from loaded tools."""
|
||||||
tools = _make_tool_defs("web_search", "terminal")
|
tools = _make_tool_defs("web_search", "terminal")
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,7 @@ hermes setup # Or configure everything at once
|
||||||
|----------|-----------|---------------|
|
|----------|-----------|---------------|
|
||||||
| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
|
| **Nous Portal** | Subscription-based, zero-config | OAuth login via `hermes model` |
|
||||||
| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
|
| **OpenAI Codex** | ChatGPT OAuth, uses Codex models | Device code auth via `hermes model` |
|
||||||
|
| **Anthropic** | Claude models directly (Pro/Max or API key) | API key or Claude Code setup-token |
|
||||||
| **OpenRouter** | 200+ models, pay-per-use | Enter your API key |
|
| **OpenRouter** | 200+ models, pay-per-use | Enter your API key |
|
||||||
| **Custom Endpoint** | VLLM, SGLang, any OpenAI-compatible API | Set base URL + API key |
|
| **Custom Endpoint** | VLLM, SGLang, any OpenAI-compatible API | Set base URL + API key |
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,9 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||||
| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/v1`) |
|
| `MINIMAX_BASE_URL` | Override MiniMax base URL (default: `https://api.minimax.io/v1`) |
|
||||||
| `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
|
| `MINIMAX_CN_API_KEY` | MiniMax API key — China endpoint ([minimaxi.com](https://www.minimaxi.com)) |
|
||||||
| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/v1`) |
|
| `MINIMAX_CN_BASE_URL` | Override MiniMax China base URL (default: `https://api.minimaxi.com/v1`) |
|
||||||
|
| `ANTHROPIC_API_KEY` | Anthropic API key or setup-token ([console.anthropic.com](https://console.anthropic.com/)) |
|
||||||
|
| `ANTHROPIC_TOKEN` | Anthropic OAuth/setup token (alternative to `ANTHROPIC_API_KEY`) |
|
||||||
|
| `CLAUDE_CODE_OAUTH_TOKEN` | Claude Code setup-token (same as `ANTHROPIC_TOKEN`) |
|
||||||
| `HERMES_MODEL` | Preferred model name (checked before `LLM_MODEL`, used by gateway) |
|
| `HERMES_MODEL` | Preferred model name (checked before `LLM_MODEL`, used by gateway) |
|
||||||
| `LLM_MODEL` | Default model name (fallback when not set in config.yaml) |
|
| `LLM_MODEL` | Default model name (fallback when not set in config.yaml) |
|
||||||
| `VOICE_TOOLS_OPENAI_KEY` | OpenAI key for TTS and voice transcription (separate from custom endpoint) |
|
| `VOICE_TOOLS_OPENAI_KEY` | OpenAI key for TTS and voice transcription (separate from custom endpoint) |
|
||||||
|
|
@ -32,7 +35,7 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||||
|
|
||||||
| Variable | Description |
|
| Variable | Description |
|
||||||
|----------|-------------|
|
|----------|-------------|
|
||||||
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `zai`, `kimi-coding`, `minimax`, `minimax-cn` (default: `auto`) |
|
| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `openrouter`, `nous`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn` (default: `auto`) |
|
||||||
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
|
||||||
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
|
||||||
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
|
||||||
|
|
|
||||||
|
|
@ -63,6 +63,7 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
||||||
|----------|-------|
|
|----------|-------|
|
||||||
| **Nous Portal** | `hermes model` (OAuth, subscription-based) |
|
| **Nous Portal** | `hermes model` (OAuth, subscription-based) |
|
||||||
| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
|
| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
|
||||||
|
| **Anthropic** | `hermes model` (API key, setup-token, or Claude Code auto-detect) |
|
||||||
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
|
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
|
||||||
| **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) |
|
| **z.ai / GLM** | `GLM_API_KEY` in `~/.hermes/.env` (provider: `zai`) |
|
||||||
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
|
| **Kimi / Moonshot** | `KIMI_API_KEY` in `~/.hermes/.env` (provider: `kimi-coding`) |
|
||||||
|
|
@ -78,6 +79,34 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod
|
||||||
Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](#auxiliary-models) below.
|
Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](#auxiliary-models) below.
|
||||||
:::
|
:::
|
||||||
|
|
||||||
|
### Anthropic (Native)
|
||||||
|
|
||||||
|
Use Claude models directly through the Anthropic API — no OpenRouter proxy needed. Supports three auth methods:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# With an API key (pay-per-token)
|
||||||
|
export ANTHROPIC_API_KEY=sk-ant-api03-...
|
||||||
|
hermes chat --provider anthropic --model claude-sonnet-4-6
|
||||||
|
|
||||||
|
# With a Claude Code setup-token (Pro/Max subscription)
|
||||||
|
export ANTHROPIC_API_KEY=sk-ant-oat01-... # from 'claude setup-token'
|
||||||
|
hermes chat --provider anthropic
|
||||||
|
|
||||||
|
# Auto-detect Claude Code credentials (if you have Claude Code installed)
|
||||||
|
hermes chat --provider anthropic # reads ~/.claude.json automatically
|
||||||
|
```
|
||||||
|
|
||||||
|
Or set it permanently:
|
||||||
|
```yaml
|
||||||
|
model:
|
||||||
|
provider: "anthropic"
|
||||||
|
default: "claude-sonnet-4-6"
|
||||||
|
```
|
||||||
|
|
||||||
|
:::tip Aliases
|
||||||
|
`--provider claude` and `--provider claude-code` also work as shorthand for `--provider anthropic`.
|
||||||
|
:::
|
||||||
|
|
||||||
### First-Class Chinese AI Providers
|
### First-Class Chinese AI Providers
|
||||||
|
|
||||||
These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select:
|
These providers have built-in support with dedicated provider IDs. Set the API key and use `--provider` to select:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue