Merge origin/main into hermes/hermes-daa73839

commit 62abb453d3

88 changed files with 5267 additions and 687 deletions
@@ -102,30 +102,15 @@ def build_anthropic_client(api_key: str, base_url: str = None):
 def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
-    """Read credentials from Claude Code's config files.
-
-    Checks two locations (in order):
-    1. ~/.claude.json — top-level primaryApiKey (native binary, v2.x)
-    2. ~/.claude/.credentials.json — claudeAiOauth block (npm/legacy installs)
+    """Read refreshable Claude Code OAuth credentials from ~/.claude/.credentials.json.
+
+    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
+    subscription flow is OAuth/setup-token based with refreshable credentials,
+    and native direct Anthropic provider usage should follow that path rather
+    than auto-detecting Claude's first-party managed key.

     Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
     """
-    # 1. Native binary (v2.x): ~/.claude.json with top-level primaryApiKey
-    claude_json = Path.home() / ".claude.json"
-    if claude_json.exists():
-        try:
-            data = json.loads(claude_json.read_text(encoding="utf-8"))
-            primary_key = data.get("primaryApiKey", "")
-            if primary_key:
-                return {
-                    "accessToken": primary_key,
-                    "refreshToken": "",
-                    "expiresAt": 0,  # Managed keys don't have a user-visible expiry
-                }
-        except (json.JSONDecodeError, OSError, IOError) as e:
-            logger.debug("Failed to read ~/.claude.json: %s", e)
-
     # 2. Legacy/npm installs: ~/.claude/.credentials.json
     cred_path = Path.home() / ".claude" / ".credentials.json"
     if cred_path.exists():
         try:
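For context, a minimal usage sketch of the narrowed reader. The millisecond-epoch reading of `expiresAt` is an assumption in this sketch; `is_claude_code_token_valid` below is the canonical check:

```python
# Sketch: consume the narrowed credential reader (names from this diff).
import time

creds = read_claude_code_credentials()
if creds and creds.get("accessToken"):
    expires_at = creds.get("expiresAt") or 0  # assumed millisecond epoch; 0 = no known expiry
    if expires_at and expires_at / 1000 < time.time():
        creds = None  # expired: callers should go through the OAuth refresh path instead
```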
@@ -138,6 +123,7 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
                 "accessToken": access_token,
                 "refreshToken": oauth_data.get("refreshToken", ""),
                 "expiresAt": oauth_data.get("expiresAt", 0),
+                "source": "claude_code_credentials_file",
             }
         except (json.JSONDecodeError, OSError, IOError) as e:
             logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
@@ -145,6 +131,20 @@ def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
     return None


+def read_claude_managed_key() -> Optional[str]:
+    """Read Claude's native managed key from ~/.claude.json for diagnostics only."""
+    claude_json = Path.home() / ".claude.json"
+    if claude_json.exists():
+        try:
+            data = json.loads(claude_json.read_text(encoding="utf-8"))
+            primary_key = data.get("primaryApiKey", "")
+            if isinstance(primary_key, str) and primary_key.strip():
+                return primary_key.strip()
+        except (json.JSONDecodeError, OSError, IOError) as e:
+            logger.debug("Failed to read ~/.claude.json: %s", e)
+    return None
+
+
 def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
     """Check if Claude Code credentials have a non-expired access token."""
     import time
@@ -273,6 +273,35 @@ def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[s
     return None


+def get_anthropic_token_source(token: Optional[str] = None) -> str:
+    """Best-effort source classification for an Anthropic credential token."""
+    token = (token or "").strip()
+    if not token:
+        return "none"
+
+    env_token = os.getenv("ANTHROPIC_TOKEN", "").strip()
+    if env_token and env_token == token:
+        return "anthropic_token_env"
+
+    cc_env_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
+    if cc_env_token and cc_env_token == token:
+        return "claude_code_oauth_token_env"
+
+    creds = read_claude_code_credentials()
+    if creds and creds.get("accessToken") == token:
+        return str(creds.get("source") or "claude_code_credentials")
+
+    managed_key = read_claude_managed_key()
+    if managed_key and managed_key == token:
+        return "claude_json_primary_api_key"
+
+    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
+    if api_key and api_key == token:
+        return "anthropic_api_key_env"
+
+    return "unknown"
+
+
 def resolve_anthropic_token() -> Optional[str]:
     """Resolve an Anthropic token from all available sources.
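A short usage sketch of the classifier; the labels are exactly the strings returned by the branches above:

```python
# Sketch: log where the active Anthropic credential came from.
token = resolve_anthropic_token()
source = get_anthropic_token_source(token)
# One of: "none", "anthropic_token_env", "claude_code_oauth_token_env",
# "claude_code_credentials_file" (or another creds["source"]),
# "claude_json_primary_api_key", "anthropic_api_key_env", "unknown".
logger.debug("Anthropic credential source: %s", source)
```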
@@ -391,6 +420,68 @@ def _sanitize_tool_id(tool_id: str) -> str:
     return sanitized or "tool_0"


+def _convert_openai_image_part_to_anthropic(part: Dict[str, Any]) -> Optional[Dict[str, Any]]:
+    """Convert an OpenAI-style image block to Anthropic's image source format."""
+    image_data = part.get("image_url", {})
+    url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data)
+    if not isinstance(url, str) or not url.strip():
+        return None
+    url = url.strip()
+
+    if url.startswith("data:"):
+        header, sep, data = url.partition(",")
+        if sep and ";base64" in header:
+            media_type = header[5:].split(";", 1)[0] or "image/png"
+            return {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": media_type,
+                    "data": data,
+                },
+            }
+
+    if url.startswith("http://") or url.startswith("https://"):
+        return {
+            "type": "image",
+            "source": {
+                "type": "url",
+                "url": url,
+            },
+        }
+
+    return None
+
+
+def _convert_user_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
+    if isinstance(part, dict):
+        ptype = part.get("type")
+        if ptype == "text":
+            block = {"type": "text", "text": part.get("text", "")}
+            if isinstance(part.get("cache_control"), dict):
+                block["cache_control"] = dict(part["cache_control"])
+            return block
+        if ptype == "image_url":
+            return _convert_openai_image_part_to_anthropic(part)
+        if ptype == "image" and part.get("source"):
+            return dict(part)
+        if ptype == "image" and part.get("data"):
+            media_type = part.get("mimeType") or part.get("media_type") or "image/png"
+            return {
+                "type": "image",
+                "source": {
+                    "type": "base64",
+                    "media_type": media_type,
+                    "data": part.get("data", ""),
+                },
+            }
+        if ptype == "tool_result":
+            return dict(part)
+    elif part is not None:
+        return {"type": "text", "text": str(part)}
+    return None
+
+
 def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
     """Convert OpenAI tool definitions to Anthropic format."""
     if not tools:
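For example, a base64 data URL converts like this (sketch; the input follows OpenAI's `image_url` part shape):

```python
part = {
    "type": "image_url",
    "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQ..."},
}
block = _convert_openai_image_part_to_anthropic(part)
# block == {
#     "type": "image",
#     "source": {"type": "base64", "media_type": "image/jpeg", "data": "/9j/4AAQ..."},
# }
```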
@@ -553,7 +644,14 @@ def convert_messages_to_anthropic(
             continue

         # Regular user message
-        result.append({"role": "user", "content": _convert_content_to_anthropic(content)})
+        if isinstance(content, list):
+            converted_blocks = _convert_content_to_anthropic(content)
+            result.append({
+                "role": "user",
+                "content": converted_blocks or [{"type": "text", "text": ""}],
+            })
+        else:
+            result.append({"role": "user", "content": content})

     # Strip orphaned tool_use blocks (no matching tool_result follows)
     tool_result_ids = set()
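The `or [{"type": "text", "text": ""}]` guard matters because an empty multimodal part list would otherwise produce an empty content array, which Anthropic's Messages API typically rejects. A sketch, assuming `_convert_content_to_anthropic` yields `[]` for an empty part list:

```python
converted_blocks = _convert_content_to_anthropic([])  # assumed [] when there are no parts
content = converted_blocks or [{"type": "text", "text": ""}]  # always at least one text block
```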
@@ -1,4 +1,4 @@
-"""Shared auxiliary OpenAI client for cheap/fast side tasks.
+"""Shared auxiliary client router for side tasks.

 Provides a single resolution chain so every consumer (context compression,
 session search, web extraction, vision analysis, browser vision) picks up
@@ -10,26 +10,30 @@ Resolution order for text tasks (auto mode):
 3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
 4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
    wrapped to look like a chat.completions client)
-5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
-   — checked via PROVIDER_REGISTRY entries with auth_type='api_key'
-6. None
+5. Native Anthropic
+6. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, MiniMax-CN)
+7. None

 Resolution order for vision/multimodal tasks (auto mode):
-1. OpenRouter
-2. Nous Portal
-3. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
-4. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
-5. None (API-key providers like z.ai/Kimi/MiniMax are skipped —
-   they may not support multimodal)
+1. Selected main provider, if it is one of the supported vision backends below
+2. OpenRouter
+3. Nous Portal
+4. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
+5. Native Anthropic
+6. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
+7. None

 Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
-CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
-"openrouter", "nous", "codex", or "main" (= steps 3-5).
+CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task.
+Default "auto" follows the chains above.

 Per-task model overrides (e.g. AUXILIARY_VISION_MODEL,
 AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug
 than the provider's default.
+
+Per-task direct endpoint overrides (e.g. AUXILIARY_VISION_BASE_URL,
+AUXILIARY_VISION_API_KEY) let callers route a specific auxiliary task to a
+custom OpenAI-compatible endpoint without touching the main model settings.
 """

 import json
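As a concrete illustration of the new per-task direct endpoint overrides (a sketch; endpoint, key, and model slug are placeholders):

```python
import os

# Route only the vision task to a local OpenAI-compatible server;
# every other task keeps following the auto chains documented above.
os.environ["AUXILIARY_VISION_BASE_URL"] = "http://localhost:8000/v1"
os.environ["AUXILIARY_VISION_API_KEY"] = "sk-local-placeholder"
os.environ["AUXILIARY_VISION_MODEL"] = "qwen2-vl-7b-instruct"
```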
@@ -74,6 +78,7 @@ auxiliary_is_nous: bool = False
 _OPENROUTER_MODEL = "google/gemini-3-flash-preview"
 _NOUS_MODEL = "gemini-3-flash"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
+_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
 _AUTH_JSON_PATH = get_hermes_home() / "auth.json"

 # Codex fallback: uses the Responses API (the only endpoint the Codex
@@ -312,6 +317,114 @@ class AsyncCodexAuxiliaryClient:
         self.base_url = sync_wrapper.base_url


+class _AnthropicCompletionsAdapter:
+    """OpenAI-client-compatible adapter for Anthropic Messages API."""
+
+    def __init__(self, real_client: Any, model: str):
+        self._client = real_client
+        self._model = model
+
+    def create(self, **kwargs) -> Any:
+        from agent.anthropic_adapter import build_anthropic_kwargs, normalize_anthropic_response
+
+        messages = kwargs.get("messages", [])
+        model = kwargs.get("model", self._model)
+        tools = kwargs.get("tools")
+        tool_choice = kwargs.get("tool_choice")
+        max_tokens = kwargs.get("max_tokens") or kwargs.get("max_completion_tokens") or 2000
+        temperature = kwargs.get("temperature")
+
+        normalized_tool_choice = None
+        if isinstance(tool_choice, str):
+            normalized_tool_choice = tool_choice
+        elif isinstance(tool_choice, dict):
+            choice_type = str(tool_choice.get("type", "")).lower()
+            if choice_type == "function":
+                normalized_tool_choice = tool_choice.get("function", {}).get("name")
+            elif choice_type in {"auto", "required", "none"}:
+                normalized_tool_choice = choice_type
+
+        anthropic_kwargs = build_anthropic_kwargs(
+            model=model,
+            messages=messages,
+            tools=tools,
+            max_tokens=max_tokens,
+            reasoning_config=None,
+            tool_choice=normalized_tool_choice,
+        )
+        if temperature is not None:
+            anthropic_kwargs["temperature"] = temperature
+
+        response = self._client.messages.create(**anthropic_kwargs)
+        assistant_message, finish_reason = normalize_anthropic_response(response)
+
+        usage = None
+        if hasattr(response, "usage") and response.usage:
+            prompt_tokens = getattr(response.usage, "input_tokens", 0) or 0
+            completion_tokens = getattr(response.usage, "output_tokens", 0) or 0
+            total_tokens = getattr(response.usage, "total_tokens", 0) or (prompt_tokens + completion_tokens)
+            usage = SimpleNamespace(
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+                total_tokens=total_tokens,
+            )
+
+        choice = SimpleNamespace(
+            index=0,
+            message=assistant_message,
+            finish_reason=finish_reason,
+        )
+        return SimpleNamespace(
+            choices=[choice],
+            model=model,
+            usage=usage,
+        )
+
+
+class _AnthropicChatShim:
+    def __init__(self, adapter: _AnthropicCompletionsAdapter):
+        self.completions = adapter
+
+
+class AnthropicAuxiliaryClient:
+    """OpenAI-client-compatible wrapper over a native Anthropic client."""
+
+    def __init__(self, real_client: Any, model: str, api_key: str, base_url: str):
+        self._real_client = real_client
+        adapter = _AnthropicCompletionsAdapter(real_client, model)
+        self.chat = _AnthropicChatShim(adapter)
+        self.api_key = api_key
+        self.base_url = base_url
+
+    def close(self):
+        close_fn = getattr(self._real_client, "close", None)
+        if callable(close_fn):
+            close_fn()
+
+
+class _AsyncAnthropicCompletionsAdapter:
+    def __init__(self, sync_adapter: _AnthropicCompletionsAdapter):
+        self._sync = sync_adapter
+
+    async def create(self, **kwargs) -> Any:
+        import asyncio
+        return await asyncio.to_thread(self._sync.create, **kwargs)
+
+
+class _AsyncAnthropicChatShim:
+    def __init__(self, adapter: _AsyncAnthropicCompletionsAdapter):
+        self.completions = adapter
+
+
+class AsyncAnthropicAuxiliaryClient:
+    def __init__(self, sync_wrapper: "AnthropicAuxiliaryClient"):
+        sync_adapter = sync_wrapper.chat.completions
+        async_adapter = _AsyncAnthropicCompletionsAdapter(sync_adapter)
+        self.chat = _AsyncAnthropicChatShim(async_adapter)
+        self.api_key = sync_wrapper.api_key
+        self.base_url = sync_wrapper.base_url
+
+
 def _read_nous_auth() -> Optional[dict]:
     """Read and validate ~/.hermes/auth.json for an active Nous provider.
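Net effect, sketched: auxiliary callers keep the OpenAI `chat.completions` call shape while the adapter translates to Anthropic's Messages API underneath. This assumes `normalize_anthropic_response` yields an OpenAI-style message object; prompt and token budget are placeholders:

```python
client, model = _try_anthropic()  # defined later in this diff
if client is not None:
    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": "Summarize this paragraph: ..."}],
        max_tokens=256,
    )
    print(resp.choices[0].message.content)  # OpenAI-shaped response via SimpleNamespace
```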
@@ -383,6 +496,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
                 break
         if not api_key:
             continue
+        if provider_id == "anthropic":
+            return _try_anthropic()
+
         # Resolve base URL (with optional env-var override)
         # Kimi Code keys (sk-kimi-) need api.kimi.com/coding/v1
         env_url = ""
@@ -421,6 +537,17 @@ def _get_auxiliary_provider(task: str = "") -> str:
     return "auto"


+def _get_auxiliary_env_override(task: str, suffix: str) -> Optional[str]:
+    """Read an auxiliary env override from AUXILIARY_* or CONTEXT_* prefixes."""
+    if not task:
+        return None
+    for prefix in ("AUXILIARY_", "CONTEXT_"):
+        val = os.getenv(f"{prefix}{task.upper()}_{suffix}", "").strip()
+        if val:
+            return val
+    return None
+
+
 def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]:
     or_key = os.getenv("OPENROUTER_API_KEY")
     if not or_key:
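So for task="web_extract" and suffix="MODEL", the lookup order is AUXILIARY_WEB_EXTRACT_MODEL, then CONTEXT_WEB_EXTRACT_MODEL:

```python
model_override = _get_auxiliary_env_override("web_extract", "MODEL")
# Checks AUXILIARY_WEB_EXTRACT_MODEL first, then CONTEXT_WEB_EXTRACT_MODEL;
# returns None when neither is set.
```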
@@ -522,6 +649,22 @@ def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
     return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL


+def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
+    try:
+        from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token
+    except ImportError:
+        return None, None
+
+    token = resolve_anthropic_token()
+    if not token:
+        return None, None
+
+    model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
+    logger.debug("Auxiliary client: Anthropic native (%s)", model)
+    real_client = build_anthropic_client(token, _ANTHROPIC_DEFAULT_BASE_URL)
+    return AnthropicAuxiliaryClient(real_client, model, token, _ANTHROPIC_DEFAULT_BASE_URL), model
+
+
 def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
     """Resolve a specific forced provider. Returns (None, None) if creds missing."""
     if forced == "openrouter":
@@ -584,6 +727,8 @@ def _to_async_client(sync_client, model: str):

     if isinstance(sync_client, CodexAuxiliaryClient):
         return AsyncCodexAuxiliaryClient(sync_client), model
+    if isinstance(sync_client, AnthropicAuxiliaryClient):
+        return AsyncAnthropicAuxiliaryClient(sync_client), model

     async_kwargs = {
         "api_key": sync_client.api_key,
@@ -602,6 +747,8 @@ def resolve_provider_client(
     model: str = None,
     async_mode: bool = False,
     raw_codex: bool = False,
+    explicit_base_url: str = None,
+    explicit_api_key: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
     """Central router: given a provider name and optional model, return a
     configured client with the correct auth, base URL, and API format.
@@ -623,6 +770,8 @@
             instead of wrapping in CodexAuxiliaryClient. Use this when
             the caller needs direct access to responses.stream() (e.g.,
             the main agent loop).
+        explicit_base_url: Optional direct OpenAI-compatible endpoint.
+        explicit_api_key: Optional API key paired with explicit_base_url.

     Returns:
         (client, resolved_model) or (None, None) if auth is unavailable.
@@ -699,6 +848,22 @@

     # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
     if provider == "custom":
+        if explicit_base_url:
+            custom_base = explicit_base_url.strip()
+            custom_key = (
+                (explicit_api_key or "").strip()
+                or os.getenv("OPENAI_API_KEY", "").strip()
+            )
+            if not custom_base or not custom_key:
+                logger.warning(
+                    "resolve_provider_client: explicit custom endpoint requested "
+                    "but no API key was found (set explicit_api_key or OPENAI_API_KEY)"
+                )
+                return None, None
+            final_model = model or _read_main_model() or "gpt-4o-mini"
+            client = OpenAI(api_key=custom_key, base_url=custom_base)
+            return (_to_async_client(client, final_model) if async_mode
+                    else (client, final_model))
         # Try custom first, then codex, then API-key providers
         for try_fn in (_try_custom_endpoint, _try_codex,
                        _resolve_api_key_provider):
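Callers can now hit an arbitrary OpenAI-compatible endpoint through the router (sketch; endpoint, key, and model slug are placeholders):

```python
client, model = resolve_provider_client(
    "custom",
    model="qwen2.5-14b-instruct",
    explicit_base_url="http://localhost:8000/v1",
    explicit_api_key="sk-local-placeholder",
)
```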
@@ -724,6 +889,14 @@
         return None, None

     if pconfig.auth_type == "api_key":
+        if provider == "anthropic":
+            client, default_model = _try_anthropic()
+            if client is None:
+                logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found")
+                return None, None
+            final_model = model or default_model
+            return (_to_async_client(client, final_model) if async_mode else (client, final_model))
+
         # Find the first configured API key
         api_key = ""
         for env_var in pconfig.api_key_env_vars:
@@ -787,10 +960,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
     Callers may override the returned model with a per-task env var
     (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
     """
-    forced = _get_auxiliary_provider(task)
-    if forced != "auto":
-        return resolve_provider_client(forced)
-    return resolve_provider_client("auto")
+    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    return resolve_provider_client(
+        provider,
+        model=model,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )


 def get_async_text_auxiliary_client(task: str = ""):
@@ -800,16 +976,21 @@ def get_async_text_auxiliary_client(task: str = ""):
     (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
     Returns (None, None) when no provider is available.
     """
-    forced = _get_auxiliary_provider(task)
-    if forced != "auto":
-        return resolve_provider_client(forced, async_mode=True)
-    return resolve_provider_client("auto", async_mode=True)
+    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    return resolve_provider_client(
+        provider,
+        model=model,
+        async_mode=True,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )


 _VISION_AUTO_PROVIDER_ORDER = (
     "openrouter",
     "nous",
     "openai-codex",
+    "anthropic",
     "custom",
 )
@@ -831,6 +1012,8 @@ def _resolve_strict_vision_backend(provider: str) -> Tuple[Optional[Any], Option
         return _try_nous()
     if provider == "openai-codex":
         return _try_codex()
+    if provider == "anthropic":
+        return _try_anthropic()
     if provider == "custom":
         return _try_custom_endpoint()
     return None, None
@@ -840,45 +1023,79 @@ def _strict_vision_backend_available(provider: str) -> bool:
     return _resolve_strict_vision_backend(provider)[0] is not None


+def _preferred_main_vision_provider() -> Optional[str]:
+    """Return the selected main provider when it is also a supported vision backend."""
+    try:
+        from hermes_cli.config import load_config
+
+        config = load_config()
+        model_cfg = config.get("model", {})
+        if isinstance(model_cfg, dict):
+            provider = _normalize_vision_provider(model_cfg.get("provider", ""))
+            if provider in _VISION_AUTO_PROVIDER_ORDER:
+                return provider
+    except Exception:
+        pass
+    return None
+
+
 def get_available_vision_backends() -> List[str]:
     """Return the currently available vision backends in auto-selection order.

     This is the single source of truth for setup, tool gating, and runtime
-    auto-routing of vision tasks. Phase 1 keeps the auto list conservative:
-    OpenRouter, Nous Portal, Codex OAuth, then custom OpenAI-compatible
-    endpoints. Explicit provider overrides can still route elsewhere.
+    auto-routing of vision tasks. The selected main provider is preferred when
+    it is also a known-good vision backend; otherwise Hermes falls back through
+    the standard conservative order.
     """
-    return [
-        provider
-        for provider in _VISION_AUTO_PROVIDER_ORDER
-        if _strict_vision_backend_available(provider)
-    ]
+    ordered = list(_VISION_AUTO_PROVIDER_ORDER)
+    preferred = _preferred_main_vision_provider()
+    if preferred in ordered:
+        ordered.remove(preferred)
+        ordered.insert(0, preferred)
+    return [provider for provider in ordered if _strict_vision_backend_available(provider)]


 def resolve_vision_provider_client(
     provider: Optional[str] = None,
     model: Optional[str] = None,
     *,
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
     async_mode: bool = False,
 ) -> Tuple[Optional[str], Optional[Any], Optional[str]]:
     """Resolve the client actually used for vision tasks.

-    Explicit provider overrides still use the generic provider router for
-    non-standard backends, so users can intentionally force experimental
-    providers. Auto mode stays conservative and only tries vision backends
-    known to work today.
+    Direct endpoint overrides take precedence over provider selection. Explicit
+    provider overrides still use the generic provider router for non-standard
+    backends, so users can intentionally force experimental providers. Auto mode
+    stays conservative and only tries vision backends known to work today.
     """
-    requested = _normalize_vision_provider(provider or _get_auxiliary_provider("vision"))
+    requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+        "vision", provider, model, base_url, api_key
+    )
+    requested = _normalize_vision_provider(requested)

     def _finalize(resolved_provider: str, sync_client: Any, default_model: Optional[str]):
         if sync_client is None:
             return resolved_provider, None, None
-        final_model = model or default_model
+        final_model = resolved_model or default_model
         if async_mode:
             async_client, async_model = _to_async_client(sync_client, final_model)
             return resolved_provider, async_client, async_model
         return resolved_provider, sync_client, final_model

+    if resolved_base_url:
+        client, final_model = resolve_provider_client(
+            "custom",
+            model=resolved_model,
+            async_mode=async_mode,
+            explicit_base_url=resolved_base_url,
+            explicit_api_key=resolved_api_key,
+        )
+        if client is None:
+            return "custom", None, None
+        return "custom", client, final_model
+
     if requested == "auto":
         for candidate in get_available_vision_backends():
             sync_client, default_model = _resolve_strict_vision_backend(candidate)
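The reordering itself is easy to pin down; a worked sketch of the list manipulation above, with "anthropic" standing in for the configured main provider:

```python
ordered = list(_VISION_AUTO_PROVIDER_ORDER)
preferred = "anthropic"  # e.g. the configured main provider
if preferred in ordered:
    ordered.remove(preferred)
    ordered.insert(0, preferred)
assert ordered == ["anthropic", "openrouter", "nous", "openai-codex", "custom"]
```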
@@ -891,7 +1108,7 @@
         sync_client, default_model = _resolve_strict_vision_backend(requested)
         return _finalize(requested, sync_client, default_model)

-    client, final_model = _get_cached_client(requested, model, async_mode)
+    client, final_model = _get_cached_client(requested, resolved_model, async_mode)
     if client is None:
         return requested, None, None
     return requested, client, final_model
@@ -948,19 +1165,29 @@ def auxiliary_max_tokens_param(value: int) -> dict:
 # Every auxiliary LLM consumer should use these instead of manually
 # constructing clients and calling .chat.completions.create().

-# Client cache: (provider, async_mode) -> (client, default_model)
+# Client cache: (provider, async_mode, base_url, api_key) -> (client, default_model)
 _client_cache: Dict[tuple, tuple] = {}


 def _get_cached_client(
-    provider: str, model: str = None, async_mode: bool = False,
+    provider: str,
+    model: str = None,
+    async_mode: bool = False,
+    base_url: str = None,
+    api_key: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
     """Get or create a cached client for the given provider."""
-    cache_key = (provider, async_mode)
+    cache_key = (provider, async_mode, base_url or "", api_key or "")
     if cache_key in _client_cache:
         cached_client, cached_default = _client_cache[cache_key]
         return cached_client, model or cached_default
-    client, default_model = resolve_provider_client(provider, model, async_mode)
+    client, default_model = resolve_provider_client(
+        provider,
+        model,
+        async_mode,
+        explicit_base_url=base_url,
+        explicit_api_key=api_key,
+    )
     if client is not None:
         _client_cache[cache_key] = (client, default_model)
     return client, model or default_model
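With base_url and api_key folded into the key, two custom endpoints no longer collide in the cache (placeholder endpoints):

```python
c1, _ = _get_cached_client("custom", base_url="http://localhost:8000/v1", api_key="k1")
c2, _ = _get_cached_client("custom", base_url="http://localhost:8001/v1", api_key="k2")
# Distinct cache keys:
#   ("custom", False, "http://localhost:8000/v1", "k1")
#   ("custom", False, "http://localhost:8001/v1", "k2")
```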
@@ -970,57 +1197,75 @@ def _resolve_task_provider_model(
     task: str = None,
     provider: str = None,
     model: str = None,
-) -> Tuple[str, Optional[str]]:
+    base_url: str = None,
+    api_key: str = None,
+) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
     """Determine provider + model for a call.

     Priority:
-    1. Explicit provider/model args (always win)
-    2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
-    3. Config file (auxiliary.{task}.provider/model or compression.*)
+    1. Explicit provider/model/base_url/api_key args (always win)
+    2. Env var overrides (AUXILIARY_{TASK}_*, CONTEXT_{TASK}_*)
+    3. Config file (auxiliary.{task}.* or compression.*)
     4. "auto" (full auto-detection chain)

-    Returns (provider, model) where model may be None (use provider default).
+    Returns (provider, model, base_url, api_key) where model may be None
+    (use provider default). When base_url is set, provider is forced to
+    "custom" and the task uses that direct endpoint.
     """
-    if provider:
-        return provider, model
+    config = {}
+    cfg_provider = None
+    cfg_model = None
+    cfg_base_url = None
+    cfg_api_key = None

     if task:
-        # Check env var overrides first
-        env_provider = _get_auxiliary_provider(task)
-        if env_provider != "auto":
-            # Check for env var model override too
-            env_model = None
-            for prefix in ("AUXILIARY_", "CONTEXT_"):
-                val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
-                if val:
-                    env_model = val
-                    break
-            return env_provider, model or env_model
-
         # Read from config file
         try:
             from hermes_cli.config import load_config
             config = load_config()
         except ImportError:
-            return "auto", model
+            config = {}

         # Check auxiliary.{task} section
-        aux = config.get("auxiliary", {})
-        task_config = aux.get(task, {})
-        cfg_provider = task_config.get("provider", "").strip() or None
-        cfg_model = task_config.get("model", "").strip() or None
+        aux = config.get("auxiliary", {}) if isinstance(config, dict) else {}
+        task_config = aux.get(task, {}) if isinstance(aux, dict) else {}
+        if not isinstance(task_config, dict):
+            task_config = {}
+        cfg_provider = str(task_config.get("provider", "")).strip() or None
+        cfg_model = str(task_config.get("model", "")).strip() or None
+        cfg_base_url = str(task_config.get("base_url", "")).strip() or None
+        cfg_api_key = str(task_config.get("api_key", "")).strip() or None

         # Backwards compat: compression section has its own keys
         if task == "compression" and not cfg_provider:
-            comp = config.get("compression", {})
-            cfg_provider = comp.get("summary_provider", "").strip() or None
-            cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
+            comp = config.get("compression", {}) if isinstance(config, dict) else {}
+            if isinstance(comp, dict):
+                cfg_provider = comp.get("summary_provider", "").strip() or None
+                cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
+
+    env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
+    resolved_model = model or env_model or cfg_model
+
+    if base_url:
+        return "custom", resolved_model, base_url, api_key
+    if provider:
+        return provider, resolved_model, base_url, api_key
+
+    if task:
+        env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
+        env_api_key = _get_auxiliary_env_override(task, "API_KEY")
+        if env_base_url:
+            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
+
+        env_provider = _get_auxiliary_provider(task)
+        if env_provider != "auto":
+            return env_provider, resolved_model, None, None
+
+        if cfg_base_url:
+            return "custom", resolved_model, cfg_base_url, cfg_api_key
         if cfg_provider and cfg_provider != "auto":
-            return cfg_provider, model or cfg_model
-        return "auto", model or cfg_model
+            return cfg_provider, resolved_model, None, None
+        return "auto", resolved_model, None, None

-    return "auto", model
+    return "auto", resolved_model, None, None


 def _build_call_kwargs(
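A worked sketch of the new priority order (placeholder endpoint; the explicit base_url short-circuits everything else and forces "custom"):

```python
prov, mdl, url, key = _resolve_task_provider_model(
    task="vision",
    base_url="http://localhost:8000/v1",
    api_key="sk-local-placeholder",
)
assert prov == "custom" and url == "http://localhost:8000/v1"
```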
@@ -1032,6 +1277,7 @@ def _build_call_kwargs(
     tools: Optional[list] = None,
     timeout: float = 30.0,
     extra_body: Optional[dict] = None,
+    base_url: Optional[str] = None,
 ) -> dict:
     """Build kwargs for .chat.completions.create() with model/provider adjustments."""
     kwargs: Dict[str, Any] = {
@@ -1047,7 +1293,7 @@ def _build_call_kwargs(
     # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
     # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
     if provider == "custom":
-        custom_base = _current_custom_base_url()
+        custom_base = base_url or _current_custom_base_url()
         if "api.openai.com" in custom_base.lower():
             kwargs["max_completion_tokens"] = max_tokens
         else:
@@ -1073,6 +1319,8 @@ def call_llm(
     *,
     provider: str = None,
     model: str = None,
+    base_url: str = None,
+    api_key: str = None,
     messages: list,
     temperature: float = None,
     max_tokens: int = None,
@@ -1104,16 +1352,18 @@
     Raises:
         RuntimeError: If no provider is configured.
     """
-    resolved_provider, resolved_model = _resolve_task_provider_model(
-        task, provider, model)
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+        task, provider, model, base_url, api_key)

     if task == "vision":
         effective_provider, client, final_model = resolve_vision_provider_client(
-            provider=resolved_provider,
-            model=resolved_model,
+            provider=provider,
+            model=model,
+            base_url=base_url,
+            api_key=api_key,
             async_mode=False,
         )
-        if client is None and resolved_provider != "auto":
+        if client is None and resolved_provider != "auto" and not resolved_base_url:
             logger.warning(
                 "Vision provider %s unavailable, falling back to auto vision backends",
                 resolved_provider,
@@ -1130,10 +1380,15 @@
             )
             resolved_provider = effective_provider or resolved_provider
     else:
-        client, final_model = _get_cached_client(resolved_provider, resolved_model)
+        client, final_model = _get_cached_client(
+            resolved_provider,
+            resolved_model,
+            base_url=resolved_base_url,
+            api_key=resolved_api_key,
+        )
         if client is None:
             # Fallback: try openrouter
-            if resolved_provider != "openrouter":
+            if resolved_provider != "openrouter" and not resolved_base_url:
                 logger.warning("Provider %s unavailable, falling back to openrouter",
                                resolved_provider)
                 client, final_model = _get_cached_client(
@@ -1146,7 +1401,8 @@
     kwargs = _build_call_kwargs(
         resolved_provider, final_model, messages,
         temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body)
+        tools=tools, timeout=timeout, extra_body=extra_body,
+        base_url=resolved_base_url)

     # Handle max_tokens vs max_completion_tokens retry
     try:
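Taken together, a caller can now pin a single call_llm() invocation to a direct endpoint (sketch; all values are placeholders):

```python
resp = call_llm(
    task="web_extract",
    base_url="http://localhost:8000/v1",
    api_key="sk-local-placeholder",
    model="qwen2.5-14b-instruct",
    messages=[{"role": "user", "content": "Extract the page title from: ..."}],
)
```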
@@ -1165,6 +1421,8 @@ async def async_call_llm(
     *,
     provider: str = None,
     model: str = None,
+    base_url: str = None,
+    api_key: str = None,
     messages: list,
     temperature: float = None,
     max_tokens: int = None,
@@ -1176,16 +1434,18 @@

     Same as call_llm() but async. See call_llm() for full documentation.
     """
-    resolved_provider, resolved_model = _resolve_task_provider_model(
-        task, provider, model)
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+        task, provider, model, base_url, api_key)

     if task == "vision":
         effective_provider, client, final_model = resolve_vision_provider_client(
-            provider=resolved_provider,
-            model=resolved_model,
+            provider=provider,
+            model=model,
+            base_url=base_url,
+            api_key=api_key,
             async_mode=True,
         )
-        if client is None and resolved_provider != "auto":
+        if client is None and resolved_provider != "auto" and not resolved_base_url:
             logger.warning(
                 "Vision provider %s unavailable, falling back to auto vision backends",
                 resolved_provider,
@@ -1203,9 +1463,14 @@
             resolved_provider = effective_provider or resolved_provider
     else:
         client, final_model = _get_cached_client(
-            resolved_provider, resolved_model, async_mode=True)
+            resolved_provider,
+            resolved_model,
+            async_mode=True,
+            base_url=resolved_base_url,
+            api_key=resolved_api_key,
+        )
         if client is None:
-            if resolved_provider != "openrouter":
+            if resolved_provider != "openrouter" and not resolved_base_url:
                 logger.warning("Provider %s unavailable, falling back to openrouter",
                                resolved_provider)
                 client, final_model = _get_cached_client(
@@ -1219,7 +1484,8 @@
     kwargs = _build_call_kwargs(
         resolved_provider, final_model, messages,
         temperature=temperature, max_tokens=max_tokens,
-        tools=tools, timeout=timeout, extra_body=extra_body)
+        tools=tools, timeout=timeout, extra_body=extra_body,
+        base_url=resolved_base_url)

     try:
         return await client.chat.completions.create(**kwargs)
@@ -1,17 +1,42 @@
-"""Skill slash commands — scan installed skills and build invocation messages.
+"""Shared slash command helpers for skills and built-in prompt-style modes.

 Shared between CLI (cli.py) and gateway (gateway/run.py) so both surfaces
-can invoke skills via /skill-name commands.
+can invoke skills via /skill-name commands and prompt-only built-ins like
+/plan.
 """

 import json
 import logging
+import re
+from datetime import datetime
 from pathlib import Path
 from typing import Any, Dict, Optional

 logger = logging.getLogger(__name__)

 _skill_commands: Dict[str, Dict[str, Any]] = {}
+_PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
+
+
+def build_plan_path(
+    user_instruction: str = "",
+    *,
+    now: datetime | None = None,
+) -> Path:
+    """Return the default workspace-relative markdown path for a /plan invocation.
+
+    Relative paths are intentional: file tools are task/backend-aware and resolve
+    them against the active working directory for local, docker, ssh, modal,
+    daytona, and similar terminal backends. That keeps the plan with the active
+    workspace instead of the Hermes host's global home directory.
+    """
+    first_lines = (user_instruction or "").strip().splitlines()
+    slug_source = first_lines[0] if first_lines else ""
+    slug = _PLAN_SLUG_RE.sub("-", slug_source.lower()).strip("-")
+    if slug:
+        slug = "-".join(part for part in slug.split("-")[:8] if part)[:48].strip("-")
+    slug = slug or "conversation-plan"
+    timestamp = (now or datetime.now()).strftime("%Y-%m-%d_%H%M%S")
+    return Path(".hermes") / "plans" / f"{timestamp}-{slug}.md"


 def _load_skill_payload(skill_identifier: str, task_id: str | None = None) -> tuple[dict[str, Any], Path | None, str] | None:
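Usage sketch with a pinned timestamp (POSIX-style path shown):

```python
from datetime import datetime

path = build_plan_path(
    "Refactor the gateway auth flow!",
    now=datetime(2025, 1, 2, 3, 4, 5),
)
assert str(path) == ".hermes/plans/2025-01-02_030405-refactor-the-gateway-auth-flow.md"
```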
@@ -56,6 +81,7 @@ def _build_skill_message(
     skill_dir: Path | None,
     activation_note: str,
     user_instruction: str = "",
+    runtime_note: str = "",
 ) -> str:
     """Format a loaded skill into a user/system message payload."""
     from tools.skills_tool import SKILLS_DIR
@@ -115,6 +141,10 @@ def _build_skill_message(
         parts.append("")
         parts.append(f"The user has provided the following instruction alongside the skill invocation: {user_instruction}")

+    if runtime_note:
+        parts.append("")
+        parts.append(f"[Runtime note: {runtime_note}]")
+
     return "\n".join(parts)
@@ -172,6 +202,7 @@ def build_skill_invocation_message(
     cmd_key: str,
     user_instruction: str = "",
     task_id: str | None = None,
+    runtime_note: str = "",
 ) -> Optional[str]:
     """Build the user message content for a skill slash command invocation.
@@ -201,6 +232,7 @@ def build_skill_invocation_message(
         skill_dir,
         activation_note,
         user_instruction=user_instruction,
+        runtime_note=runtime_note,
     )