Merge pull request #1003 from NousResearch/hermes/hermes-cf9f7d54

feat: centralized provider router, call_llm API, unified /model command
This commit is contained in:
Teknium 2026-03-12 00:29:18 -07:00 committed by GitHub
commit 9cb9d1a47a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 1260 additions and 937 deletions

View file

@ -17,7 +17,10 @@ Resolution order for text tasks (auto mode):
Resolution order for vision/multimodal tasks (auto mode): Resolution order for vision/multimodal tasks (auto mode):
1. OpenRouter 1. OpenRouter
2. Nous Portal 2. Nous Portal
3. None (steps 3-5 are skipped they may not support multimodal) 3. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
4. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
5. None (API-key providers like z.ai/Kimi/MiniMax are skipped
they may not support multimodal)
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER, Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task: CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
@ -440,7 +443,7 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
custom_key = os.getenv("OPENAI_API_KEY") custom_key = os.getenv("OPENAI_API_KEY")
if not custom_base or not custom_key: if not custom_base or not custom_key:
return None, None return None, None
model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
logger.debug("Auxiliary client: custom endpoint (%s)", model) logger.debug("Auxiliary client: custom endpoint (%s)", model)
return OpenAI(api_key=custom_key, base_url=custom_base), model return OpenAI(api_key=custom_key, base_url=custom_base), model
@ -499,6 +502,205 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
return None, None return None, None
# ── Centralized Provider Router ─────────────────────────────────────────────
#
# resolve_provider_client() is the single entry point for creating a properly
# configured client given a (provider, model) pair. It handles auth lookup,
# base URL resolution, provider-specific headers, and API format differences
# (Chat Completions vs Responses API for Codex).
#
# All auxiliary consumer code should go through this or the public helpers
# below — never look up auth env vars ad-hoc.
def _to_async_client(sync_client, model: str):
    """Return ``(async_client, model)`` built from an existing sync client.

    A ``CodexAuxiliaryClient`` is wrapped in ``AsyncCodexAuxiliaryClient`` so
    the Codex adapter stays in place. Any other client is rebuilt as an
    ``AsyncOpenAI`` that reuses the sync client's API key and base URL; the
    default headers are re-applied when the base URL points at OpenRouter or
    Kimi. ``model`` is passed through untouched.
    """
    from openai import AsyncOpenAI

    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model

    api_key = sync_client.api_key
    base_url = str(sync_client.base_url)
    lowered = base_url.lower()

    # Header selection is keyed on the base URL, not on a provider name.
    if "openrouter" in lowered:
        header_kwargs = {"default_headers": dict(_OR_HEADERS)}
    elif "api.kimi.com" in lowered:
        header_kwargs = {"default_headers": {"User-Agent": "KimiCLI/1.0"}}
    else:
        header_kwargs = {}

    return AsyncOpenAI(api_key=api_key, base_url=base_url, **header_kwargs), model
def resolve_provider_client(
    provider: str,
    model: Optional[str] = None,
    async_mode: bool = False,
    raw_codex: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: build a configured client for a provider.

    Given a provider name and an optional model, return a client with the
    correct auth, base URL and API format. Except on the ``raw_codex`` path,
    Codex clients come from ``_try_codex()`` (the adapter path), so callers
    can use ``.chat.completions.create()`` uniformly.

    Args:
        provider: Provider identifier. One of:
            "openrouter", "nous", "openai-codex" (or "codex"),
            "zai", "kimi-coding", "minimax", "minimax-cn",
            "custom" (or "main"; OPENAI_BASE_URL + OPENAI_API_KEY),
            "auto" (full auto-detection chain). Empty/None means "auto".
        model: Model slug override. If None, the provider's default
            auxiliary model is used.
        async_mode: If True, return an async-compatible client.
        raw_codex: If True, return a raw ``OpenAI`` client for the Codex
            provider instead of the adapter. Use this when the caller needs
            direct access to ``responses.stream()`` (e.g. the main agent
            loop). NOTE: this path always returns a sync client;
            ``async_mode`` is not consulted for it.

    Returns:
        ``(client, resolved_model)``, or ``(None, None)`` if the provider is
        unknown or its credentials are unavailable.
    """

    def _finish(client, final_model):
        # Single place where the sync/async decision is made for every
        # successful resolution below.
        if async_mode:
            return _to_async_client(client, final_model)
        return client, final_model

    # Normalise aliases
    provider = (provider or "auto").strip().lower()
    provider = {"codex": "openai-codex", "main": "custom"}.get(provider, provider)

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
        client, resolved = _resolve_auto()
        if client is None:
            return None, None
        return _finish(client, model or resolved)

    # ── OpenRouter ───────────────────────────────────────────────────
    if provider == "openrouter":
        client, default = _try_openrouter()
        if client is None:
            logger.warning("resolve_provider_client: openrouter requested "
                           "but OPENROUTER_API_KEY not set")
            return None, None
        return _finish(client, model or default)

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
        client, default = _try_nous()
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes login)")
            return None, None
        return _finish(client, model or default)

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
    if provider == "openai-codex":
        if raw_codex:
            # Raw OpenAI client for callers that need direct access to
            # responses.stream() (e.g., the main agent loop).
            codex_token = _read_codex_access_token()
            if not codex_token:
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
            raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
            return raw_client, model or _CODEX_AUX_MODEL
        # Standard path: the adapter returned by _try_codex()
        client, default = _try_codex()
        if client is None:
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
        return _finish(client, model or default)

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
                       _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
                return _finish(client, model or default)
        logger.warning("resolve_provider_client: custom/main requested "
                       "but no endpoint credentials found")
        return None, None

    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
    except ImportError:
        logger.debug("hermes_cli.auth not available for provider %s", provider)
        return None, None

    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig is None:
        logger.warning("resolve_provider_client: unknown provider %r", provider)
        return None, None

    if pconfig.auth_type == "api_key":
        # Use the first env var that holds a non-empty key.
        api_key = ""
        for env_var in pconfig.api_key_env_vars:
            api_key = os.getenv(env_var, "").strip()
            if api_key:
                break
        if not api_key:
            logger.warning("resolve_provider_client: provider %s has no API "
                           "key configured (tried: %s)",
                           provider, ", ".join(pconfig.api_key_env_vars))
            return None, None

        # Resolve base URL (env override → provider-specific logic → default)
        base_url_override = ""
        if pconfig.base_url_env_var:
            base_url_override = os.getenv(pconfig.base_url_env_var, "").strip()
        if provider == "kimi-coding":
            base_url = _resolve_kimi_base_url(
                api_key, pconfig.inference_base_url, base_url_override)
        elif base_url_override:
            base_url = base_url_override
        else:
            base_url = pconfig.inference_base_url

        final_model = model or _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")

        # Provider-specific headers
        client_kwargs: Dict[str, Any] = {"api_key": api_key, "base_url": base_url}
        if "api.kimi.com" in base_url.lower():
            client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
        client = OpenAI(**client_kwargs)
        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
        return _finish(client, final_model)

    if pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # "nous" and "openai-codex" have already returned above, so any
        # OAuth provider reaching this point has no dedicated resolver.
        logger.warning("resolve_provider_client: OAuth provider %s not "
                       "directly supported, try 'auto'", provider)
        return None, None

    logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
                   pconfig.auth_type, provider)
    return None, None
# ── Public API ────────────────────────────────────────────────────────────── # ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]: def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
@ -513,8 +715,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
""" """
forced = _get_auxiliary_provider(task) forced = _get_auxiliary_provider(task)
if forced != "auto": if forced != "auto":
return _resolve_forced_provider(forced) return resolve_provider_client(forced)
return _resolve_auto() return resolve_provider_client("auto")
def get_async_text_auxiliary_client(task: str = ""): def get_async_text_auxiliary_client(task: str = ""):
@ -524,24 +726,10 @@ def get_async_text_auxiliary_client(task: str = ""):
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API. (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
Returns (None, None) when no provider is available. Returns (None, None) when no provider is available.
""" """
from openai import AsyncOpenAI forced = _get_auxiliary_provider(task)
if forced != "auto":
sync_client, model = get_text_auxiliary_client(task) return resolve_provider_client(forced, async_mode=True)
if sync_client is None: return resolve_provider_client("auto", async_mode=True)
return None, None
if isinstance(sync_client, CodexAuxiliaryClient):
return AsyncCodexAuxiliaryClient(sync_client), model
async_kwargs = {
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
if "openrouter" in str(sync_client.base_url).lower():
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif "api.kimi.com" in str(sync_client.base_url).lower():
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
return AsyncOpenAI(**async_kwargs), model
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
@ -559,7 +747,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
""" """
forced = _get_auxiliary_provider("vision") forced = _get_auxiliary_provider("vision")
if forced != "auto": if forced != "auto":
return _resolve_forced_provider(forced) return resolve_provider_client(forced)
# Auto: try providers known to support multimodal first, then fall # Auto: try providers known to support multimodal first, then fall
# back to the user's custom endpoint. Many local models (Qwen-VL, # back to the user's custom endpoint. Many local models (Qwen-VL,
# LLaVA, Pixtral, etc.) support vision — skipping them entirely # LLaVA, Pixtral, etc.) support vision — skipping them entirely
@ -573,6 +761,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
return None, None return None, None
def get_async_vision_auxiliary_client():
    """Return ``(async_client, model_slug)`` for async vision consumers.

    Resolves the sync vision client first and converts it with
    ``_to_async_client`` rather than constructing ``AsyncOpenAI`` by hand,
    so a Codex client keeps its adapter.

    Returns ``(None, None)`` when no provider is available.
    """
    vision_client, model_slug = get_vision_auxiliary_client()
    if vision_client is not None:
        return _to_async_client(vision_client, model_slug)
    return None, None
def get_auxiliary_extra_body() -> dict: def get_auxiliary_extra_body() -> dict:
"""Return extra_body kwargs for auxiliary API calls. """Return extra_body kwargs for auxiliary API calls.
@ -598,3 +801,253 @@ def auxiliary_max_tokens_param(value: int) -> dict:
and "api.openai.com" in custom_base.lower()): and "api.openai.com" in custom_base.lower()):
return {"max_completion_tokens": value} return {"max_completion_tokens": value}
return {"max_tokens": value} return {"max_tokens": value}
# ── Centralized LLM Call API ────────────────────────────────────────────────
#
# call_llm() and async_call_llm() own the full request lifecycle:
# 1. Resolve provider + model from task config (or explicit args)
# 2. Get or create a cached client for that provider
# 3. Format request args for the provider + model (max_tokens handling, etc.)
# 4. Make the API call
# 5. Return the response
#
# Every auxiliary LLM consumer should use these instead of manually
# constructing clients and calling .chat.completions.create().
# Client cache: (provider, async_mode) -> (client, provider_default_model)
_client_cache: Dict[tuple, tuple] = {}


def _get_cached_client(
    provider: str, model: str = None, async_mode: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
    """Get or create a cached client for the given provider.

    Clients are cached per ``(provider, async_mode)``. The model is not part
    of the key: one client serves every model of a provider, and the model
    to use is chosen per call.

    Args:
        provider: Provider identifier understood by resolve_provider_client().
        model: Per-call model override; falls back to the provider default.
        async_mode: Whether an async client is wanted.

    Returns:
        ``(client, model or provider_default)``; ``client`` is None when the
        provider could not be resolved (nothing is cached in that case).
    """
    cache_key = (provider, async_mode)
    cached = _client_cache.get(cache_key)
    if cached is None:
        # Resolve WITHOUT the per-call override. resolve_provider_client()
        # returns ``model or default``; caching that value would make the
        # first caller's override the "default" for every later call.
        client, default_model = resolve_provider_client(provider, None, async_mode)
        if client is None:
            return None, model or default_model
        cached = (client, default_model)
        _client_cache[cache_key] = cached
    cached_client, cached_default = cached
    return cached_client, model or cached_default
def _resolve_task_provider_model(
    task: str = None,
    provider: str = None,
    model: str = None,
) -> Tuple[str, Optional[str]]:
    """Determine provider + model for a call.

    Priority:
      1. Explicit provider/model args (always win)
      2. Env var overrides (via _get_auxiliary_provider(task), plus
         AUXILIARY_{TASK}_MODEL / CONTEXT_{TASK}_MODEL for the model)
      3. Config file (auxiliary.{task}.provider/model, or for the
         "compression" task the legacy compression.summary_* keys)
      4. "auto" (full auto-detection chain)

    Returns:
        ``(provider, model)`` where model may be None (use provider default).
    """

    def _clean(value):
        # Config values are not guaranteed to be strings (a key may be
        # present but empty); anything that is not a non-blank string
        # counts as "not set" instead of raising on .strip().
        if isinstance(value, str):
            return value.strip() or None
        return None

    def _section(mapping, key):
        # A missing, empty or non-dict section is treated as {}.
        value = mapping.get(key) if isinstance(mapping, dict) else None
        return value if isinstance(value, dict) else {}

    if provider:
        return provider, model
    if not task:
        return "auto", model

    # Check env var overrides first
    env_provider = _get_auxiliary_provider(task)
    if env_provider != "auto":
        # Check for env var model override too; first non-empty value wins.
        env_values = (
            os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
            for prefix in ("AUXILIARY_", "CONTEXT_")
        )
        env_model = next((val for val in env_values if val), None)
        return env_provider, model or env_model

    # Read from config file
    try:
        from hermes_cli.config import load_config
        config = load_config()
    except ImportError:
        return "auto", model

    # Check auxiliary.{task} section
    task_config = _section(_section(config, "auxiliary"), task)
    cfg_provider = _clean(task_config.get("provider"))
    cfg_model = _clean(task_config.get("model"))

    # Backwards compat: compression section has its own keys
    if task == "compression" and not cfg_provider:
        comp = _section(config, "compression")
        cfg_provider = _clean(comp.get("summary_provider"))
        cfg_model = cfg_model or _clean(comp.get("summary_model"))

    if cfg_provider and cfg_provider != "auto":
        return cfg_provider, model or cfg_model
    return "auto", model or cfg_model
def _build_call_kwargs(
    provider: str,
    model: str,
    messages: list,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    tools: Optional[list] = None,
    timeout: float = 30.0,
    extra_body: Optional[dict] = None,
) -> dict:
    """Build kwargs for .chat.completions.create() with model/provider adjustments.

    Args:
        provider: Resolved provider name; selects the token-limit parameter
            name and whether the Nous product tag is added.
        model: Model slug to send.
        messages: Chat messages list.
        temperature: Sampling temperature; omitted from the result when None.
        max_tokens: Output token cap; omitted when None.
        tools: Tool definitions; omitted when empty or None.
        timeout: Request timeout in seconds.
        extra_body: Additional request body fields. Never mutated.

    Returns:
        A new dict of keyword arguments for ``chat.completions.create()``.
    """
    kwargs: Dict[str, Any] = {
        "model": model,
        "messages": messages,
        "timeout": timeout,
    }

    if temperature is not None:
        kwargs["temperature"] = temperature

    if max_tokens is not None:
        # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
        # Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
        direct_openai = (
            provider == "custom"
            and "api.openai.com" in os.getenv("OPENAI_BASE_URL", "").lower()
        )
        limit_key = "max_completion_tokens" if direct_openai else "max_tokens"
        kwargs[limit_key] = max_tokens

    if tools:
        kwargs["tools"] = tools

    # Provider-specific extra_body
    merged_extra = dict(extra_body or {})
    if provider == "nous" or auxiliary_is_nous:
        # dict() above is a shallow copy, so extending the existing "tags"
        # list in place would leak the tag into the caller's extra_body and
        # pile up across calls. Build a fresh list instead.
        existing_tags = merged_extra.get("tags") or []
        merged_extra["tags"] = [*existing_tags, "product=hermes-agent"]
    if merged_extra:
        kwargs["extra_body"] = merged_extra

    return kwargs
def call_llm(
    task: str = None,
    *,
    provider: str = None,
    model: str = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
    tools: list = None,
    timeout: float = 30.0,
    extra_body: dict = None,
) -> Any:
    """Centralized synchronous LLM call.

    Resolves provider + model (from task config, explicit args, or auto-detect),
    handles auth, request formatting, and model-specific arg adjustments.

    Args:
        task: Auxiliary task name ("compression", "vision", "web_extract",
            "session_search", "skills_hub", "mcp", "flush_memories").
            Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
        messages: Chat messages list.
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
        tools: Tool definitions (for function calling).
        timeout: Request timeout in seconds.
        extra_body: Additional request body fields.

    Returns:
        Whatever ``client.chat.completions.create()`` returns.

    Raises:
        RuntimeError: If neither the resolved provider nor the OpenRouter
            fallback yields a client.
    """
    resolved_provider, resolved_model = _resolve_task_provider_model(
        task, provider, model)
    client, final_model = _get_cached_client(resolved_provider, resolved_model)

    # Provider the request is actually sent to; differs from
    # resolved_provider only when the OpenRouter fallback kicks in.
    effective_provider = resolved_provider
    if client is None and resolved_provider != "openrouter":
        # Fallback: try openrouter (pointless if openrouter itself just failed)
        logger.warning("Provider %s unavailable, falling back to openrouter",
                       resolved_provider)
        client, final_model = _get_cached_client(
            "openrouter", resolved_model or _OPENROUTER_MODEL)
        effective_provider = "openrouter"
    if client is None:
        raise RuntimeError(
            f"No LLM provider configured for task={task} provider={resolved_provider}. "
            f"Run: hermes setup")

    # Format the request for the provider that will receive it, not for
    # the one that turned out to be unavailable.
    kwargs = _build_call_kwargs(
        effective_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
        tools=tools, timeout=timeout, extra_body=extra_body)

    # Handle max_tokens vs max_completion_tokens retry
    try:
        return client.chat.completions.create(**kwargs)
    except Exception as first_err:
        err_str = str(first_err)
        param_rejected = ("max_tokens" in err_str
                          or "unsupported_parameter" in err_str)
        # Only retry when max_tokens was really sent; otherwise the retry
        # would add max_completion_tokens=None and hide the real error.
        if not (param_rejected and "max_tokens" in kwargs):
            raise
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return client.chat.completions.create(**kwargs)
async def async_call_llm(
    task: str = None,
    *,
    provider: str = None,
    model: str = None,
    messages: list,
    temperature: float = None,
    max_tokens: int = None,
    tools: list = None,
    timeout: float = 30.0,
    extra_body: dict = None,
) -> Any:
    """Centralized asynchronous LLM call.

    Resolves provider + model (explicit args, then task env/config, then
    auto-detect), obtains an async client, falls back to OpenRouter when the
    resolved provider is unavailable, and retries once with
    ``max_completion_tokens`` if the endpoint rejects ``max_tokens``.

    Args:
        task: Auxiliary task name; ignored if ``provider`` is set.
        provider: Explicit provider override.
        model: Explicit model override.
        messages: Chat messages list.
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens.
        tools: Tool definitions (for function calling).
        timeout: Request timeout in seconds.
        extra_body: Additional request body fields.

    Returns:
        Whatever ``await client.chat.completions.create()`` returns.

    Raises:
        RuntimeError: If neither the resolved provider nor the OpenRouter
            fallback yields a client.
    """
    resolved_provider, resolved_model = _resolve_task_provider_model(
        task, provider, model)
    client, final_model = _get_cached_client(
        resolved_provider, resolved_model, async_mode=True)

    # Provider the request is actually sent to; differs from
    # resolved_provider only when the OpenRouter fallback kicks in.
    effective_provider = resolved_provider
    if client is None and resolved_provider != "openrouter":
        logger.warning("Provider %s unavailable, falling back to openrouter",
                       resolved_provider)
        client, final_model = _get_cached_client(
            "openrouter", resolved_model or _OPENROUTER_MODEL,
            async_mode=True)
        effective_provider = "openrouter"
    if client is None:
        raise RuntimeError(
            f"No LLM provider configured for task={task} provider={resolved_provider}. "
            f"Run: hermes setup")

    # Format the request for the provider that will receive it, not for
    # the one that turned out to be unavailable.
    kwargs = _build_call_kwargs(
        effective_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
        tools=tools, timeout=timeout, extra_body=extra_body)

    try:
        return await client.chat.completions.create(**kwargs)
    except Exception as first_err:
        err_str = str(first_err)
        param_rejected = ("max_tokens" in err_str
                          or "unsupported_parameter" in err_str)
        # Only retry when max_tokens was really sent; otherwise the retry
        # would add max_completion_tokens=None and hide the real error.
        if not (param_rejected and "max_tokens" in kwargs):
            raise
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return await client.chat.completions.create(**kwargs)

View file

@ -9,7 +9,7 @@ import logging
import os import os
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from agent.auxiliary_client import get_text_auxiliary_client from agent.auxiliary_client import call_llm
from agent.model_metadata import ( from agent.model_metadata import (
get_model_context_length, get_model_context_length,
estimate_messages_tokens_rough, estimate_messages_tokens_rough,
@ -53,8 +53,7 @@ class ContextCompressor:
self.last_completion_tokens = 0 self.last_completion_tokens = 0
self.last_total_tokens = 0 self.last_total_tokens = 0
self.client, default_model = get_text_auxiliary_client("compression") self.summary_model = summary_model_override or ""
self.summary_model = summary_model_override or default_model
def update_from_response(self, usage: Dict[str, Any]): def update_from_response(self, usage: Dict[str, Any]):
"""Update tracked token usage from API response.""" """Update tracked token usage from API response."""
@ -120,84 +119,30 @@ TURNS TO SUMMARIZE:
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
# 1. Try the auxiliary model (cheap/fast) # Use the centralized LLM router — handles provider resolution,
if self.client: # auth, and fallback internally.
try:
return self._call_summary_model(self.client, self.summary_model, prompt)
except Exception as e:
logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
# 2. Fallback: try the user's main model endpoint
fallback_client, fallback_model = self._get_fallback_client()
if fallback_client is not None:
try:
logger.info("Retrying context summary with main model (%s)", fallback_model)
summary = self._call_summary_model(fallback_client, fallback_model, prompt)
self.client = fallback_client
self.summary_model = fallback_model
return summary
except Exception as fallback_err:
logging.warning(f"Main model summary also failed: {fallback_err}")
# 3. All models failed — return None so the caller drops turns without a summary
logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
return None
def _call_summary_model(self, client, model: str, prompt: str) -> str:
"""Make the actual LLM call to generate a summary. Raises on failure."""
kwargs = {
"model": model,
"messages": [{"role": "user", "content": prompt}],
"temperature": 0.3,
"timeout": 30.0,
}
# Most providers (OpenRouter, local models) use max_tokens.
# Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+)
# requires max_completion_tokens instead.
try: try:
kwargs["max_tokens"] = self.summary_target_tokens * 2 call_kwargs = {
response = client.chat.completions.create(**kwargs) "task": "compression",
except Exception as first_err: "messages": [{"role": "user", "content": prompt}],
if "max_tokens" in str(first_err) or "unsupported_parameter" in str(first_err): "temperature": 0.3,
kwargs.pop("max_tokens", None) "max_tokens": self.summary_target_tokens * 2,
kwargs["max_completion_tokens"] = self.summary_target_tokens * 2 "timeout": 30.0,
response = client.chat.completions.create(**kwargs) }
else: if self.summary_model:
raise call_kwargs["model"] = self.summary_model
response = call_llm(**call_kwargs)
summary = response.choices[0].message.content.strip() summary = response.choices[0].message.content.strip()
if not summary.startswith("[CONTEXT SUMMARY]:"): if not summary.startswith("[CONTEXT SUMMARY]:"):
summary = "[CONTEXT SUMMARY]: " + summary summary = "[CONTEXT SUMMARY]: " + summary
return summary return summary
except RuntimeError:
def _get_fallback_client(self): logging.warning("Context compression: no provider available for "
"""Try to build a fallback client from the main model's endpoint config. "summary. Middle turns will be dropped without summary.")
return None
When the primary auxiliary client fails (e.g. stale OpenRouter key), this except Exception as e:
creates a client using the user's active custom endpoint (OPENAI_BASE_URL) logging.warning("Failed to generate context summary: %s", e)
so compression can still produce a real summary instead of a static string. return None
Returns (client, model) or (None, None).
"""
custom_base = os.getenv("OPENAI_BASE_URL")
custom_key = os.getenv("OPENAI_API_KEY")
if not custom_base or not custom_key:
return None, None
# Don't fallback to the same provider that just failed
from hermes_constants import OPENROUTER_BASE_URL
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
return None, None
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
try:
from openai import OpenAI as _OpenAI
client = _OpenAI(api_key=custom_key, base_url=custom_base)
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
return client, model
except Exception as exc:
logger.debug("Could not build fallback auxiliary client: %s", exc)
return None, None
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Tool-call / tool-result pair integrity helpers # Tool-call / tool-result pair integrity helpers

143
cli.py
View file

@ -1129,12 +1129,17 @@ class HermesCLI:
self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose") self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
# Configuration - priority: CLI args > env vars > config file # Configuration - priority: CLI args > env vars > config file
# Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config # Model comes from: CLI arg or config.yaml (single source of truth).
self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] # LLM_MODEL/OPENAI_MODEL env vars are NOT checked — config.yaml is
# authoritative. This avoids conflicts in multi-agent setups where
# env vars would stomp each other.
_model_config = CLI_CONFIG.get("model", {})
_config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "")
self.model = model or _config_model or "anthropic/claude-opus-4.6"
# Track whether model was explicitly chosen by the user or fell back # Track whether model was explicitly chosen by the user or fell back
# to the global default. Provider-specific normalisation may override # to the global default. Provider-specific normalisation may override
# the default silently but should warn when overriding an explicit choice. # the default silently but should warn when overriding an explicit choice.
self._model_is_default = not (model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL")) self._model_is_default = not model
self._explicit_api_key = api_key self._explicit_api_key = api_key
self._explicit_base_url = base_url self._explicit_base_url = base_url
@ -2260,6 +2265,72 @@ class HermesCLI:
remaining = len(self.conversation_history) remaining = len(self.conversation_history)
print(f" {remaining} message(s) remaining in history.") print(f" {remaining} message(s) remaining in history.")
def _show_model_and_providers(self):
"""Unified /model and /provider display.
Shows current model + provider, then lists all authenticated
providers with their available models so users can switch easily.
Output goes to stdout via print(); nothing is returned.
"""
# NOTE(review): this view of the method has lost its indentation (and
# probably some whitespace inside the printed strings); nesting must be
# taken from the real file, not from this rendering.
from hermes_cli.models import (
curated_models_for_provider, list_available_providers,
normalize_provider, _PROVIDER_LABELS,
)
from hermes_cli.auth import resolve_provider as _resolve_provider
# Resolve current provider
raw_provider = normalize_provider(self.provider)
if raw_provider == "auto":
# "auto" is not a concrete provider: ask the auth layer which one the
# current credentials resolve to.
try:
current = _resolve_provider(
self.requested_provider,
explicit_api_key=self._explicit_api_key,
explicit_base_url=self._explicit_base_url,
)
except Exception:
# Resolution failed; display falls back to "openrouter".
current = "openrouter"
else:
current = raw_provider
# Fall back to the raw provider id when no label is registered for it.
current_label = _PROVIDER_LABELS.get(current, current)
print(f"\n Current: {self.model} via {current_label}")
print()
# Show all authenticated providers with their models
providers = list_available_providers()
# Each entry is indexed by "id", "label" and "authenticated" below.
authed = [p for p in providers if p["authenticated"]]
unauthed = [p for p in providers if not p["authenticated"]]
if authed:
print(" Authenticated providers & models:")
for p in authed:
is_active = p["id"] == current
marker = " ← active" if is_active else ""
print(f" [{p['id']}]{marker}")
curated = curated_models_for_provider(p["id"])
if curated:
# Entries unpack as (model_id, description); only the id is printed.
for mid, desc in curated:
# Only flag the model under the active provider.
current_marker = " ← current" if (is_active and mid == self.model) else ""
print(f" {mid}{current_marker}")
else:
# No curated list for this provider: show the generic switch syntax.
print(f" (use /model {p['id']}:<model-name>)")
print()
if unauthed:
names = ", ".join(p["label"] for p in unauthed)
print(f" Not configured: {names}")
print(f" Run: hermes setup")
print()
print(" Switch model: /model <model-name>")
print(" Switch provider: /model <provider>:<model-name>")
if authed and len(authed) > 1:
# Show a concrete example with a non-active provider
# (falls back to the first authenticated provider if all match `current`).
other = next((p for p in authed if p["id"] != current), authed[0])
other_models = curated_models_for_provider(other["id"])
if other_models:
example_model = other_models[0][0]
print(f" Example: /model {other['id']}:{example_model}")
def _handle_prompt_command(self, cmd: str): def _handle_prompt_command(self, cmd: str):
"""Handle the /prompt command to view or set system prompt.""" """Handle the /prompt command to view or set system prompt."""
parts = cmd.split(maxsplit=1) parts = cmd.split(maxsplit=1)
@ -2724,7 +2795,11 @@ class HermesCLI:
base_url_for_probe = runtime.get("base_url", "") base_url_for_probe = runtime.get("base_url", "")
except Exception as e: except Exception as e:
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider) provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}") if target_provider == "custom":
print(f"(>_<) Custom endpoint not configured. Set OPENAI_BASE_URL and OPENAI_API_KEY,")
print(f" or run: hermes setup → Custom OpenAI-compatible endpoint")
else:
print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}")
print(f"(^_^) Current model unchanged: {self.model}") print(f"(^_^) Current model unchanged: {self.model}")
return True return True
@ -2771,65 +2846,9 @@ class HermesCLI:
print(f" Reason: {message}") print(f" Reason: {message}")
print(" Note: Model will revert on restart. Use a verified model to save to config.") print(" Note: Model will revert on restart. Use a verified model to save to config.")
else: else:
from hermes_cli.models import curated_models_for_provider, normalize_provider, _PROVIDER_LABELS self._show_model_and_providers()
from hermes_cli.auth import resolve_provider as _resolve_provider
# Resolve "auto" to the actual provider using credential detection
raw_provider = normalize_provider(self.provider)
if raw_provider == "auto":
try:
display_provider = _resolve_provider(
self.requested_provider,
explicit_api_key=self._explicit_api_key,
explicit_base_url=self._explicit_base_url,
)
except Exception:
display_provider = "openrouter"
else:
display_provider = raw_provider
provider_label = _PROVIDER_LABELS.get(display_provider, display_provider)
print(f"\n Current model: {self.model}")
print(f" Current provider: {provider_label}")
print()
curated = curated_models_for_provider(display_provider)
if curated:
print(f" Available models ({provider_label}):")
for mid, desc in curated:
marker = "" if mid == self.model else ""
label = f" {desc}" if desc else ""
print(f" {mid}{label}{marker}")
print()
print(" Usage: /model <model-name>")
print(" /model provider:model-name (to switch provider)")
print(" Example: /model openrouter:anthropic/claude-sonnet-4.5")
print(" See /provider for available providers")
elif cmd_lower == "/provider": elif cmd_lower == "/provider":
from hermes_cli.models import list_available_providers, normalize_provider, _PROVIDER_LABELS self._show_model_and_providers()
from hermes_cli.auth import resolve_provider as _resolve_provider
# Resolve current provider
raw_provider = normalize_provider(self.provider)
if raw_provider == "auto":
try:
current = _resolve_provider(
self.requested_provider,
explicit_api_key=self._explicit_api_key,
explicit_base_url=self._explicit_base_url,
)
except Exception:
current = "openrouter"
else:
current = raw_provider
current_label = _PROVIDER_LABELS.get(current, current)
print(f"\n Current provider: {current_label} ({current})\n")
providers = list_available_providers()
print(" Available providers:")
for p in providers:
marker = " ← active" if p["id"] == current else ""
auth = "" if p["authenticated"] else ""
aliases = f" (also: {', '.join(p['aliases'])})" if p["aliases"] else ""
print(f" [{auth}] {p['id']:<14} {p['label']}{aliases}{marker}")
print()
print(" Switch: /model provider:model-name")
print(" Setup: hermes setup")
elif cmd_lower.startswith("/prompt"): elif cmd_lower.startswith("/prompt"):
# Use original case so prompt text isn't lowercased # Use original case so prompt text isn't lowercased
self._handle_prompt_command(cmd_original) self._handle_prompt_command(cmd_original)

View file

@ -180,7 +180,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
except UnicodeDecodeError: except UnicodeDecodeError:
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1") load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing # Load config.yaml for model, reasoning, prefill, toolsets, provider routing
_cfg = {} _cfg = {}

View file

@ -1575,7 +1575,7 @@ class GatewayRunner:
config_path = _hermes_home / 'config.yaml' config_path = _hermes_home / 'config.yaml'
# Resolve current model and provider from config # Resolve current model and provider from config
current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6" current = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
current_provider = "openrouter" current_provider = "openrouter"
try: try:
if config_path.exists(): if config_path.exists():

View file

@ -108,14 +108,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
auth_type="oauth_external", auth_type="oauth_external",
inference_base_url=DEFAULT_CODEX_BASE_URL, inference_base_url=DEFAULT_CODEX_BASE_URL,
), ),
"nous-api": ProviderConfig(
id="nous-api",
name="Nous Portal (API Key)",
auth_type="api_key",
inference_base_url="https://inference-api.nousresearch.com/v1",
api_key_env_vars=("NOUS_API_KEY",),
base_url_env_var="NOUS_BASE_URL",
),
"zai": ProviderConfig( "zai": ProviderConfig(
id="zai", id="zai",
name="Z.AI / GLM", name="Z.AI / GLM",
@ -521,7 +513,6 @@ def resolve_provider(
# Normalize provider aliases # Normalize provider aliases
_PROVIDER_ALIASES = { _PROVIDER_ALIASES = {
"nous_api": "nous-api", "nousapi": "nous-api", "nous-portal-api": "nous-api",
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai", "glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
"kimi": "kimi-coding", "moonshot": "kimi-coding", "kimi": "kimi-coding", "moonshot": "kimi-coding",
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn", "minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
@ -1680,8 +1671,12 @@ def _prompt_model_selection(model_ids: List[str], current_model: str = "") -> Op
def _save_model_choice(model_id: str) -> None: def _save_model_choice(model_id: str) -> None:
"""Save the selected model to config.yaml and .env.""" """Save the selected model to config.yaml (single source of truth).
from hermes_cli.config import save_config, load_config, save_env_value
The model is stored in config.yaml only — NOT in .env. This avoids
conflicts in multi-agent setups where env vars would stomp each other.
"""
from hermes_cli.config import save_config, load_config
config = load_config() config = load_config()
# Always use dict format so provider/base_url can be stored alongside # Always use dict format so provider/base_url can be stored alongside
@ -1690,7 +1685,6 @@ def _save_model_choice(model_id: str) -> None:
else: else:
config["model"] = {"default": model_id} config["model"] = {"default": model_id}
save_config(config) save_config(config)
save_env_value("LLM_MODEL", model_id)
def login_command(args) -> None: def login_command(args) -> None:

View file

@ -126,17 +126,41 @@ DEFAULT_CONFIG = {
"summary_provider": "auto", "summary_provider": "auto",
}, },
# Auxiliary model overrides (advanced). By default Hermes auto-selects # Auxiliary model config — provider:model for each side task.
# the provider and model for each side task. Set these to override. # Format: provider is the provider name, model is the model slug.
# "auto" for provider = auto-detect best available provider.
# Empty model = use provider's default auxiliary model.
# All tasks fall back to openrouter:google/gemini-3-flash-preview if
# the configured provider is unavailable.
"auxiliary": { "auxiliary": {
"vision": { "vision": {
"provider": "auto", # auto | openrouter | nous | main "provider": "auto", # auto | openrouter | nous | codex | custom
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o" "model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
}, },
"web_extract": { "web_extract": {
"provider": "auto", "provider": "auto",
"model": "", "model": "",
}, },
"compression": {
"provider": "auto",
"model": "",
},
"session_search": {
"provider": "auto",
"model": "",
},
"skills_hub": {
"provider": "auto",
"model": "",
},
"mcp": {
"provider": "auto",
"model": "",
},
"flush_memories": {
"provider": "auto",
"model": "",
},
}, },
"display": { "display": {
@ -224,7 +248,7 @@ DEFAULT_CONFIG = {
"personalities": {}, "personalities": {},
# Config schema version - bump this when adding new required fields # Config schema version - bump this when adding new required fields
"_config_version": 6, "_config_version": 7,
} }
# ============================================================================= # =============================================================================
@ -249,14 +273,6 @@ REQUIRED_ENV_VARS = {}
# Optional environment variables that enhance functionality # Optional environment variables that enhance functionality
OPTIONAL_ENV_VARS = { OPTIONAL_ENV_VARS = {
# ── Provider (handled in provider selection, not shown in checklists) ── # ── Provider (handled in provider selection, not shown in checklists) ──
"NOUS_API_KEY": {
"description": "Nous Portal API key (direct API key access to Nous inference)",
"prompt": "Nous Portal API key",
"url": "https://portal.nousresearch.com",
"password": True,
"category": "provider",
"advanced": True,
},
"NOUS_BASE_URL": { "NOUS_BASE_URL": {
"description": "Nous Portal base URL override", "description": "Nous Portal base URL override",
"prompt": "Nous Portal base URL (leave empty for default)", "prompt": "Nous Portal base URL (leave empty for default)",

View file

@ -31,6 +31,19 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
] ]
_PROVIDER_MODELS: dict[str, list[str]] = { _PROVIDER_MODELS: dict[str, list[str]] = {
"nous": [
"claude-opus-4-6",
"claude-sonnet-4-6",
"gpt-5.4",
"gemini-3-flash",
"gemini-3.0-pro-preview",
"deepseek-v3.2",
],
"openai-codex": [
"gpt-5.2-codex",
"gpt-5.1-codex-mini",
"gpt-5.1-codex-max",
],
"zai": [ "zai": [
"glm-5", "glm-5",
"glm-4.7", "glm-4.7",
@ -263,6 +276,15 @@ def validate_requested_model(
"message": "Model names cannot contain spaces.", "message": "Model names cannot contain spaces.",
} }
# Custom endpoints can serve any model — skip validation
if normalized == "custom":
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": None,
}
# Probe the live API to check if the model actually exists # Probe the live API to check if the model actually exists
api_models = fetch_api_models(api_key, base_url) api_models = fetch_api_models(api_key, base_url)

View file

@ -618,7 +618,6 @@ def setup_model_provider(config: dict):
keep_label = None # No provider configured — don't show "Keep current" keep_label = None # No provider configured — don't show "Keep current"
provider_choices = [ provider_choices = [
"Nous Portal API key (direct API key access)",
"Login with Nous Portal (Nous Research subscription — OAuth)", "Login with Nous Portal (Nous Research subscription — OAuth)",
"Login with OpenAI Codex", "Login with OpenAI Codex",
"OpenRouter API key (100+ models, pay-per-use)", "OpenRouter API key (100+ models, pay-per-use)",
@ -632,7 +631,7 @@ def setup_model_provider(config: dict):
provider_choices.append(keep_label) provider_choices.append(keep_label)
# Default to "Keep current" if a provider exists, otherwise OpenRouter (most common) # Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
default_provider = len(provider_choices) - 1 if has_any_provider else 3 default_provider = len(provider_choices) - 1 if has_any_provider else 2
if not has_any_provider: if not has_any_provider:
print_warning("An inference provider is required for Hermes to work.") print_warning("An inference provider is required for Hermes to work.")
@ -648,42 +647,7 @@ def setup_model_provider(config: dict):
) )
nous_models = [] # populated if Nous login succeeds nous_models = [] # populated if Nous login succeeds
if provider_idx == 0: # Nous Portal API Key (direct) if provider_idx == 0: # Nous Portal (OAuth)
selected_provider = "nous-api"
print()
print_header("Nous Portal API Key")
print_info("Use a Nous Portal API key for direct access to Nous inference.")
print_info("Get your API key at: https://portal.nousresearch.com")
print()
existing_key = get_env_value("NOUS_API_KEY")
if existing_key:
print_info(f"Current: {existing_key[:8]}... (configured)")
if prompt_yes_no("Update Nous API key?", False):
api_key = prompt(" Nous API key", password=True)
if api_key:
save_env_value("NOUS_API_KEY", api_key)
print_success("Nous API key updated")
else:
api_key = prompt(" Nous API key", password=True)
if api_key:
save_env_value("NOUS_API_KEY", api_key)
print_success("Nous API key saved")
else:
print_warning("Skipped - agent won't work without an API key")
# Clear custom endpoint vars if switching
if existing_custom:
save_env_value("OPENAI_BASE_URL", "")
save_env_value("OPENAI_API_KEY", "")
_update_config_for_provider(
"nous-api", "https://inference-api.nousresearch.com/v1"
)
_set_model_provider(
config, "nous-api", "https://inference-api.nousresearch.com/v1"
)
elif provider_idx == 1: # Nous Portal
selected_provider = "nous" selected_provider = "nous"
print() print()
print_header("Nous Portal Login") print_header("Nous Portal Login")
@ -731,7 +695,7 @@ def setup_model_provider(config: dict):
print_info("You can try again later with: hermes model") print_info("You can try again later with: hermes model")
selected_provider = None selected_provider = None
elif provider_idx == 2: # OpenAI Codex elif provider_idx == 1: # OpenAI Codex
selected_provider = "openai-codex" selected_provider = "openai-codex"
print() print()
print_header("OpenAI Codex Login") print_header("OpenAI Codex Login")
@ -757,7 +721,7 @@ def setup_model_provider(config: dict):
print_info("You can try again later with: hermes model") print_info("You can try again later with: hermes model")
selected_provider = None selected_provider = None
elif provider_idx == 3: # OpenRouter elif provider_idx == 2: # OpenRouter
selected_provider = "openrouter" selected_provider = "openrouter"
print() print()
print_header("OpenRouter API Key") print_header("OpenRouter API Key")
@ -812,7 +776,7 @@ def setup_model_provider(config: dict):
except Exception as e: except Exception as e:
logger.debug("Could not save provider to config.yaml: %s", e) logger.debug("Could not save provider to config.yaml: %s", e)
elif provider_idx == 4: # Custom endpoint elif provider_idx == 3: # Custom endpoint
selected_provider = "custom" selected_provider = "custom"
print() print()
print_header("Custom OpenAI-Compatible Endpoint") print_header("Custom OpenAI-Compatible Endpoint")
@ -844,7 +808,6 @@ def setup_model_provider(config: dict):
save_env_value("OPENAI_API_KEY", api_key) save_env_value("OPENAI_API_KEY", api_key)
if model_name: if model_name:
_set_default_model(config, model_name) _set_default_model(config, model_name)
save_env_value("LLM_MODEL", model_name)
try: try:
from hermes_cli.auth import deactivate_provider from hermes_cli.auth import deactivate_provider
@ -882,7 +845,7 @@ def setup_model_provider(config: dict):
print_success("Custom endpoint configured") print_success("Custom endpoint configured")
elif provider_idx == 5: # Z.AI / GLM elif provider_idx == 4: # Z.AI / GLM
selected_provider = "zai" selected_provider = "zai"
print() print()
print_header("Z.AI / GLM API Key") print_header("Z.AI / GLM API Key")
@ -942,7 +905,7 @@ def setup_model_provider(config: dict):
_update_config_for_provider("zai", zai_base_url) _update_config_for_provider("zai", zai_base_url)
_set_model_provider(config, "zai", zai_base_url) _set_model_provider(config, "zai", zai_base_url)
elif provider_idx == 6: # Kimi / Moonshot elif provider_idx == 5: # Kimi / Moonshot
selected_provider = "kimi-coding" selected_provider = "kimi-coding"
print() print()
print_header("Kimi / Moonshot API Key") print_header("Kimi / Moonshot API Key")
@ -975,7 +938,7 @@ def setup_model_provider(config: dict):
_update_config_for_provider("kimi-coding", pconfig.inference_base_url) _update_config_for_provider("kimi-coding", pconfig.inference_base_url)
_set_model_provider(config, "kimi-coding", pconfig.inference_base_url) _set_model_provider(config, "kimi-coding", pconfig.inference_base_url)
elif provider_idx == 7: # MiniMax elif provider_idx == 6: # MiniMax
selected_provider = "minimax" selected_provider = "minimax"
print() print()
print_header("MiniMax API Key") print_header("MiniMax API Key")
@ -1008,7 +971,7 @@ def setup_model_provider(config: dict):
_update_config_for_provider("minimax", pconfig.inference_base_url) _update_config_for_provider("minimax", pconfig.inference_base_url)
_set_model_provider(config, "minimax", pconfig.inference_base_url) _set_model_provider(config, "minimax", pconfig.inference_base_url)
elif provider_idx == 8: # MiniMax China elif provider_idx == 7: # MiniMax China
selected_provider = "minimax-cn" selected_provider = "minimax-cn"
print() print()
print_header("MiniMax China API Key") print_header("MiniMax China API Key")
@ -1041,14 +1004,13 @@ def setup_model_provider(config: dict):
_update_config_for_provider("minimax-cn", pconfig.inference_base_url) _update_config_for_provider("minimax-cn", pconfig.inference_base_url)
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url) _set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
# else: provider_idx == 9 (Keep current) — only shown when a provider already exists # else: provider_idx == 8 (Keep current) — only shown when a provider already exists
# ── OpenRouter API Key for tools (if not already set) ── # ── OpenRouter API Key for tools (if not already set) ──
# Tools (vision, web, MoA) use OpenRouter independently of the main provider. # Tools (vision, web, MoA) use OpenRouter independently of the main provider.
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen. # Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
if selected_provider in ( if selected_provider in (
"nous", "nous",
"nous-api",
"openai-codex", "openai-codex",
"custom", "custom",
"zai", "zai",
@ -1121,15 +1083,6 @@ def setup_model_provider(config: dict):
custom = prompt(f" Model name (Enter to keep '{current_model}')") custom = prompt(f" Model name (Enter to keep '{current_model}')")
if custom: if custom:
_set_default_model(config, custom) _set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
elif selected_provider == "nous-api":
# Nous API key provider — prompt for model manually
print_info("Enter a model name available on Nous inference API.")
print_info("Examples: anthropic/claude-opus-4.6, deepseek/deepseek-r1")
custom = prompt(f" Model name (Enter to keep '{current_model}')")
if custom:
_set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
elif selected_provider == "openai-codex": elif selected_provider == "openai-codex":
from hermes_cli.codex_models import get_codex_model_ids from hermes_cli.codex_models import get_codex_model_ids
@ -1146,12 +1099,10 @@ def setup_model_provider(config: dict):
) )
if model_idx < len(codex_models): if model_idx < len(codex_models):
_set_default_model(config, codex_models[model_idx]) _set_default_model(config, codex_models[model_idx])
save_env_value("LLM_MODEL", codex_models[model_idx])
elif model_idx == len(codex_models): elif model_idx == len(codex_models):
custom = prompt("Enter model name") custom = prompt("Enter model name")
if custom: if custom:
_set_default_model(config, custom) _set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL) _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
_set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL) _set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
elif selected_provider == "zai": elif selected_provider == "zai":
@ -1172,12 +1123,10 @@ def setup_model_provider(config: dict):
if model_idx < len(zai_models): if model_idx < len(zai_models):
_set_default_model(config, zai_models[model_idx]) _set_default_model(config, zai_models[model_idx])
save_env_value("LLM_MODEL", zai_models[model_idx])
elif model_idx == len(zai_models): elif model_idx == len(zai_models):
custom = prompt("Enter model name") custom = prompt("Enter model name")
if custom: if custom:
_set_default_model(config, custom) _set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
# else: keep current # else: keep current
elif selected_provider == "kimi-coding": elif selected_provider == "kimi-coding":
kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"] kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]
@ -1190,12 +1139,10 @@ def setup_model_provider(config: dict):
if model_idx < len(kimi_models): if model_idx < len(kimi_models):
_set_default_model(config, kimi_models[model_idx]) _set_default_model(config, kimi_models[model_idx])
save_env_value("LLM_MODEL", kimi_models[model_idx])
elif model_idx == len(kimi_models): elif model_idx == len(kimi_models):
custom = prompt("Enter model name") custom = prompt("Enter model name")
if custom: if custom:
_set_default_model(config, custom) _set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
# else: keep current # else: keep current
elif selected_provider in ("minimax", "minimax-cn"): elif selected_provider in ("minimax", "minimax-cn"):
minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"] minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]
@ -1208,12 +1155,10 @@ def setup_model_provider(config: dict):
if model_idx < len(minimax_models): if model_idx < len(minimax_models):
_set_default_model(config, minimax_models[model_idx]) _set_default_model(config, minimax_models[model_idx])
save_env_value("LLM_MODEL", minimax_models[model_idx])
elif model_idx == len(minimax_models): elif model_idx == len(minimax_models):
custom = prompt("Enter model name") custom = prompt("Enter model name")
if custom: if custom:
_set_default_model(config, custom) _set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
# else: keep current # else: keep current
else: else:
# Static list for OpenRouter / fallback (from canonical list) # Static list for OpenRouter / fallback (from canonical list)
@ -1230,12 +1175,10 @@ def setup_model_provider(config: dict):
if model_idx < len(ids): if model_idx < len(ids):
_set_default_model(config, ids[model_idx]) _set_default_model(config, ids[model_idx])
save_env_value("LLM_MODEL", ids[model_idx])
elif model_idx == len(ids): # Custom elif model_idx == len(ids): # Custom
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)") custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
if custom: if custom:
_set_default_model(config, custom) _set_default_model(config, custom)
save_env_value("LLM_MODEL", custom)
# else: Keep current # else: Keep current
_final_model = config.get("model", "") _final_model = config.get("model", "")

View file

@ -189,29 +189,30 @@ class MiniSWERunner:
) )
self.logger = logging.getLogger(__name__) self.logger = logging.getLogger(__name__)
# Initialize OpenAI client - defaults to OpenRouter # Initialize LLM client via centralized provider router.
from openai import OpenAI # If explicit api_key/base_url are provided (e.g. from CLI args),
# construct directly. Otherwise use the router for OpenRouter.
client_kwargs = {} if api_key or base_url:
from openai import OpenAI
# Default to OpenRouter if no base_url provided client_kwargs = {
if base_url: "base_url": base_url or "https://openrouter.ai/api/v1",
client_kwargs["base_url"] = base_url "api_key": api_key or os.getenv(
"OPENROUTER_API_KEY",
os.getenv("ANTHROPIC_API_KEY",
os.getenv("OPENAI_API_KEY", ""))),
}
self.client = OpenAI(**client_kwargs)
else: else:
client_kwargs["base_url"] = "https://openrouter.ai/api/v1" from agent.auxiliary_client import resolve_provider_client
self.client, _ = resolve_provider_client("openrouter", model=model)
if self.client is None:
# Fallback: try auto-detection
# Handle API key - OpenRouter is the primary provider self.client, _ = resolve_provider_client("auto", model=model)
if api_key: if self.client is None:
client_kwargs["api_key"] = api_key from openai import OpenAI
else: self.client = OpenAI(
client_kwargs["api_key"] = os.getenv( base_url="https://openrouter.ai/api/v1",
"OPENROUTER_API_KEY", api_key=os.getenv("OPENROUTER_API_KEY", ""))
os.getenv("ANTHROPIC_API_KEY", os.getenv("OPENAI_API_KEY", ""))
)
self.client = OpenAI(**client_kwargs)
# Environment will be created per-task # Environment will be created per-task
self.env = None self.env = None

View file

@ -418,36 +418,50 @@ class AIAgent:
]: ]:
logging.getLogger(quiet_logger).setLevel(logging.ERROR) logging.getLogger(quiet_logger).setLevel(logging.ERROR)
# Initialize OpenAI client - defaults to OpenRouter # Initialize OpenAI client via centralized provider router.
client_kwargs = {} # The router handles auth resolution, base URL, headers, and
# Codex wrapping for all known providers.
# Default to OpenRouter if no base_url provided # raw_codex=True because the main agent needs direct responses.stream()
if base_url: # access for Codex Responses API streaming.
client_kwargs["base_url"] = base_url if api_key and base_url:
# Explicit credentials from CLI/gateway — construct directly.
# The runtime provider resolver already handled auth for us.
client_kwargs = {"api_key": api_key, "base_url": base_url}
effective_base = base_url
if "openrouter" in effective_base.lower():
client_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
elif "api.kimi.com" in effective_base.lower():
client_kwargs["default_headers"] = {
"User-Agent": "KimiCLI/1.0",
}
else: else:
client_kwargs["base_url"] = OPENROUTER_BASE_URL # No explicit creds — use the centralized provider router
from agent.auxiliary_client import resolve_provider_client
# Handle API key - OpenRouter is the primary provider _routed_client, _ = resolve_provider_client(
if api_key: self.provider or "auto", model=self.model, raw_codex=True)
client_kwargs["api_key"] = api_key if _routed_client is not None:
else: client_kwargs = {
# Primary: OPENROUTER_API_KEY, fallback to direct provider keys "api_key": _routed_client.api_key,
client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "") "base_url": str(_routed_client.base_url),
}
# OpenRouter app attribution — shows hermes-agent in rankings/analytics # Preserve any default_headers the router set
effective_base = client_kwargs.get("base_url", "") if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
if "openrouter" in effective_base.lower(): client_kwargs["default_headers"] = dict(_routed_client._default_headers)
client_kwargs["default_headers"] = { else:
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent", # Final fallback: try raw OpenRouter key
"X-OpenRouter-Title": "Hermes Agent", client_kwargs = {
"X-OpenRouter-Categories": "productivity,cli-agent", "api_key": os.getenv("OPENROUTER_API_KEY", ""),
} "base_url": OPENROUTER_BASE_URL,
elif "api.kimi.com" in effective_base.lower(): "default_headers": {
# Kimi Code API requires a recognized coding-agent User-Agent "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
# (see https://github.com/MoonshotAI/kimi-cli) "X-OpenRouter-Title": "Hermes Agent",
client_kwargs["default_headers"] = { "X-OpenRouter-Categories": "productivity,cli-agent",
"User-Agent": "KimiCLI/1.0", },
} }
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
try: try:
@ -2243,75 +2257,6 @@ class AIAgent:
# ── Provider fallback ────────────────────────────────────────────────── # ── Provider fallback ──────────────────────────────────────────────────
# API-key providers: provider → (base_url, [env_var_names])
_FALLBACK_API_KEY_PROVIDERS = {
"openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
"zai": ("https://api.z.ai/api/paas/v4", ["ZAI_API_KEY", "Z_AI_API_KEY"]),
"kimi-coding": ("https://api.moonshot.ai/v1", ["KIMI_API_KEY"]),
"minimax": ("https://api.minimax.io/v1", ["MINIMAX_API_KEY"]),
"minimax-cn": ("https://api.minimaxi.com/v1", ["MINIMAX_CN_API_KEY"]),
}
# OAuth providers: provider → (resolver_import_path, api_mode)
# Each resolver returns {"api_key": ..., "base_url": ...}.
_FALLBACK_OAUTH_PROVIDERS = {
"openai-codex": ("resolve_codex_runtime_credentials", "codex_responses"),
"nous": ("resolve_nous_runtime_credentials", "chat_completions"),
}
def _resolve_fallback_credentials(
self, fb_provider: str, fb_config: dict
) -> Optional[tuple]:
"""Resolve credentials for a fallback provider.
Returns (api_key, base_url, api_mode) on success, or None on failure.
Handles three cases:
1. OAuth providers (openai-codex, nous) → call credential resolver
2. API-key providers (openrouter, zai, etc.) → read env var
3. Custom endpoints → use base_url + api_key_env from config
"""
# ── 1. OAuth providers ────────────────────────────────────────
if fb_provider in self._FALLBACK_OAUTH_PROVIDERS:
resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
try:
import hermes_cli.auth as _auth
resolver = getattr(_auth, resolver_name)
creds = resolver()
return creds["api_key"], creds["base_url"], api_mode
except Exception as e:
logging.warning(
"Fallback to %s failed (credential resolution): %s",
fb_provider, e,
)
return None
# ── 2. API-key providers ──────────────────────────────────────
fb_key = (fb_config.get("api_key") or "").strip()
if not fb_key:
key_env = (fb_config.get("api_key_env") or "").strip()
if key_env:
fb_key = os.getenv(key_env, "")
elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]:
fb_key = os.getenv(env_var, "")
if fb_key:
break
if not fb_key:
logging.warning(
"Fallback model configured but no API key found for provider '%s'",
fb_provider,
)
return None
# ── 3. Resolve base URL ───────────────────────────────────────
fb_base_url = (fb_config.get("base_url") or "").strip()
if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0]
if not fb_base_url:
fb_base_url = OPENROUTER_BASE_URL
return fb_key, fb_base_url, "chat_completions"
def _try_activate_fallback(self) -> bool: def _try_activate_fallback(self) -> bool:
"""Switch to the configured fallback model/provider. """Switch to the configured fallback model/provider.
@ -2319,6 +2264,10 @@ class AIAgent:
OpenAI client, model slug, and provider in-place so the retry loop OpenAI client, model slug, and provider in-place so the retry loop
can continue with the new backend. One-shot: returns False if can continue with the new backend. One-shot: returns False if
already activated or not configured. already activated or not configured.
Uses the centralized provider router (resolve_provider_client) for
auth resolution and client construction — no duplicated provider→key
mappings.
""" """
if self._fallback_activated or not self._fallback_model: if self._fallback_activated or not self._fallback_model:
return False return False
@ -2329,25 +2278,31 @@ class AIAgent:
if not fb_provider or not fb_model: if not fb_provider or not fb_model:
return False return False
resolved = self._resolve_fallback_credentials(fb_provider, fb) # Use centralized router for client construction.
if resolved is None: # raw_codex=True because the main agent needs direct responses.stream()
return False # access for Codex providers.
fb_key, fb_base_url, fb_api_mode = resolved
# Build new client
try: try:
client_kwargs = {"api_key": fb_key, "base_url": fb_base_url} from agent.auxiliary_client import resolve_provider_client
if "openrouter" in fb_base_url.lower(): fb_client, _ = resolve_provider_client(
client_kwargs["default_headers"] = { fb_provider, model=fb_model, raw_codex=True)
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent", if fb_client is None:
"X-OpenRouter-Title": "Hermes Agent", logging.warning(
"X-OpenRouter-Categories": "productivity,cli-agent", "Fallback to %s failed: provider not configured",
} fb_provider)
elif "api.kimi.com" in fb_base_url.lower(): return False
client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
self.client = OpenAI(**client_kwargs) # Determine api_mode from provider
self._client_kwargs = client_kwargs fb_api_mode = "chat_completions"
if fb_provider == "openai-codex":
fb_api_mode = "codex_responses"
fb_base_url = str(fb_client.base_url)
# Swap client and config in-place
self.client = fb_client
self._client_kwargs = {
"api_key": fb_client.api_key,
"base_url": fb_base_url,
}
old_model = self.model old_model = self.model
self.model = fb_model self.model = fb_model
self.provider = fb_provider self.provider = fb_provider
@ -2444,16 +2399,26 @@ class AIAgent:
extra_body = {} extra_body = {}
if provider_preferences:
extra_body["provider"] = provider_preferences
_is_openrouter = "openrouter" in self.base_url.lower() _is_openrouter = "openrouter" in self.base_url.lower()
# Provider preferences (only, ignore, order, sort) are OpenRouter-
# specific. Only send to OpenRouter-compatible endpoints.
# TODO: Nous Portal will add transparent proxy support — re-enable
# for _is_nous when their backend is updated.
if provider_preferences and _is_openrouter:
extra_body["provider"] = provider_preferences
_is_nous = "nousresearch" in self.base_url.lower() _is_nous = "nousresearch" in self.base_url.lower()
_is_mistral = "api.mistral.ai" in self.base_url.lower() _is_mistral = "api.mistral.ai" in self.base_url.lower()
if (_is_openrouter or _is_nous) and not _is_mistral: if (_is_openrouter or _is_nous) and not _is_mistral:
if self.reasoning_config is not None: if self.reasoning_config is not None:
extra_body["reasoning"] = self.reasoning_config rc = dict(self.reasoning_config)
# Nous Portal requires reasoning enabled — don't send
# enabled=false to it (would cause 400).
if _is_nous and rc.get("enabled") is False:
pass # omit reasoning entirely for Nous when disabled
else:
extra_body["reasoning"] = rc
else: else:
extra_body["reasoning"] = { extra_body["reasoning"] = {
"enabled": True, "enabled": True,
@ -2630,19 +2595,22 @@ class AIAgent:
# Use auxiliary client for the flush call when available -- # Use auxiliary client for the flush call when available --
# it's cheaper and avoids Codex Responses API incompatibility. # it's cheaper and avoids Codex Responses API incompatibility.
from agent.auxiliary_client import get_text_auxiliary_client from agent.auxiliary_client import call_llm as _call_llm
aux_client, aux_model = get_text_auxiliary_client() _aux_available = True
try:
response = _call_llm(
task="flush_memories",
messages=api_messages,
tools=[memory_tool_def],
temperature=0.3,
max_tokens=5120,
timeout=30.0,
)
except RuntimeError:
_aux_available = False
response = None
if aux_client: if not _aux_available and self.api_mode == "codex_responses":
api_kwargs = {
"model": aux_model,
"messages": api_messages,
"tools": [memory_tool_def],
"temperature": 0.3,
"max_tokens": 5120,
}
response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
elif self.api_mode == "codex_responses":
# No auxiliary client -- use the Codex Responses path directly # No auxiliary client -- use the Codex Responses path directly
codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs = self._build_api_kwargs(api_messages)
codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
@ -2650,7 +2618,7 @@ class AIAgent:
if "max_output_tokens" in codex_kwargs: if "max_output_tokens" in codex_kwargs:
codex_kwargs["max_output_tokens"] = 5120 codex_kwargs["max_output_tokens"] = 5120
response = self._run_codex_stream(codex_kwargs) response = self._run_codex_stream(codex_kwargs)
else: elif not _aux_available:
api_kwargs = { api_kwargs = {
"model": self.model, "model": self.model,
"messages": api_messages, "messages": api_messages,
@ -2662,7 +2630,7 @@ class AIAgent:
# Extract tool calls from the response, handling both API formats # Extract tool calls from the response, handling both API formats
tool_calls = [] tool_calls = []
if self.api_mode == "codex_responses" and not aux_client: if self.api_mode == "codex_responses" and not _aux_available:
assistant_msg, _ = self._normalize_codex_response(response) assistant_msg, _ = self._normalize_codex_response(response)
if assistant_msg and assistant_msg.tool_calls: if assistant_msg and assistant_msg.tool_calls:
tool_calls = assistant_msg.tool_calls tool_calls = assistant_msg.tool_calls

View file

@ -9,8 +9,7 @@ from agent.context_compressor import ContextCompressor
@pytest.fixture() @pytest.fixture()
def compressor(): def compressor():
"""Create a ContextCompressor with mocked dependencies.""" """Create a ContextCompressor with mocked dependencies."""
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(None, None)):
c = ContextCompressor( c = ContextCompressor(
model="test/model", model="test/model",
threshold_percent=0.85, threshold_percent=0.85,
@ -119,14 +118,11 @@ class TestGenerateSummaryNoneContent:
"""Regression: content=None (from tool-call-only assistant messages) must not crash.""" """Regression: content=None (from tool-call-only assistant messages) must not crash."""
def test_none_content_does_not_crash(self): def test_none_content_does_not_crash(self):
mock_client = MagicMock()
mock_response = MagicMock() mock_response = MagicMock()
mock_response.choices = [MagicMock()] mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: tool calls happened" mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: tool calls happened"
mock_client.chat.completions.create.return_value = mock_response
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
c = ContextCompressor(model="test", quiet_mode=True) c = ContextCompressor(model="test", quiet_mode=True)
messages = [ messages = [
@ -139,14 +135,14 @@ class TestGenerateSummaryNoneContent:
{"role": "user", "content": "thanks"}, {"role": "user", "content": "thanks"},
] ]
summary = c._generate_summary(messages) with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages)
assert isinstance(summary, str) assert isinstance(summary, str)
assert "CONTEXT SUMMARY" in summary assert "CONTEXT SUMMARY" in summary
def test_none_content_in_system_message_compress(self): def test_none_content_in_system_message_compress(self):
"""System message with content=None should not crash during compress.""" """System message with content=None should not crash during compress."""
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(None, None)):
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2) c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
msgs = [{"role": "system", "content": None}] + [ msgs = [{"role": "system", "content": None}] + [
@ -165,12 +161,12 @@ class TestCompressWithClient:
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened" mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
mock_client.chat.completions.create.return_value = mock_response mock_client.chat.completions.create.return_value = mock_response
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
c = ContextCompressor(model="test", quiet_mode=True) c = ContextCompressor(model="test", quiet_mode=True)
msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)] msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
result = c.compress(msgs) with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
# Should have summary message in the middle # Should have summary message in the middle
contents = [m.get("content", "") for m in result] contents = [m.get("content", "") for m in result]
@ -184,8 +180,7 @@ class TestCompressWithClient:
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle" mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
mock_client.chat.completions.create.return_value = mock_response mock_client.chat.completions.create.return_value = mock_response
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
c = ContextCompressor( c = ContextCompressor(
model="test", model="test",
quiet_mode=True, quiet_mode=True,
@ -212,7 +207,8 @@ class TestCompressWithClient:
{"role": "user", "content": "later 4"}, {"role": "user", "content": "later 4"},
] ]
result = c.compress(msgs) with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
answered_ids = { answered_ids = {
msg.get("tool_call_id") msg.get("tool_call_id")
@ -232,8 +228,7 @@ class TestCompressWithClient:
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened" mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
mock_client.chat.completions.create.return_value = mock_response mock_client.chat.completions.create.return_value = mock_response
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2) c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
# Last head message (index 1) is "assistant" → summary should be "user" # Last head message (index 1) is "assistant" → summary should be "user"
@ -245,7 +240,8 @@ class TestCompressWithClient:
{"role": "user", "content": "msg 4"}, {"role": "user", "content": "msg 4"},
{"role": "assistant", "content": "msg 5"}, {"role": "assistant", "content": "msg 5"},
] ]
result = c.compress(msgs) with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
assert len(summary_msg) == 1 assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "user" assert summary_msg[0]["role"] == "user"
@ -258,8 +254,7 @@ class TestCompressWithClient:
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened" mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
mock_client.chat.completions.create.return_value = mock_response mock_client.chat.completions.create.return_value = mock_response
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2) c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2)
# Last head message (index 2) is "user" → summary should be "assistant" # Last head message (index 2) is "user" → summary should be "assistant"
@ -273,20 +268,18 @@ class TestCompressWithClient:
{"role": "user", "content": "msg 6"}, {"role": "user", "content": "msg 6"},
{"role": "assistant", "content": "msg 7"}, {"role": "assistant", "content": "msg 7"},
] ]
result = c.compress(msgs) with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
assert len(summary_msg) == 1 assert len(summary_msg) == 1
assert summary_msg[0]["role"] == "assistant" assert summary_msg[0]["role"] == "assistant"
def test_summarization_does_not_start_tail_with_tool_outputs(self): def test_summarization_does_not_start_tail_with_tool_outputs(self):
mock_client = MagicMock()
mock_response = MagicMock() mock_response = MagicMock()
mock_response.choices = [MagicMock()] mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle" mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
mock_client.chat.completions.create.return_value = mock_response
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ with patch("agent.context_compressor.get_model_context_length", return_value=100000):
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
c = ContextCompressor( c = ContextCompressor(
model="test", model="test",
quiet_mode=True, quiet_mode=True,
@ -309,7 +302,8 @@ class TestCompressWithClient:
{"role": "user", "content": "latest user"}, {"role": "user", "content": "latest user"},
] ]
result = c.compress(msgs) with patch("agent.context_compressor.call_llm", return_value=mock_response):
result = c.compress(msgs)
called_ids = { called_ids = {
tc["id"] tc["id"]

View file

@ -103,7 +103,7 @@ def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch):
config = load_config() config = load_config()
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4) monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 3)
prompt_values = iter( prompt_values = iter(
[ [

View file

@ -579,7 +579,7 @@ class WebToolsTester:
"results": self.test_results, "results": self.test_results,
"environment": { "environment": {
"firecrawl_api_key": check_firecrawl_api_key(), "firecrawl_api_key": check_firecrawl_api_key(),
"nous_api_key": check_auxiliary_model(), "auxiliary_model": check_auxiliary_model(),
"debug_mode": get_debug_session_info()["enabled"] "debug_mode": get_debug_session_info()["enabled"]
} }
} }

View file

@ -229,13 +229,14 @@ class TestVisionModelOverride:
def test_default_model_when_no_override(self, monkeypatch): def test_default_model_when_no_override(self, monkeypatch):
monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False) monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
from tools.vision_tools import _handle_vision_analyze, DEFAULT_VISION_MODEL from tools.vision_tools import _handle_vision_analyze
with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool: with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
mock_tool.return_value = '{"success": true}' mock_tool.return_value = '{"success": true}'
_handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"}) _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
call_args = mock_tool.call_args call_args = mock_tool.call_args
expected = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview" # With no AUXILIARY_VISION_MODEL env var, model should be None
assert call_args[0][2] == expected # (the centralized call_llm router picks the provider default)
assert call_args[0][2] is None
# ── DEFAULT_CONFIG shape tests ─────────────────────────────────────────────── # ── DEFAULT_CONFIG shape tests ───────────────────────────────────────────────

View file

@ -93,8 +93,8 @@ class TestModelCommand:
output = capsys.readouterr().out output = capsys.readouterr().out
assert "anthropic/claude-opus-4.6" in output assert "anthropic/claude-opus-4.6" in output
assert "OpenRouter" in output assert "OpenRouter" in output
assert "Available models" in output assert "Authenticated providers" in output or "Switch model" in output
assert "provider:model-name" in output assert "provider" in output and "model" in output
# -- provider switching tests ------------------------------------------- # -- provider switching tests -------------------------------------------

View file

@ -197,21 +197,28 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
assert shell.model == "gpt-5.2-codex" assert shell.model == "gpt-5.2-codex"
def test_codex_provider_trusts_explicit_envvar_model(monkeypatch): def test_codex_provider_uses_config_model(monkeypatch):
"""When the user explicitly sets LLM_MODEL, we trust their choice and """Model comes from config.yaml, not LLM_MODEL env var.
let the API be the judge even if it's a non-OpenAI model. Only Config.yaml is the single source of truth to avoid multi-agent conflicts."""
provider prefixes are stripped; the bare model passes through."""
cli = _import_cli() cli = _import_cli()
monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6") # LLM_MODEL env var should be IGNORED (even if set)
monkeypatch.setenv("LLM_MODEL", "should-be-ignored")
monkeypatch.delenv("OPENAI_MODEL", raising=False) monkeypatch.delenv("OPENAI_MODEL", raising=False)
# Set model via config
monkeypatch.setitem(cli.CLI_CONFIG, "model", {
"default": "gpt-5.2-codex",
"provider": "openai-codex",
"base_url": "https://chatgpt.com/backend-api/codex",
})
def _runtime_resolve(**kwargs): def _runtime_resolve(**kwargs):
return { return {
"provider": "openai-codex", "provider": "openai-codex",
"api_mode": "codex_responses", "api_mode": "codex_responses",
"base_url": "https://chatgpt.com/backend-api/codex", "base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "test-key", "api_key": "fake-codex-token",
"source": "env/config", "source": "env/config",
} }
@ -220,11 +227,12 @@ def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
shell = cli.HermesCLI(compact=True, max_turns=1) shell = cli.HermesCLI(compact=True, max_turns=1)
assert shell._model_is_default is False
assert shell._ensure_runtime_credentials() is True assert shell._ensure_runtime_credentials() is True
assert shell.provider == "openai-codex" assert shell.provider == "openai-codex"
# User explicitly chose this model — it passes through untouched # Model from config (may be normalized by codex provider logic)
assert shell.model == "claude-opus-4-6" assert "codex" in shell.model.lower()
# LLM_MODEL env var is NOT used
assert shell.model != "should-be-ignored"
def test_codex_provider_preserves_explicit_codex_model(monkeypatch): def test_codex_provider_preserves_explicit_codex_model(monkeypatch):

View file

@ -35,7 +35,7 @@ def _make_agent(fallback_model=None):
patch("run_agent.OpenAI"), patch("run_agent.OpenAI"),
): ):
agent = AIAgent( agent = AIAgent(
api_key="test-key-primary", api_key="test-key",
quiet_mode=True, quiet_mode=True,
skip_context_files=True, skip_context_files=True,
skip_memory=True, skip_memory=True,
@ -45,6 +45,14 @@ def _make_agent(fallback_model=None):
return agent return agent
def _mock_resolve(base_url="https://openrouter.ai/api/v1", api_key="test-key"):
"""Helper to create a mock client for resolve_provider_client."""
mock_client = MagicMock()
mock_client.api_key = api_key
mock_client.base_url = base_url
return mock_client
# ============================================================================= # =============================================================================
# _try_activate_fallback() # _try_activate_fallback()
# ============================================================================= # =============================================================================
@ -71,9 +79,13 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-fallback-key"}), api_key="sk-or-fallback-key",
patch("run_agent.OpenAI") as mock_openai, base_url="https://openrouter.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "anthropic/claude-sonnet-4"),
): ):
result = agent._try_activate_fallback() result = agent._try_activate_fallback()
assert result is True assert result is True
@ -81,36 +93,37 @@ class TestTryActivateFallback:
assert agent.model == "anthropic/claude-sonnet-4" assert agent.model == "anthropic/claude-sonnet-4"
assert agent.provider == "openrouter" assert agent.provider == "openrouter"
assert agent.api_mode == "chat_completions" assert agent.api_mode == "chat_completions"
mock_openai.assert_called_once() assert agent.client is mock_client
call_kwargs = mock_openai.call_args[1]
assert call_kwargs["api_key"] == "sk-or-fallback-key"
assert "openrouter" in call_kwargs["base_url"].lower()
# OpenRouter should get attribution headers
assert "default_headers" in call_kwargs
def test_activates_zai_fallback(self): def test_activates_zai_fallback(self):
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "zai", "model": "glm-5"}, fallback_model={"provider": "zai", "model": "glm-5"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}), api_key="sk-zai-key",
patch("run_agent.OpenAI") as mock_openai, base_url="https://open.z.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "glm-5"),
): ):
result = agent._try_activate_fallback() result = agent._try_activate_fallback()
assert result is True assert result is True
assert agent.model == "glm-5" assert agent.model == "glm-5"
assert agent.provider == "zai" assert agent.provider == "zai"
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert call_kwargs["api_key"] == "sk-zai-key"
assert "z.ai" in call_kwargs["base_url"].lower()
def test_activates_kimi_fallback(self): def test_activates_kimi_fallback(self):
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"}, fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"KIMI_API_KEY": "sk-kimi-key"}), api_key="sk-kimi-key",
patch("run_agent.OpenAI"), base_url="https://api.moonshot.ai/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "kimi-k2.5"),
): ):
assert agent._try_activate_fallback() is True assert agent._try_activate_fallback() is True
assert agent.model == "kimi-k2.5" assert agent.model == "kimi-k2.5"
@ -120,23 +133,30 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"MINIMAX_API_KEY": "sk-mm-key"}), api_key="sk-mm-key",
patch("run_agent.OpenAI") as mock_openai, base_url="https://api.minimax.io/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "MiniMax-M2.5"),
): ):
assert agent._try_activate_fallback() is True assert agent._try_activate_fallback() is True
assert agent.model == "MiniMax-M2.5" assert agent.model == "MiniMax-M2.5"
assert agent.provider == "minimax" assert agent.provider == "minimax"
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert "minimax.io" in call_kwargs["base_url"]
def test_only_fires_once(self): def test_only_fires_once(self):
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}), api_key="sk-or-key",
patch("run_agent.OpenAI"), base_url="https://openrouter.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "anthropic/claude-sonnet-4"),
): ):
assert agent._try_activate_fallback() is True assert agent._try_activate_fallback() is True
# Second attempt should return False # Second attempt should return False
@ -147,9 +167,10 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
) )
# Ensure MINIMAX_API_KEY is not in the environment with patch(
env = {k: v for k, v in os.environ.items() if k != "MINIMAX_API_KEY"} "agent.auxiliary_client.resolve_provider_client",
with patch.dict("os.environ", env, clear=True): return_value=(None, None),
):
assert agent._try_activate_fallback() is False assert agent._try_activate_fallback() is False
assert agent._fallback_activated is False assert agent._fallback_activated is False
@ -163,22 +184,29 @@ class TestTryActivateFallback:
"api_key_env": "MY_CUSTOM_KEY", "api_key_env": "MY_CUSTOM_KEY",
}, },
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"MY_CUSTOM_KEY": "custom-secret"}), api_key="custom-secret",
patch("run_agent.OpenAI") as mock_openai, base_url="http://localhost:8080/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "my-model"),
): ):
assert agent._try_activate_fallback() is True assert agent._try_activate_fallback() is True
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert call_kwargs["base_url"] == "http://localhost:8080/v1" assert agent.model == "my-model"
assert call_kwargs["api_key"] == "custom-secret"
def test_prompt_caching_enabled_for_claude_on_openrouter(self): def test_prompt_caching_enabled_for_claude_on_openrouter(self):
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}), api_key="sk-or-key",
patch("run_agent.OpenAI"), base_url="https://openrouter.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "anthropic/claude-sonnet-4"),
): ):
agent._try_activate_fallback() agent._try_activate_fallback()
assert agent._use_prompt_caching is True assert agent._use_prompt_caching is True
@ -187,9 +215,13 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"}, fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}), api_key="sk-or-key",
patch("run_agent.OpenAI"), base_url="https://openrouter.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "google/gemini-2.5-flash"),
): ):
agent._try_activate_fallback() agent._try_activate_fallback()
assert agent._use_prompt_caching is False assert agent._use_prompt_caching is False
@ -198,9 +230,13 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "zai", "model": "glm-5"}, fallback_model={"provider": "zai", "model": "glm-5"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}), api_key="sk-zai-key",
patch("run_agent.OpenAI"), base_url="https://open.z.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "glm-5"),
): ):
agent._try_activate_fallback() agent._try_activate_fallback()
assert agent._use_prompt_caching is False assert agent._use_prompt_caching is False
@ -210,35 +246,36 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "zai", "model": "glm-5"}, fallback_model={"provider": "zai", "model": "glm-5"},
) )
with ( mock_client = _mock_resolve(
patch.dict("os.environ", {"Z_AI_API_KEY": "sk-alt-key"}), api_key="sk-alt-key",
patch("run_agent.OpenAI") as mock_openai, base_url="https://open.z.ai/api/v1",
)
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "glm-5"),
): ):
assert agent._try_activate_fallback() is True assert agent._try_activate_fallback() is True
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert call_kwargs["api_key"] == "sk-alt-key"
def test_activates_codex_fallback(self): def test_activates_codex_fallback(self):
"""OpenAI Codex fallback should use OAuth credentials and codex_responses mode.""" """OpenAI Codex fallback should use OAuth credentials and codex_responses mode."""
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"}, fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
) )
mock_creds = { mock_client = _mock_resolve(
"api_key": "codex-oauth-token", api_key="codex-oauth-token",
"base_url": "https://chatgpt.com/backend-api/codex", base_url="https://chatgpt.com/backend-api/codex",
} )
with ( with patch(
patch("hermes_cli.auth.resolve_codex_runtime_credentials", return_value=mock_creds), "agent.auxiliary_client.resolve_provider_client",
patch("run_agent.OpenAI") as mock_openai, return_value=(mock_client, "gpt-5.3-codex"),
): ):
result = agent._try_activate_fallback() result = agent._try_activate_fallback()
assert result is True assert result is True
assert agent.model == "gpt-5.3-codex" assert agent.model == "gpt-5.3-codex"
assert agent.provider == "openai-codex" assert agent.provider == "openai-codex"
assert agent.api_mode == "codex_responses" assert agent.api_mode == "codex_responses"
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert call_kwargs["api_key"] == "codex-oauth-token"
assert "chatgpt.com" in call_kwargs["base_url"]
def test_codex_fallback_fails_gracefully_without_credentials(self): def test_codex_fallback_fails_gracefully_without_credentials(self):
"""Codex fallback should return False if no OAuth credentials available.""" """Codex fallback should return False if no OAuth credentials available."""
@ -246,8 +283,8 @@ class TestTryActivateFallback:
fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"}, fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
) )
with patch( with patch(
"hermes_cli.auth.resolve_codex_runtime_credentials", "agent.auxiliary_client.resolve_provider_client",
side_effect=Exception("No Codex credentials"), return_value=(None, None),
): ):
assert agent._try_activate_fallback() is False assert agent._try_activate_fallback() is False
assert agent._fallback_activated is False assert agent._fallback_activated is False
@ -257,22 +294,20 @@ class TestTryActivateFallback:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": "nous", "model": "nous-hermes-3"}, fallback_model={"provider": "nous", "model": "nous-hermes-3"},
) )
mock_creds = { mock_client = _mock_resolve(
"api_key": "nous-agent-key-abc", api_key="nous-agent-key-abc",
"base_url": "https://inference-api.nousresearch.com/v1", base_url="https://inference-api.nousresearch.com/v1",
} )
with ( with patch(
patch("hermes_cli.auth.resolve_nous_runtime_credentials", return_value=mock_creds), "agent.auxiliary_client.resolve_provider_client",
patch("run_agent.OpenAI") as mock_openai, return_value=(mock_client, "nous-hermes-3"),
): ):
result = agent._try_activate_fallback() result = agent._try_activate_fallback()
assert result is True assert result is True
assert agent.model == "nous-hermes-3" assert agent.model == "nous-hermes-3"
assert agent.provider == "nous" assert agent.provider == "nous"
assert agent.api_mode == "chat_completions" assert agent.api_mode == "chat_completions"
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert call_kwargs["api_key"] == "nous-agent-key-abc"
assert "nousresearch.com" in call_kwargs["base_url"]
def test_nous_fallback_fails_gracefully_without_login(self): def test_nous_fallback_fails_gracefully_without_login(self):
"""Nous fallback should return False if not logged in.""" """Nous fallback should return False if not logged in."""
@ -280,8 +315,8 @@ class TestTryActivateFallback:
fallback_model={"provider": "nous", "model": "nous-hermes-3"}, fallback_model={"provider": "nous", "model": "nous-hermes-3"},
) )
with patch( with patch(
"hermes_cli.auth.resolve_nous_runtime_credentials", "agent.auxiliary_client.resolve_provider_client",
side_effect=Exception("Not logged in to Nous Portal"), return_value=(None, None),
): ):
assert agent._try_activate_fallback() is False assert agent._try_activate_fallback() is False
assert agent._fallback_activated is False assert agent._fallback_activated is False
@ -315,7 +350,7 @@ class TestFallbackInit:
# ============================================================================= # =============================================================================
class TestProviderCredentials: class TestProviderCredentials:
"""Verify that each supported provider resolves its API key correctly.""" """Verify that each supported provider resolves via the centralized router."""
@pytest.mark.parametrize("provider,env_var,base_url_fragment", [ @pytest.mark.parametrize("provider,env_var,base_url_fragment", [
("openrouter", "OPENROUTER_API_KEY", "openrouter"), ("openrouter", "OPENROUTER_API_KEY", "openrouter"),
@ -328,12 +363,15 @@ class TestProviderCredentials:
agent = _make_agent( agent = _make_agent(
fallback_model={"provider": provider, "model": "test-model"}, fallback_model={"provider": provider, "model": "test-model"},
) )
with ( mock_client = MagicMock()
patch.dict("os.environ", {env_var: "test-key-123"}), mock_client.api_key = "test-api-key"
patch("run_agent.OpenAI") as mock_openai, mock_client.base_url = f"https://{base_url_fragment}/v1"
with patch(
"agent.auxiliary_client.resolve_provider_client",
return_value=(mock_client, "test-model"),
): ):
result = agent._try_activate_fallback() result = agent._try_activate_fallback()
assert result is True, f"Failed to activate fallback for {provider}" assert result is True, f"Failed to activate fallback for {provider}"
call_kwargs = mock_openai.call_args[1] assert agent.client is mock_client
assert call_kwargs["api_key"] == "test-key-123" assert agent.model == "test-model"
assert base_url_fragment in call_kwargs["base_url"].lower() assert agent.provider == provider

View file

@ -98,10 +98,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
def test_flush_uses_auxiliary_when_available(self, monkeypatch): def test_flush_uses_auxiliary_when_available(self, monkeypatch):
agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex") agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
mock_aux_client = MagicMock() mock_response = _chat_response_with_memory_call()
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
messages = [ messages = [
{"role": "user", "content": "Hello"}, {"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there"}, {"role": "assistant", "content": "Hi there"},
@ -110,9 +109,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
agent.flush_memories(messages) agent.flush_memories(messages)
mock_aux_client.chat.completions.create.assert_called_once() mock_call.assert_called_once()
call_kwargs = mock_aux_client.chat.completions.create.call_args call_kwargs = mock_call.call_args
assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini" assert call_kwargs.kwargs.get("task") == "flush_memories"
def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch): def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
"""Non-Codex mode with no auxiliary falls back to self.client.""" """Non-Codex mode with no auxiliary falls back to self.client."""
@ -120,7 +119,7 @@ class TestFlushMemoriesUsesAuxiliaryClient:
agent.client = MagicMock() agent.client = MagicMock()
agent.client.chat.completions.create.return_value = _chat_response_with_memory_call() agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)): with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
messages = [ messages = [
{"role": "user", "content": "Hello"}, {"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there"}, {"role": "assistant", "content": "Hi there"},
@ -135,10 +134,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
"""Verify that memory tool calls from the flush response actually get executed.""" """Verify that memory tool calls from the flush response actually get executed."""
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
mock_aux_client = MagicMock() mock_response = _chat_response_with_memory_call()
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
messages = [ messages = [
{"role": "user", "content": "Hello"}, {"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi"}, {"role": "assistant", "content": "Hi"},
@ -157,10 +155,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
"""After flush, the flush prompt and any response should be removed from messages.""" """After flush, the flush prompt and any response should be removed from messages."""
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter") agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
mock_aux_client = MagicMock() mock_response = _chat_response_with_memory_call()
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")): with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
messages = [ messages = [
{"role": "user", "content": "Hello"}, {"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi"}, {"role": "assistant", "content": "Hi"},
@ -202,7 +199,7 @@ class TestFlushMemoriesCodexFallback:
model="gpt-5-codex", model="gpt-5-codex",
) )
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \ with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \ patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
patch.object(agent, "_build_api_kwargs") as mock_build, \ patch.object(agent, "_build_api_kwargs") as mock_build, \
patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory: patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:

View file

@ -959,7 +959,7 @@ class TestFlushSentinelNotLeaked:
agent.client.chat.completions.create.return_value = mock_response agent.client.chat.completions.create.return_value = mock_response
# Bypass auxiliary client so flush uses agent.client directly # Bypass auxiliary client so flush uses agent.client directly
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)): with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
agent.flush_memories(messages, min_turns=0) agent.flush_memories(messages, min_turns=0)
# Check what was actually sent to the API # Check what was actually sent to the API

View file

@ -158,29 +158,6 @@ def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):
assert resolved["api_key"] == "sk-vllm-key" assert resolved["api_key"] == "sk-vllm-key"
def test_resolve_runtime_provider_nous_api(monkeypatch):
"""Nous Portal API key provider resolves via the api_key path."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous-api")
monkeypatch.setattr(
rp,
"resolve_api_key_provider_credentials",
lambda pid: {
"provider": "nous-api",
"api_key": "nous-test-key",
"base_url": "https://inference-api.nousresearch.com/v1",
"source": "NOUS_API_KEY",
},
)
resolved = rp.resolve_runtime_provider(requested="nous-api")
assert resolved["provider"] == "nous-api"
assert resolved["api_mode"] == "chat_completions"
assert resolved["base_url"] == "https://inference-api.nousresearch.com/v1"
assert resolved["api_key"] == "nous-test-key"
assert resolved["requested_provider"] == "nous-api"
def test_explicit_openrouter_skips_openai_base_url(monkeypatch): def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
"""When the user explicitly requests openrouter, OPENAI_BASE_URL """When the user explicitly requests openrouter, OPENAI_BASE_URL
(which may point to a custom endpoint) must not override the (which may point to a custom endpoint) must not override the

View file

@ -137,8 +137,7 @@ class TestBrowserVisionAnnotate:
with ( with (
patch("tools.browser_tool._run_browser_command") as mock_cmd, patch("tools.browser_tool._run_browser_command") as mock_cmd,
patch("tools.browser_tool._aux_vision_client") as mock_client, patch("tools.browser_tool.call_llm") as mock_call_llm,
patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"),
patch("tools.browser_tool._get_vision_model", return_value="test-model"), patch("tools.browser_tool._get_vision_model", return_value="test-model"),
): ):
mock_cmd.return_value = {"success": True, "data": {}} mock_cmd.return_value = {"success": True, "data": {}}
@ -159,8 +158,7 @@ class TestBrowserVisionAnnotate:
with ( with (
patch("tools.browser_tool._run_browser_command") as mock_cmd, patch("tools.browser_tool._run_browser_command") as mock_cmd,
patch("tools.browser_tool._aux_vision_client") as mock_client, patch("tools.browser_tool.call_llm") as mock_call_llm,
patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"),
patch("tools.browser_tool._get_vision_model", return_value="test-model"), patch("tools.browser_tool._get_vision_model", return_value="test-model"),
): ):
mock_cmd.return_value = {"success": True, "data": {}} mock_cmd.return_value = {"success": True, "data": {}}

View file

@ -1828,8 +1828,8 @@ class TestSamplingCallbackText:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
params = _make_sampling_params() params = _make_sampling_params()
result = asyncio.run(self.handler(None, params)) result = asyncio.run(self.handler(None, params))
@ -1847,13 +1847,13 @@ class TestSamplingCallbackText:
fake_client.chat.completions.create.return_value = _make_llm_response() fake_client.chat.completions.create.return_value = _make_llm_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ) as mock_call:
params = _make_sampling_params(system_prompt="Be helpful") params = _make_sampling_params(system_prompt="Be helpful")
asyncio.run(self.handler(None, params)) asyncio.run(self.handler(None, params))
call_args = fake_client.chat.completions.create.call_args call_args = mock_call.call_args
messages = call_args.kwargs["messages"] messages = call_args.kwargs["messages"]
assert messages[0] == {"role": "system", "content": "Be helpful"} assert messages[0] == {"role": "system", "content": "Be helpful"}
@ -1865,8 +1865,8 @@ class TestSamplingCallbackText:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
params = _make_sampling_params() params = _make_sampling_params()
result = asyncio.run(self.handler(None, params)) result = asyncio.run(self.handler(None, params))
@ -1889,8 +1889,8 @@ class TestSamplingCallbackToolUse:
fake_client.chat.completions.create.return_value = _make_llm_tool_response() fake_client.chat.completions.create.return_value = _make_llm_tool_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
params = _make_sampling_params() params = _make_sampling_params()
result = asyncio.run(self.handler(None, params)) result = asyncio.run(self.handler(None, params))
@ -1916,8 +1916,8 @@ class TestSamplingCallbackToolUse:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(self.handler(None, _make_sampling_params())) result = asyncio.run(self.handler(None, _make_sampling_params()))
@ -1939,8 +1939,8 @@ class TestToolLoopGovernance:
fake_client.chat.completions.create.return_value = _make_llm_tool_response() fake_client.chat.completions.create.return_value = _make_llm_tool_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
params = _make_sampling_params() params = _make_sampling_params()
# Round 1, 2: allowed # Round 1, 2: allowed
@ -1956,24 +1956,26 @@ class TestToolLoopGovernance:
def test_text_response_resets_counter(self): def test_text_response_resets_counter(self):
"""A text response resets the tool loop counter.""" """A text response resets the tool loop counter."""
handler = SamplingHandler("tl2", {"max_tool_rounds": 1}) handler = SamplingHandler("tl2", {"max_tool_rounds": 1})
fake_client = MagicMock()
# Use a list to hold the current response, so the side_effect can
# pick up changes between calls.
responses = [_make_llm_tool_response()]
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), side_effect=lambda **kw: responses[0],
): ):
# Tool response (round 1 of 1 allowed) # Tool response (round 1 of 1 allowed)
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
r1 = asyncio.run(handler(None, _make_sampling_params())) r1 = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(r1, CreateMessageResultWithTools) assert isinstance(r1, CreateMessageResultWithTools)
# Text response resets counter # Text response resets counter
fake_client.chat.completions.create.return_value = _make_llm_response() responses[0] = _make_llm_response()
r2 = asyncio.run(handler(None, _make_sampling_params())) r2 = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(r2, CreateMessageResult) assert isinstance(r2, CreateMessageResult)
# Tool response again (should succeed since counter was reset) # Tool response again (should succeed since counter was reset)
fake_client.chat.completions.create.return_value = _make_llm_tool_response() responses[0] = _make_llm_tool_response()
r3 = asyncio.run(handler(None, _make_sampling_params())) r3 = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(r3, CreateMessageResultWithTools) assert isinstance(r3, CreateMessageResultWithTools)
@ -1984,8 +1986,8 @@ class TestToolLoopGovernance:
fake_client.chat.completions.create.return_value = _make_llm_tool_response() fake_client.chat.completions.create.return_value = _make_llm_tool_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(result, ErrorData) assert isinstance(result, ErrorData)
@ -2003,8 +2005,8 @@ class TestSamplingErrors:
fake_client.chat.completions.create.return_value = _make_llm_response() fake_client.chat.completions.create.return_value = _make_llm_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
# First call succeeds # First call succeeds
r1 = asyncio.run(handler(None, _make_sampling_params())) r1 = asyncio.run(handler(None, _make_sampling_params()))
@ -2017,20 +2019,16 @@ class TestSamplingErrors:
def test_timeout_error(self): def test_timeout_error(self):
handler = SamplingHandler("to", {"timeout": 0.05}) handler = SamplingHandler("to", {"timeout": 0.05})
fake_client = MagicMock()
def slow_call(**kwargs): def slow_call(**kwargs):
import threading import threading
# Use an event to ensure the thread truly blocks long enough
evt = threading.Event() evt = threading.Event()
evt.wait(5) # blocks for up to 5 seconds (cancelled by timeout) evt.wait(5) # blocks for up to 5 seconds (cancelled by timeout)
return _make_llm_response() return _make_llm_response()
fake_client.chat.completions.create.side_effect = slow_call
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), side_effect=slow_call,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(result, ErrorData) assert isinstance(result, ErrorData)
@ -2041,12 +2039,11 @@ class TestSamplingErrors:
handler = SamplingHandler("np", {}) handler = SamplingHandler("np", {})
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(None, None), side_effect=RuntimeError("No LLM provider configured"),
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(result, ErrorData) assert isinstance(result, ErrorData)
assert "No LLM provider" in result.message
assert handler.metrics["errors"] == 1 assert handler.metrics["errors"] == 1
def test_empty_choices_returns_error(self): def test_empty_choices_returns_error(self):
@ -2060,8 +2057,8 @@ class TestSamplingErrors:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
@ -2080,8 +2077,8 @@ class TestSamplingErrors:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
@ -2099,8 +2096,8 @@ class TestSamplingErrors:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
@ -2120,19 +2117,19 @@ class TestModelWhitelist:
fake_client.chat.completions.create.return_value = _make_llm_response() fake_client.chat.completions.create.return_value = _make_llm_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "test-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(result, CreateMessageResult) assert isinstance(result, CreateMessageResult)
def test_disallowed_model_rejected(self): def test_disallowed_model_rejected(self):
handler = SamplingHandler("wl2", {"allowed_models": ["gpt-4o"]}) handler = SamplingHandler("wl2", {"allowed_models": ["gpt-4o"], "model": "test-model"})
fake_client = MagicMock() fake_client = MagicMock()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "gpt-3.5-turbo"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(result, ErrorData) assert isinstance(result, ErrorData)
@ -2145,8 +2142,8 @@ class TestModelWhitelist:
fake_client.chat.completions.create.return_value = _make_llm_response() fake_client.chat.completions.create.return_value = _make_llm_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "any-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
assert isinstance(result, CreateMessageResult) assert isinstance(result, CreateMessageResult)
@ -2166,8 +2163,8 @@ class TestMalformedToolCallArgs:
) )
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
@ -2194,8 +2191,8 @@ class TestMalformedToolCallArgs:
fake_client.chat.completions.create.return_value = response fake_client.chat.completions.create.return_value = response
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
result = asyncio.run(handler(None, _make_sampling_params())) result = asyncio.run(handler(None, _make_sampling_params()))
@ -2214,8 +2211,8 @@ class TestMetricsTracking:
fake_client.chat.completions.create.return_value = _make_llm_response() fake_client.chat.completions.create.return_value = _make_llm_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
asyncio.run(handler(None, _make_sampling_params())) asyncio.run(handler(None, _make_sampling_params()))
@ -2229,8 +2226,8 @@ class TestMetricsTracking:
fake_client.chat.completions.create.return_value = _make_llm_tool_response() fake_client.chat.completions.create.return_value = _make_llm_tool_response()
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(fake_client, "default-model"), return_value=fake_client.chat.completions.create.return_value,
): ):
asyncio.run(handler(None, _make_sampling_params())) asyncio.run(handler(None, _make_sampling_params()))
@ -2241,8 +2238,8 @@ class TestMetricsTracking:
handler = SamplingHandler("met3", {}) handler = SamplingHandler("met3", {})
with patch( with patch(
"agent.auxiliary_client.get_text_auxiliary_client", "agent.auxiliary_client.call_llm",
return_value=(None, None), side_effect=RuntimeError("No LLM provider configured"),
): ):
asyncio.run(handler(None, _make_sampling_params())) asyncio.run(handler(None, _make_sampling_params()))

View file

@ -189,16 +189,14 @@ class TestSessionSearch:
{"role": "assistant", "content": "hi there"}, {"role": "assistant", "content": "hi there"},
] ]
# Mock the summarizer to return a simple summary # Mock async_call_llm to raise RuntimeError → summarizer returns None
import tools.session_search_tool as sst from unittest.mock import AsyncMock, patch as _patch
original_client = sst._async_aux_client with _patch("tools.session_search_tool.async_call_llm",
sst._async_aux_client = None # Disable summarizer → returns None new_callable=AsyncMock,
side_effect=RuntimeError("no provider")):
result = json.loads(session_search( result = json.loads(session_search(
query="test", db=mock_db, current_session_id=current_sid, query="test", db=mock_db, current_session_id=current_sid,
)) ))
sst._async_aux_client = original_client
assert result["success"] is True assert result["success"] is True
# Current session should be skipped, only other_sid should appear # Current session should be skipped, only other_sid should appear

View file

@ -202,7 +202,7 @@ class TestHandleVisionAnalyze:
assert model == "custom/model-v1" assert model == "custom/model-v1"
def test_falls_back_to_default_model(self): def test_falls_back_to_default_model(self):
"""Without AUXILIARY_VISION_MODEL, should use DEFAULT_VISION_MODEL or fallback.""" """Without AUXILIARY_VISION_MODEL, model should be None (let call_llm resolve default)."""
with ( with (
patch( patch(
"tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock "tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
@ -218,9 +218,9 @@ class TestHandleVisionAnalyze:
coro.close() coro.close()
call_args = mock_tool.call_args call_args = mock_tool.call_args
model = call_args[0][2] model = call_args[0][2]
# Should be DEFAULT_VISION_MODEL or the hardcoded fallback # With no AUXILIARY_VISION_MODEL set, model should be None
assert model is not None # (the centralized call_llm router picks the default)
assert len(model) > 0 assert model is None
def test_empty_args_graceful(self): def test_empty_args_graceful(self):
"""Missing keys should default to empty strings, not raise.""" """Missing keys should default to empty strings, not raise."""
@ -277,8 +277,6 @@ class TestErrorLoggingExcInfo:
new_callable=AsyncMock, new_callable=AsyncMock,
side_effect=Exception("download boom"), side_effect=Exception("download boom"),
), ),
patch("tools.vision_tools._aux_async_client", MagicMock()),
patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"),
caplog.at_level(logging.ERROR, logger="tools.vision_tools"), caplog.at_level(logging.ERROR, logger="tools.vision_tools"),
): ):
result = await vision_analyze_tool( result = await vision_analyze_tool(
@ -311,25 +309,16 @@ class TestErrorLoggingExcInfo:
"tools.vision_tools._image_to_base64_data_url", "tools.vision_tools._image_to_base64_data_url",
return_value="data:image/jpeg;base64,abc", return_value="data:image/jpeg;base64,abc",
), ),
patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None),
patch(
"agent.auxiliary_client.auxiliary_max_tokens_param",
return_value={"max_tokens": 2000},
),
caplog.at_level(logging.WARNING, logger="tools.vision_tools"), caplog.at_level(logging.WARNING, logger="tools.vision_tools"),
): ):
# Mock the vision client # Mock the async_call_llm function to return a mock response
mock_client = AsyncMock()
mock_response = MagicMock() mock_response = MagicMock()
mock_choice = MagicMock() mock_choice = MagicMock()
mock_choice.message.content = "A test image description" mock_choice.message.content = "A test image description"
mock_response.choices = [mock_choice] mock_response.choices = [mock_choice]
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
# Patch module-level _aux_async_client so the tool doesn't bail early
with ( with (
patch("tools.vision_tools._aux_async_client", mock_client), patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock, return_value=mock_response),
patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"),
): ):
# Make unlink fail to trigger cleanup warning # Make unlink fail to trigger cleanup warning
original_unlink = Path.unlink original_unlink = Path.unlink

View file

@ -63,7 +63,7 @@ import time
import requests import requests
from typing import Dict, Any, Optional, List from typing import Dict, Any, Optional, List
from pathlib import Path from pathlib import Path
from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client from agent.auxiliary_client import call_llm
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -80,38 +80,15 @@ DEFAULT_SESSION_TIMEOUT = 300
# Max tokens for snapshot content before summarization # Max tokens for snapshot content before summarization
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
# Vision client — for browser_vision (screenshot analysis)
# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
# browser_tool module from importing (which would disable all 10 browser tools).
try:
_aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
except Exception as _init_err:
logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
_aux_vision_client, _DEFAULT_VISION_MODEL = None, None
# Text client — for page snapshot summarization (same config as web_extract) def _get_vision_model() -> Optional[str]:
try:
_aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
except Exception as _init_err:
logger.debug("Could not initialise text auxiliary client: %s", _init_err)
_aux_text_client, _DEFAULT_TEXT_MODEL = None, None
# Module-level alias for availability checks
EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
def _get_vision_model() -> str:
"""Model for browser_vision (screenshot analysis — multimodal).""" """Model for browser_vision (screenshot analysis — multimodal)."""
return (os.getenv("AUXILIARY_VISION_MODEL", "").strip() return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
or _DEFAULT_VISION_MODEL
or "google/gemini-3-flash-preview")
def _get_extraction_model() -> str: def _get_extraction_model() -> Optional[str]:
"""Model for page snapshot text summarization — same as web_extract.""" """Model for page snapshot text summarization — same as web_extract."""
return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
or _DEFAULT_TEXT_MODEL
or "google/gemini-3-flash-preview")
def _is_local_mode() -> bool: def _is_local_mode() -> bool:
@ -941,9 +918,6 @@ def _extract_relevant_content(
Falls back to simple truncation when no auxiliary text model is configured. Falls back to simple truncation when no auxiliary text model is configured.
""" """
if _aux_text_client is None:
return _truncate_snapshot(snapshot_text)
if user_task: if user_task:
extraction_prompt = ( extraction_prompt = (
f"You are a content extractor for a browser automation agent.\n\n" f"You are a content extractor for a browser automation agent.\n\n"
@ -968,13 +942,16 @@ def _extract_relevant_content(
) )
try: try:
from agent.auxiliary_client import auxiliary_max_tokens_param call_kwargs = {
response = _aux_text_client.chat.completions.create( "task": "web_extract",
model=_get_extraction_model(), "messages": [{"role": "user", "content": extraction_prompt}],
messages=[{"role": "user", "content": extraction_prompt}], "max_tokens": 4000,
**auxiliary_max_tokens_param(4000), "temperature": 0.1,
temperature=0.1, }
) model = _get_extraction_model()
if model:
call_kwargs["model"] = model
response = call_llm(**call_kwargs)
return response.choices[0].message.content return response.choices[0].message.content
except Exception: except Exception:
return _truncate_snapshot(snapshot_text) return _truncate_snapshot(snapshot_text)
@ -1497,14 +1474,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
effective_task_id = task_id or "default" effective_task_id = task_id or "default"
# Check auxiliary vision client
if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
return json.dumps({
"success": False,
"error": "Browser vision unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
}, ensure_ascii=False)
# Save screenshot to persistent location so it can be shared with users # Save screenshot to persistent location so it can be shared with users
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
screenshots_dir = hermes_home / "browser_screenshots" screenshots_dir = hermes_home / "browser_screenshots"
@ -1562,14 +1531,13 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
f"Focus on answering the user's specific question." f"Focus on answering the user's specific question."
) )
# Use the sync auxiliary vision client directly # Use the centralized LLM router
from agent.auxiliary_client import auxiliary_max_tokens_param
vision_model = _get_vision_model() vision_model = _get_vision_model()
logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s", logger.debug("browser_vision: analysing screenshot (%d bytes)",
len(image_data), vision_model) len(image_data))
response = _aux_vision_client.chat.completions.create( call_kwargs = {
model=vision_model, "task": "vision",
messages=[ "messages": [
{ {
"role": "user", "role": "user",
"content": [ "content": [
@ -1578,9 +1546,12 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
], ],
} }
], ],
**auxiliary_max_tokens_param(2000), "max_tokens": 2000,
temperature=0.1, "temperature": 0.1,
) }
if vision_model:
call_kwargs["model"] = vision_model
response = call_llm(**call_kwargs)
analysis = response.choices[0].message.content analysis = response.choices[0].message.content
response_data = { response_data = {

View file

@ -456,17 +456,13 @@ class SamplingHandler:
# Resolve model # Resolve model
model = self._resolve_model(getattr(params, "modelPreferences", None)) model = self._resolve_model(getattr(params, "modelPreferences", None))
# Get auxiliary LLM client # Get auxiliary LLM client via centralized router
from agent.auxiliary_client import get_text_auxiliary_client from agent.auxiliary_client import call_llm
client, default_model = get_text_auxiliary_client()
if client is None:
self.metrics["errors"] += 1
return self._error("No LLM provider available for sampling")
resolved_model = model or default_model # Model whitelist check (we need to resolve model before calling)
resolved_model = model or self.model_override or ""
# Model whitelist check if self.allowed_models and resolved_model and resolved_model not in self.allowed_models:
if self.allowed_models and resolved_model not in self.allowed_models:
logger.warning( logger.warning(
"MCP server '%s' requested model '%s' not in allowed_models", "MCP server '%s' requested model '%s' not in allowed_models",
self.server_name, resolved_model, self.server_name, resolved_model,
@ -484,20 +480,15 @@ class SamplingHandler:
# Build LLM call kwargs # Build LLM call kwargs
max_tokens = min(params.maxTokens, self.max_tokens_cap) max_tokens = min(params.maxTokens, self.max_tokens_cap)
call_kwargs: dict = { call_temperature = None
"model": resolved_model,
"messages": messages,
"max_tokens": max_tokens,
}
if hasattr(params, "temperature") and params.temperature is not None: if hasattr(params, "temperature") and params.temperature is not None:
call_kwargs["temperature"] = params.temperature call_temperature = params.temperature
if stop := getattr(params, "stopSequences", None):
call_kwargs["stop"] = stop
# Forward server-provided tools # Forward server-provided tools
call_tools = None
server_tools = getattr(params, "tools", None) server_tools = getattr(params, "tools", None)
if server_tools: if server_tools:
call_kwargs["tools"] = [ call_tools = [
{ {
"type": "function", "type": "function",
"function": { "function": {
@ -508,9 +499,6 @@ class SamplingHandler:
} }
for t in server_tools for t in server_tools
] ]
if tool_choice := getattr(params, "toolChoice", None):
mode = getattr(tool_choice, "mode", "auto")
call_kwargs["tool_choice"] = {"auto": "auto", "required": "required", "none": "none"}.get(mode, "auto")
logger.log( logger.log(
self.audit_level, self.audit_level,
@ -520,7 +508,15 @@ class SamplingHandler:
# Offload sync LLM call to thread (non-blocking) # Offload sync LLM call to thread (non-blocking)
def _sync_call(): def _sync_call():
return client.chat.completions.create(**call_kwargs) return call_llm(
task="mcp",
model=resolved_model or None,
messages=messages,
temperature=call_temperature,
max_tokens=max_tokens,
tools=call_tools,
timeout=self.timeout,
)
try: try:
response = await asyncio.wait_for( response = await asyncio.wait_for(

View file

@ -1,39 +1,30 @@
"""Shared OpenRouter API client for Hermes tools. """Shared OpenRouter API client for Hermes tools.
Provides a single lazy-initialized AsyncOpenAI client that all tool modules Provides a single lazy-initialized AsyncOpenAI client that all tool modules
can share, eliminating the duplicated _get_openrouter_client() / can share. Routes through the centralized provider router in
_get_summarizer_client() pattern previously copy-pasted across web_tools, agent/auxiliary_client.py so auth, headers, and API format are handled
vision_tools, mixture_of_agents_tool, and session_search_tool. consistently.
""" """
import os import os
from openai import AsyncOpenAI _client = None
from hermes_constants import OPENROUTER_BASE_URL
_client: AsyncOpenAI | None = None
def get_async_client() -> AsyncOpenAI: def get_async_client():
"""Return a shared AsyncOpenAI client pointed at OpenRouter. """Return a shared async OpenAI-compatible client for OpenRouter.
The client is created lazily on first call and reused thereafter. The client is created lazily on first call and reused thereafter.
Uses the centralized provider router for auth and client construction.
Raises ValueError if OPENROUTER_API_KEY is not set. Raises ValueError if OPENROUTER_API_KEY is not set.
""" """
global _client global _client
if _client is None: if _client is None:
api_key = os.getenv("OPENROUTER_API_KEY") from agent.auxiliary_client import resolve_provider_client
if not api_key: client, _model = resolve_provider_client("openrouter", async_mode=True)
if client is None:
raise ValueError("OPENROUTER_API_KEY environment variable not set") raise ValueError("OPENROUTER_API_KEY environment variable not set")
_client = AsyncOpenAI( _client = client
api_key=api_key,
base_url=OPENROUTER_BASE_URL,
default_headers={
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
)
return _client return _client

View file

@ -22,13 +22,7 @@ import os
import logging import logging
from typing import Dict, Any, List, Optional, Union from typing import Dict, Any, List, Optional, Union
from openai import AsyncOpenAI, OpenAI from agent.auxiliary_client import async_call_llm
from agent.auxiliary_client import get_async_text_auxiliary_client
# Resolve the async auxiliary client at import time so we have the model slug.
# Handles Codex Responses API adapter transparently.
_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
MAX_SESSION_CHARS = 100_000 MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 10000 MAX_SUMMARY_TOKENS = 10000
@ -156,26 +150,22 @@ async def _summarize_session(
f"Summarize this conversation with focus on: {query}" f"Summarize this conversation with focus on: {query}"
) )
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
logging.warning("No auxiliary model available for session summarization")
return None
max_retries = 3 max_retries = 3
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param response = await async_call_llm(
_extra = get_auxiliary_extra_body() task="session_search",
response = await _async_aux_client.chat.completions.create(
model=_SUMMARIZER_MODEL,
messages=[ messages=[
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}, {"role": "user", "content": user_prompt},
], ],
**({} if not _extra else {"extra_body": _extra}),
temperature=0.1, temperature=0.1,
**auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS), max_tokens=MAX_SUMMARY_TOKENS,
) )
return response.choices[0].message.content.strip() return response.choices[0].message.content.strip()
except RuntimeError:
logging.warning("No auxiliary model available for session summarization")
return None
except Exception as e: except Exception as e:
if attempt < max_retries - 1: if attempt < max_retries - 1:
await asyncio.sleep(1 * (attempt + 1)) await asyncio.sleep(1 * (attempt + 1))
@ -333,8 +323,6 @@ def session_search(
def check_session_search_requirements() -> bool: def check_session_search_requirements() -> bool:
"""Requires SQLite state database and an auxiliary text model.""" """Requires SQLite state database and an auxiliary text model."""
if _async_aux_client is None:
return False
try: try:
from hermes_state import DEFAULT_DB_PATH from hermes_state import DEFAULT_DB_PATH
return DEFAULT_DB_PATH.parent.exists() return DEFAULT_DB_PATH.parent.exists()

View file

@ -29,7 +29,7 @@ from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import List, Tuple from typing import List, Tuple
from hermes_constants import OPENROUTER_BASE_URL
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@ -934,25 +934,12 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
if not model: if not model:
return static_result return static_result
# Call the LLM via the OpenAI SDK (same pattern as run_agent.py) # Call the LLM via the centralized provider router
try: try:
from openai import OpenAI from agent.auxiliary_client import call_llm
import os
api_key = os.getenv("OPENROUTER_API_KEY", "") response = call_llm(
if not api_key: provider="openrouter",
return static_result
client = OpenAI(
base_url=OPENROUTER_BASE_URL,
api_key=api_key,
default_headers={
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
},
)
response = client.chat.completions.create(
model=model, model=model,
messages=[{ messages=[{
"role": "user", "role": "user",

View file

@ -37,28 +37,11 @@ from pathlib import Path
from typing import Any, Awaitable, Dict, Optional from typing import Any, Awaitable, Dict, Optional
from urllib.parse import urlparse from urllib.parse import urlparse
import httpx import httpx
from openai import AsyncOpenAI from agent.auxiliary_client import async_call_llm
from agent.auxiliary_client import get_vision_auxiliary_client
from tools.debug_helpers import DebugSession from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Resolve vision auxiliary client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_async_kwargs = {
"api_key": _aux_sync_client.api_key,
"base_url": str(_aux_sync_client.base_url),
}
if "openrouter" in str(_aux_sync_client.base_url).lower():
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
_aux_async_client = AsyncOpenAI(**_async_kwargs)
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG") _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@ -197,7 +180,7 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
async def vision_analyze_tool( async def vision_analyze_tool(
image_url: str, image_url: str,
user_prompt: str, user_prompt: str,
model: str = DEFAULT_VISION_MODEL, model: str = None,
) -> str: ) -> str:
""" """
Analyze an image from a URL or local file path using vision AI. Analyze an image from a URL or local file path using vision AI.
@ -257,15 +240,6 @@ async def vision_analyze_tool(
logger.info("Analyzing image: %s", image_url[:60]) logger.info("Analyzing image: %s", image_url[:60])
logger.info("User prompt: %s", user_prompt[:100]) logger.info("User prompt: %s", user_prompt[:100])
# Check auxiliary vision client availability
if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
logger.error("Vision analysis unavailable: no auxiliary vision model configured")
return json.dumps({
"success": False,
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
}, indent=2, ensure_ascii=False)
# Determine if this is a local file path or a remote URL # Determine if this is a local file path or a remote URL
local_path = Path(image_url) local_path = Path(image_url)
if local_path.is_file(): if local_path.is_file():
@ -321,18 +295,18 @@ async def vision_analyze_tool(
} }
] ]
logger.info("Processing image with %s...", model) logger.info("Processing image with vision model...")
# Call the vision API # Call the vision API via centralized router
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param call_kwargs = {
_extra = get_auxiliary_extra_body() "task": "vision",
response = await _aux_async_client.chat.completions.create( "messages": messages,
model=model, "temperature": 0.1,
messages=messages, "max_tokens": 2000,
temperature=0.1, }
**auxiliary_max_tokens_param(2000), if model:
**({} if not _extra else {"extra_body": _extra}), call_kwargs["model"] = model
) response = await async_call_llm(**call_kwargs)
# Extract the analysis # Extract the analysis
analysis = response.choices[0].message.content.strip() analysis = response.choices[0].message.content.strip()
@ -359,10 +333,28 @@ async def vision_analyze_tool(
error_msg = f"Error analyzing image: {str(e)}" error_msg = f"Error analyzing image: {str(e)}"
logger.error("%s", error_msg, exc_info=True) logger.error("%s", error_msg, exc_info=True)
# Detect vision capability errors — give the model a clear message
# so it can inform the user instead of a cryptic API error.
err_str = str(e).lower()
if any(hint in err_str for hint in (
"does not support", "not support image", "invalid_request",
"content_policy", "image_url", "multimodal",
"unrecognized request argument", "image input",
)):
analysis = (
f"{model} does not support vision or our request was not "
f"accepted by the server. Error: {e}"
)
else:
analysis = (
"There was a problem with the request and the image could not "
f"be analyzed. Error: {e}"
)
# Prepare error response # Prepare error response
result = { result = {
"success": False, "success": False,
"analysis": "There was a problem with the request and the image could not be analyzed." "analysis": analysis,
} }
debug_call_data["error"] = error_msg debug_call_data["error"] = error_msg
@ -385,7 +377,18 @@ async def vision_analyze_tool(
def check_vision_requirements() -> bool: def check_vision_requirements() -> bool:
"""Check if an auxiliary vision model is available.""" """Check if an auxiliary vision model is available."""
return _aux_async_client is not None try:
from agent.auxiliary_client import resolve_provider_client
client, _ = resolve_provider_client("openrouter")
if client is not None:
return True
client, _ = resolve_provider_client("nous")
if client is not None:
return True
client, _ = resolve_provider_client("custom")
return client is not None
except Exception:
return False
def get_debug_session_info() -> Dict[str, Any]: def get_debug_session_info() -> Dict[str, Any]:
@ -413,10 +416,9 @@ if __name__ == "__main__":
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.") print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
exit(1) exit(1)
else: else:
print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}") print("✅ Vision model available")
print("🛠️ Vision tools ready for use!") print("🛠️ Vision tools ready for use!")
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
# Show debug mode status # Show debug mode status
if _debug.active: if _debug.active:
@ -483,9 +485,7 @@ def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
"Fully describe and explain everything about this image, then answer the " "Fully describe and explain everything about this image, then answer the "
f"following question:\n\n{question}" f"following question:\n\n{question}"
) )
model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip() model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
or DEFAULT_VISION_MODEL
or "google/gemini-3-flash-preview")
return vision_analyze_tool(image_url, full_prompt, model) return vision_analyze_tool(image_url, full_prompt, model)

View file

@ -47,8 +47,7 @@ import re
import asyncio import asyncio
from typing import List, Dict, Any, Optional from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl from firecrawl import Firecrawl
from openai import AsyncOpenAI from agent.auxiliary_client import async_call_llm
from agent.auxiliary_client import get_async_text_auxiliary_client
from tools.debug_helpers import DebugSession from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -83,15 +82,8 @@ def _get_firecrawl_client():
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve async auxiliary client at module level. # Allow per-task override via env var
# Handles Codex Responses API adapter transparently. DEFAULT_SUMMARIZER_MODEL = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract")
# Allow per-task override via config.yaml auxiliary.web_extract_model
DEFAULT_SUMMARIZER_MODEL = (
os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
or _DEFAULT_SUMMARIZER_MODEL
)
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -249,22 +241,22 @@ Create a markdown summary that captures all key information in a well-organized,
for attempt in range(max_retries): for attempt in range(max_retries):
try: try:
if _aux_async_client is None: call_kwargs = {
logger.warning("No auxiliary model available for web content processing") "task": "web_extract",
return None "messages": [
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
_extra = get_auxiliary_extra_body()
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": system_prompt}, {"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt} {"role": "user", "content": user_prompt}
], ],
temperature=0.1, "temperature": 0.1,
**auxiliary_max_tokens_param(max_tokens), "max_tokens": max_tokens,
**({} if not _extra else {"extra_body": _extra}), }
) if model:
call_kwargs["model"] = model
response = await async_call_llm(**call_kwargs)
return response.choices[0].message.content.strip() return response.choices[0].message.content.strip()
except RuntimeError:
logger.warning("No auxiliary model available for web content processing")
return None
except Exception as api_error: except Exception as api_error:
last_error = api_error last_error = api_error
if attempt < max_retries - 1: if attempt < max_retries - 1:
@ -368,25 +360,18 @@ Synthesize these into ONE cohesive, comprehensive summary that:
Create a single, unified markdown summary.""" Create a single, unified markdown summary."""
try: try:
if _aux_async_client is None: call_kwargs = {
logger.warning("No auxiliary model for synthesis, concatenating summaries") "task": "web_extract",
fallback = "\n\n".join(summaries) "messages": [
if len(fallback) > max_output_size:
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
return fallback
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
_extra = get_auxiliary_extra_body()
response = await _aux_async_client.chat.completions.create(
model=model,
messages=[
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."}, {"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
{"role": "user", "content": synthesis_prompt} {"role": "user", "content": synthesis_prompt}
], ],
temperature=0.1, "temperature": 0.1,
**auxiliary_max_tokens_param(20000), "max_tokens": 20000,
**({} if not _extra else {"extra_body": _extra}), }
) if model:
call_kwargs["model"] = model
response = await async_call_llm(**call_kwargs)
final_summary = response.choices[0].message.content.strip() final_summary = response.choices[0].message.content.strip()
# Enforce hard cap # Enforce hard cap
@ -713,8 +698,8 @@ async def web_extract_tool(
debug_call_data["pages_extracted"] = pages_extracted debug_call_data["pages_extracted"] = pages_extracted
debug_call_data["original_response_size"] = len(json.dumps(response)) debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled and auxiliary client is available # Process each result with LLM if enabled
if use_llm_processing and _aux_async_client is not None: if use_llm_processing:
logger.info("Processing extracted content with LLM (parallel)...") logger.info("Processing extracted content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing") debug_call_data["processing_applied"].append("llm_processing")
@ -780,10 +765,6 @@ async def web_extract_tool(
else: else:
logger.warning("%s (no content to process)", url) logger.warning("%s (no content to process)", url)
else: else:
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of extracted pages for debugging (original behavior) # Print summary of extracted pages for debugging (original behavior)
for result in response.get('results', []): for result in response.get('results', []):
url = result.get('url', 'Unknown URL') url = result.get('url', 'Unknown URL')
@ -1013,8 +994,8 @@ async def web_crawl_tool(
debug_call_data["pages_crawled"] = pages_crawled debug_call_data["pages_crawled"] = pages_crawled
debug_call_data["original_response_size"] = len(json.dumps(response)) debug_call_data["original_response_size"] = len(json.dumps(response))
# Process each result with LLM if enabled and auxiliary client is available # Process each result with LLM if enabled
if use_llm_processing and _aux_async_client is not None: if use_llm_processing:
logger.info("Processing crawled content with LLM (parallel)...") logger.info("Processing crawled content with LLM (parallel)...")
debug_call_data["processing_applied"].append("llm_processing") debug_call_data["processing_applied"].append("llm_processing")
@ -1080,10 +1061,6 @@ async def web_crawl_tool(
else: else:
logger.warning("%s (no content to process)", page_url) logger.warning("%s (no content to process)", page_url)
else: else:
if use_llm_processing and _aux_async_client is None:
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
debug_call_data["processing_applied"].append("llm_processing_unavailable")
# Print summary of crawled pages for debugging (original behavior) # Print summary of crawled pages for debugging (original behavior)
for result in response.get('results', []): for result in response.get('results', []):
page_url = result.get('url', 'Unknown URL') page_url = result.get('url', 'Unknown URL')
@ -1138,7 +1115,15 @@ def check_firecrawl_api_key() -> bool:
def check_auxiliary_model() -> bool: def check_auxiliary_model() -> bool:
"""Check if an auxiliary text model is available for LLM content processing.""" """Check if an auxiliary text model is available for LLM content processing."""
return _aux_async_client is not None try:
from agent.auxiliary_client import resolve_provider_client
for p in ("openrouter", "nous", "custom", "codex"):
client, _ = resolve_provider_client(p)
if client is not None:
return True
return False
except Exception:
return False
def get_debug_session_info() -> Dict[str, Any]: def get_debug_session_info() -> Dict[str, Any]:

View file

@ -344,38 +344,65 @@ class TrajectoryCompressor:
raise RuntimeError(f"Failed to load tokenizer '{self.config.tokenizer_name}': {e}") raise RuntimeError(f"Failed to load tokenizer '{self.config.tokenizer_name}': {e}")
def _init_summarizer(self): def _init_summarizer(self):
"""Initialize OpenRouter client for summarization (sync and async).""" """Initialize LLM routing for summarization (sync and async).
api_key = os.getenv(self.config.api_key_env)
if not api_key: Uses call_llm/async_call_llm from the centralized provider router
raise RuntimeError(f"Missing API key. Set {self.config.api_key_env} environment variable.") which handles auth, headers, and provider detection internally.
For custom endpoints, falls back to raw client construction.
from openai import OpenAI, AsyncOpenAI """
from agent.auxiliary_client import call_llm, async_call_llm
# OpenRouter app attribution headers (only for OpenRouter endpoints)
extra = {} provider = self._detect_provider()
if "openrouter" in self.config.base_url.lower(): if provider:
extra["default_headers"] = { # Store provider for use in _generate_summary calls
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent", self._llm_provider = provider
"X-OpenRouter-Title": "Hermes Agent", self._use_call_llm = True
"X-OpenRouter-Categories": "productivity,cli-agent", # Verify the provider is available
} from agent.auxiliary_client import resolve_provider_client
client, _ = resolve_provider_client(
# Sync client (for backwards compatibility) provider, model=self.config.summarization_model)
self.client = OpenAI( if client is None:
api_key=api_key, raise RuntimeError(
base_url=self.config.base_url, f"Provider '{provider}' is not configured. "
**extra, f"Check your API key or run: hermes setup")
) self.client = None # Not used directly
self.async_client = None # Not used directly
# Async client for parallel processing else:
self.async_client = AsyncOpenAI( # Custom endpoint — use config's raw base_url + api_key_env
api_key=api_key, self._use_call_llm = False
base_url=self.config.base_url, api_key = os.getenv(self.config.api_key_env)
**extra, if not api_key:
) raise RuntimeError(
f"Missing API key. Set {self.config.api_key_env} "
print(f"✅ Initialized OpenRouter client: {self.config.summarization_model}") f"environment variable.")
from openai import OpenAI, AsyncOpenAI
self.client = OpenAI(
api_key=api_key, base_url=self.config.base_url)
self.async_client = AsyncOpenAI(
api_key=api_key, base_url=self.config.base_url)
print(f"✅ Initialized summarizer client: {self.config.summarization_model}")
print(f" Max concurrent requests: {self.config.max_concurrent_requests}") print(f" Max concurrent requests: {self.config.max_concurrent_requests}")
def _detect_provider(self) -> str:
    """Detect the provider name from the configured ``base_url``.

    The URL is parsed and provider markers are matched against its
    *hostname* (plus path for the Codex backend), not against the raw
    string.  Matching the raw string would misclassify a custom gateway
    such as ``http://localhost:8000/openrouter/v1`` as OpenRouter merely
    because a marker appears in its path.

    Returns:
        One of ``"openrouter"``, ``"nous"``, ``"codex"``, ``"zai"``,
        ``"kimi-coding"``, ``"minimax-cn"``, ``"minimax"``; or ``""`` when
        the URL is empty, ``None``, unparseable, or not a known provider.
        ``_init_summarizer`` treats the empty string as "custom endpoint".
    """
    from urllib.parse import urlsplit

    # Tolerate a missing/None base_url instead of raising AttributeError.
    raw = (self.config.base_url or "").strip().lower()
    if not raw:
        return ""

    # urlsplit only populates the host when the URL has a scheme or a
    # leading "//", so bare "host/path" values get the "//" prefix.
    try:
        parts = urlsplit(raw if "://" in raw else f"//{raw}")
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): not a known provider.
        return ""

    host = parts.hostname or ""
    # The Codex marker spans host and path, so it needs both joined.
    location = host + parts.path

    # Order mirrors the original if-chain; "minimaxi.com" must be tested
    # before "minimax.io" only for readability -- the markers are disjoint.
    rules = (
        (host, ("openrouter",), "openrouter"),
        (host, ("nousresearch.com",), "nous"),
        (location, ("chatgpt.com/backend-api/codex",), "codex"),
        (host, ("api.z.ai",), "zai"),
        (host, ("moonshot.ai", "api.kimi.com"), "kimi-coding"),
        (host, ("minimaxi.com",), "minimax-cn"),
        (host, ("minimax.io",), "minimax"),
    )
    for haystack, needles, provider in rules:
        if any(needle in haystack for needle in needles):
            return provider

    # Unknown base_url -- not a known provider.
    return ""
def count_tokens(self, text: str) -> int: def count_tokens(self, text: str) -> int:
"""Count tokens in text using the configured tokenizer.""" """Count tokens in text using the configured tokenizer."""
@ -501,12 +528,22 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
try: try:
metrics.summarization_api_calls += 1 metrics.summarization_api_calls += 1
response = self.client.chat.completions.create( if getattr(self, '_use_call_llm', False):
model=self.config.summarization_model, from agent.auxiliary_client import call_llm
messages=[{"role": "user", "content": prompt}], response = call_llm(
temperature=self.config.temperature, provider=self._llm_provider,
max_tokens=self.config.summary_target_tokens * 2, model=self.config.summarization_model,
) messages=[{"role": "user", "content": prompt}],
temperature=self.config.temperature,
max_tokens=self.config.summary_target_tokens * 2,
)
else:
response = self.client.chat.completions.create(
model=self.config.summarization_model,
messages=[{"role": "user", "content": prompt}],
temperature=self.config.temperature,
max_tokens=self.config.summary_target_tokens * 2,
)
summary = response.choices[0].message.content.strip() summary = response.choices[0].message.content.strip()
@ -558,12 +595,22 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
try: try:
metrics.summarization_api_calls += 1 metrics.summarization_api_calls += 1
response = await self.async_client.chat.completions.create( if getattr(self, '_use_call_llm', False):
model=self.config.summarization_model, from agent.auxiliary_client import async_call_llm
messages=[{"role": "user", "content": prompt}], response = await async_call_llm(
temperature=self.config.temperature, provider=self._llm_provider,
max_tokens=self.config.summary_target_tokens * 2, model=self.config.summarization_model,
) messages=[{"role": "user", "content": prompt}],
temperature=self.config.temperature,
max_tokens=self.config.summary_target_tokens * 2,
)
else:
response = await self.async_client.chat.completions.create(
model=self.config.summarization_model,
messages=[{"role": "user", "content": prompt}],
temperature=self.config.temperature,
max_tokens=self.config.summary_target_tokens * 2,
)
summary = response.choices[0].message.content.strip() summary = response.choices[0].message.content.strip()