Merge pull request #1003 from NousResearch/hermes/hermes-cf9f7d54
feat: centralized provider router, call_llm API, unified /model command
This commit is contained in:
commit
9cb9d1a47a
33 changed files with 1260 additions and 937 deletions
|
|
@ -17,7 +17,10 @@ Resolution order for text tasks (auto mode):
|
||||||
Resolution order for vision/multimodal tasks (auto mode):
|
Resolution order for vision/multimodal tasks (auto mode):
|
||||||
1. OpenRouter
|
1. OpenRouter
|
||||||
2. Nous Portal
|
2. Nous Portal
|
||||||
3. None (steps 3-5 are skipped — they may not support multimodal)
|
3. Codex OAuth (gpt-5.3-codex supports vision via Responses API)
|
||||||
|
4. Custom endpoint (for local vision models: Qwen-VL, LLaVA, Pixtral, etc.)
|
||||||
|
5. None (API-key providers like z.ai/Kimi/MiniMax are skipped —
|
||||||
|
they may not support multimodal)
|
||||||
|
|
||||||
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
|
Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER,
|
||||||
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
|
CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task:
|
||||||
|
|
@ -440,7 +443,7 @@ def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
custom_key = os.getenv("OPENAI_API_KEY")
|
custom_key = os.getenv("OPENAI_API_KEY")
|
||||||
if not custom_base or not custom_key:
|
if not custom_base or not custom_key:
|
||||||
return None, None
|
return None, None
|
||||||
model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini"
|
model = os.getenv("OPENAI_MODEL") or "gpt-4o-mini"
|
||||||
logger.debug("Auxiliary client: custom endpoint (%s)", model)
|
logger.debug("Auxiliary client: custom endpoint (%s)", model)
|
||||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||||
|
|
||||||
|
|
@ -499,6 +502,205 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Centralized Provider Router ─────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# resolve_provider_client() is the single entry point for creating a properly
|
||||||
|
# configured client given a (provider, model) pair. It handles auth lookup,
|
||||||
|
# base URL resolution, provider-specific headers, and API format differences
|
||||||
|
# (Chat Completions vs Responses API for Codex).
|
||||||
|
#
|
||||||
|
# All auxiliary consumer code should go through this or the public helpers
|
||||||
|
# below — never look up auth env vars ad-hoc.
|
||||||
|
|
||||||
|
|
||||||
|
def _to_async_client(sync_client, model: str):
    """Return ``(async_client, model)`` mirroring ``sync_client``.

    A ``CodexAuxiliaryClient`` is wrapped in ``AsyncCodexAuxiliaryClient`` so
    the Codex adapter stays in the path.  Any other client is rebuilt as an
    ``AsyncOpenAI`` with the same API key and base URL; OpenRouter and Kimi
    endpoints additionally receive their provider-specific default headers.
    ``model`` is passed through unchanged.
    """
    from openai import AsyncOpenAI

    # Codex clients keep their adapter instead of becoming a plain AsyncOpenAI.
    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model

    options = {
        "api_key": sync_client.api_key,
        "base_url": str(sync_client.base_url),
    }

    # Header selection is keyed off the endpoint URL, case-insensitively.
    endpoint = options["base_url"].lower()
    if "openrouter" in endpoint:
        options["default_headers"] = dict(_OR_HEADERS)
    elif "api.kimi.com" in endpoint:
        options["default_headers"] = {"User-Agent": "KimiCLI/1.0"}

    return AsyncOpenAI(**options), model
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_provider_client(
    provider: str,
    model: Optional[str] = None,
    async_mode: bool = False,
    raw_codex: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
    """Central router: given a provider name and optional model, return a
    configured client with the correct auth, base URL, and API format.

    Except on the ``raw_codex`` path, the returned client exposes
    ``.chat.completions.create()`` — for Codex/Responses API providers an
    adapter handles the translation.

    Args:
        provider: Provider identifier (case-insensitive, surrounding
            whitespace ignored; a falsy value means "auto"). One of:
            "openrouter", "nous", "openai-codex" (alias "codex"),
            "custom" (alias "main"; tries the custom endpoint first, then
            Codex, then the API-key providers),
            "auto" (full auto-detection chain),
            or any key of ``PROVIDER_REGISTRY`` (e.g. "zai", "kimi-coding",
            "minimax", "minimax-cn").
        model: Model slug override. If None, uses the provider's default
            auxiliary model.
        async_mode: If True, return an async-compatible client.
        raw_codex: If True, return a raw OpenAI client for Codex providers
            instead of wrapping in CodexAuxiliaryClient. Use this when
            the caller needs direct access to responses.stream() (e.g.,
            the main agent loop). On this path the client is always the
            synchronous one; ``async_mode`` is not applied.

    Returns:
        (client, resolved_model) or (None, None) if auth is unavailable or
        the provider is unknown/unsupported.
    """
    def _finish(client, default_model):
        # Shared tail of every success path: the explicit model wins over the
        # provider default, and the client is converted when async is wanted.
        final_model = model or default_model
        if async_mode:
            return _to_async_client(client, final_model)
        return client, final_model

    # Normalise aliases
    provider = (provider or "auto").strip().lower()
    if provider == "codex":
        provider = "openai-codex"
    if provider == "main":
        provider = "custom"

    # ── Auto: try all providers in priority order ────────────────────
    if provider == "auto":
        client, resolved = _resolve_auto()
        if client is None:
            return None, None
        return _finish(client, resolved)

    # ── OpenRouter ───────────────────────────────────────────────────
    if provider == "openrouter":
        client, default = _try_openrouter()
        if client is None:
            logger.warning("resolve_provider_client: openrouter requested "
                           "but OPENROUTER_API_KEY not set")
            return None, None
        return _finish(client, default)

    # ── Nous Portal (OAuth) ──────────────────────────────────────────
    if provider == "nous":
        client, default = _try_nous()
        if client is None:
            logger.warning("resolve_provider_client: nous requested "
                           "but Nous Portal not configured (run: hermes login)")
            return None, None
        return _finish(client, default)

    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
    if provider == "openai-codex":
        if raw_codex:
            # Return the raw OpenAI client for callers that need direct
            # access to responses.stream() (e.g., the main agent loop).
            codex_token = _read_codex_access_token()
            if not codex_token:
                logger.warning("resolve_provider_client: openai-codex requested "
                               "but no Codex OAuth token found (run: hermes model)")
                return None, None
            raw_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
            return raw_client, model or _CODEX_AUX_MODEL
        # Standard path: wrap in CodexAuxiliaryClient adapter
        client, default = _try_codex()
        if client is None:
            logger.warning("resolve_provider_client: openai-codex requested "
                           "but no Codex OAuth token found (run: hermes model)")
            return None, None
        return _finish(client, default)

    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
    if provider == "custom":
        # Try custom first, then codex, then API-key providers
        for try_fn in (_try_custom_endpoint, _try_codex,
                       _resolve_api_key_provider):
            client, default = try_fn()
            if client is not None:
                return _finish(client, default)
        logger.warning("resolve_provider_client: custom/main requested "
                       "but no endpoint credentials found")
        return None, None

    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
    try:
        from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
    except ImportError:
        logger.debug("hermes_cli.auth not available for provider %s", provider)
        return None, None

    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig is None:
        logger.warning("resolve_provider_client: unknown provider %r", provider)
        return None, None

    if pconfig.auth_type == "api_key":
        # Find the first configured API key
        api_key = ""
        for env_var in pconfig.api_key_env_vars:
            api_key = os.getenv(env_var, "").strip()
            if api_key:
                break
        if not api_key:
            logger.warning("resolve_provider_client: provider %s has no API "
                           "key configured (tried: %s)",
                           provider, ", ".join(pconfig.api_key_env_vars))
            return None, None

        # Resolve base URL (env override → provider-specific logic → default)
        base_url_override = (
            os.getenv(pconfig.base_url_env_var, "").strip()
            if pconfig.base_url_env_var else ""
        )
        if provider == "kimi-coding":
            base_url = _resolve_kimi_base_url(
                api_key, pconfig.inference_base_url, base_url_override)
        elif base_url_override:
            base_url = base_url_override
        else:
            base_url = pconfig.inference_base_url

        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")

        # Provider-specific headers
        headers = {}
        if "api.kimi.com" in base_url.lower():
            headers["User-Agent"] = "KimiCLI/1.0"

        client = OpenAI(api_key=api_key, base_url=base_url,
                        **({"default_headers": headers} if headers else {}))
        logger.debug("resolve_provider_client: %s (%s)",
                     provider, model or default_model)
        return _finish(client, default_model)

    if pconfig.auth_type in ("oauth_device_code", "oauth_external"):
        # "nous" and "openai-codex" always return from their dedicated
        # branches above, so any OAuth provider reaching this point has no
        # dedicated resolver.
        logger.warning("resolve_provider_client: OAuth provider %s not "
                       "directly supported, try 'auto'", provider)
        return None, None

    logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
                   pconfig.auth_type, provider)
    return None, None
|
||||||
|
|
||||||
|
|
||||||
# ── Public API ──────────────────────────────────────────────────────────────
|
# ── Public API ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
|
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
|
|
@ -513,8 +715,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider(task)
|
forced = _get_auxiliary_provider(task)
|
||||||
if forced != "auto":
|
if forced != "auto":
|
||||||
return _resolve_forced_provider(forced)
|
return resolve_provider_client(forced)
|
||||||
return _resolve_auto()
|
return resolve_provider_client("auto")
|
||||||
|
|
||||||
|
|
||||||
def get_async_text_auxiliary_client(task: str = ""):
|
def get_async_text_auxiliary_client(task: str = ""):
|
||||||
|
|
@ -524,24 +726,10 @@ def get_async_text_auxiliary_client(task: str = ""):
|
||||||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||||
Returns (None, None) when no provider is available.
|
Returns (None, None) when no provider is available.
|
||||||
"""
|
"""
|
||||||
from openai import AsyncOpenAI
|
forced = _get_auxiliary_provider(task)
|
||||||
|
if forced != "auto":
|
||||||
sync_client, model = get_text_auxiliary_client(task)
|
return resolve_provider_client(forced, async_mode=True)
|
||||||
if sync_client is None:
|
return resolve_provider_client("auto", async_mode=True)
|
||||||
return None, None
|
|
||||||
|
|
||||||
if isinstance(sync_client, CodexAuxiliaryClient):
|
|
||||||
return AsyncCodexAuxiliaryClient(sync_client), model
|
|
||||||
|
|
||||||
async_kwargs = {
|
|
||||||
"api_key": sync_client.api_key,
|
|
||||||
"base_url": str(sync_client.base_url),
|
|
||||||
}
|
|
||||||
if "openrouter" in str(sync_client.base_url).lower():
|
|
||||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
|
||||||
elif "api.kimi.com" in str(sync_client.base_url).lower():
|
|
||||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
|
||||||
return AsyncOpenAI(**async_kwargs), model
|
|
||||||
|
|
||||||
|
|
||||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
|
|
@ -559,7 +747,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
"""
|
"""
|
||||||
forced = _get_auxiliary_provider("vision")
|
forced = _get_auxiliary_provider("vision")
|
||||||
if forced != "auto":
|
if forced != "auto":
|
||||||
return _resolve_forced_provider(forced)
|
return resolve_provider_client(forced)
|
||||||
# Auto: try providers known to support multimodal first, then fall
|
# Auto: try providers known to support multimodal first, then fall
|
||||||
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
# back to the user's custom endpoint. Many local models (Qwen-VL,
|
||||||
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
|
||||||
|
|
@ -573,6 +761,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def get_async_vision_auxiliary_client():
    """Async counterpart of ``get_vision_auxiliary_client()``.

    Resolves the synchronous vision client first and converts it with
    ``_to_async_client()``, which keeps the Responses API adapter in place
    for Codex providers rather than rebuilding a bare ``AsyncOpenAI``.

    Returns:
        ``(async_client, model_slug)``, or ``(None, None)`` when no provider
        is available.
    """
    client, model_slug = get_vision_auxiliary_client()
    return (None, None) if client is None else _to_async_client(client, model_slug)
|
||||||
|
|
||||||
|
|
||||||
def get_auxiliary_extra_body() -> dict:
|
def get_auxiliary_extra_body() -> dict:
|
||||||
"""Return extra_body kwargs for auxiliary API calls.
|
"""Return extra_body kwargs for auxiliary API calls.
|
||||||
|
|
||||||
|
|
@ -598,3 +801,253 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
||||||
and "api.openai.com" in custom_base.lower()):
|
and "api.openai.com" in custom_base.lower()):
|
||||||
return {"max_completion_tokens": value}
|
return {"max_completion_tokens": value}
|
||||||
return {"max_tokens": value}
|
return {"max_tokens": value}
|
||||||
|
|
||||||
|
|
||||||
|
# ── Centralized LLM Call API ────────────────────────────────────────────────
|
||||||
|
#
|
||||||
|
# call_llm() and async_call_llm() own the full request lifecycle:
|
||||||
|
# 1. Resolve provider + model from task config (or explicit args)
|
||||||
|
# 2. Get or create a cached client for that provider
|
||||||
|
# 3. Format request args for the provider + model (max_tokens handling, etc.)
|
||||||
|
# 4. Make the API call
|
||||||
|
# 5. Return the response
|
||||||
|
#
|
||||||
|
# Every auxiliary LLM consumer should use these instead of manually
|
||||||
|
# constructing clients and calling .chat.completions.create().
|
||||||
|
|
||||||
|
# Client cache: (provider, async_mode) -> (client, default_model)
|
||||||
|
_client_cache: Dict[tuple, tuple] = {}
|
||||||
|
|
||||||
|
|
||||||
|
def _get_cached_client(
|
||||||
|
provider: str, model: str = None, async_mode: bool = False,
|
||||||
|
) -> Tuple[Optional[Any], Optional[str]]:
|
||||||
|
"""Get or create a cached client for the given provider."""
|
||||||
|
cache_key = (provider, async_mode)
|
||||||
|
if cache_key in _client_cache:
|
||||||
|
cached_client, cached_default = _client_cache[cache_key]
|
||||||
|
return cached_client, model or cached_default
|
||||||
|
client, default_model = resolve_provider_client(provider, model, async_mode)
|
||||||
|
if client is not None:
|
||||||
|
_client_cache[cache_key] = (client, default_model)
|
||||||
|
return client, model or default_model
|
||||||
|
|
||||||
|
|
||||||
|
def _resolve_task_provider_model(
|
||||||
|
task: str = None,
|
||||||
|
provider: str = None,
|
||||||
|
model: str = None,
|
||||||
|
) -> Tuple[str, Optional[str]]:
|
||||||
|
"""Determine provider + model for a call.
|
||||||
|
|
||||||
|
Priority:
|
||||||
|
1. Explicit provider/model args (always win)
|
||||||
|
2. Env var overrides (AUXILIARY_{TASK}_PROVIDER, etc.)
|
||||||
|
3. Config file (auxiliary.{task}.provider/model or compression.*)
|
||||||
|
4. "auto" (full auto-detection chain)
|
||||||
|
|
||||||
|
Returns (provider, model) where model may be None (use provider default).
|
||||||
|
"""
|
||||||
|
if provider:
|
||||||
|
return provider, model
|
||||||
|
|
||||||
|
if task:
|
||||||
|
# Check env var overrides first
|
||||||
|
env_provider = _get_auxiliary_provider(task)
|
||||||
|
if env_provider != "auto":
|
||||||
|
# Check for env var model override too
|
||||||
|
env_model = None
|
||||||
|
for prefix in ("AUXILIARY_", "CONTEXT_"):
|
||||||
|
val = os.getenv(f"{prefix}{task.upper()}_MODEL", "").strip()
|
||||||
|
if val:
|
||||||
|
env_model = val
|
||||||
|
break
|
||||||
|
return env_provider, model or env_model
|
||||||
|
|
||||||
|
# Read from config file
|
||||||
|
try:
|
||||||
|
from hermes_cli.config import load_config
|
||||||
|
config = load_config()
|
||||||
|
except ImportError:
|
||||||
|
return "auto", model
|
||||||
|
|
||||||
|
# Check auxiliary.{task} section
|
||||||
|
aux = config.get("auxiliary", {})
|
||||||
|
task_config = aux.get(task, {})
|
||||||
|
cfg_provider = task_config.get("provider", "").strip() or None
|
||||||
|
cfg_model = task_config.get("model", "").strip() or None
|
||||||
|
|
||||||
|
# Backwards compat: compression section has its own keys
|
||||||
|
if task == "compression" and not cfg_provider:
|
||||||
|
comp = config.get("compression", {})
|
||||||
|
cfg_provider = comp.get("summary_provider", "").strip() or None
|
||||||
|
cfg_model = cfg_model or comp.get("summary_model", "").strip() or None
|
||||||
|
|
||||||
|
if cfg_provider and cfg_provider != "auto":
|
||||||
|
return cfg_provider, model or cfg_model
|
||||||
|
return "auto", model or cfg_model
|
||||||
|
|
||||||
|
return "auto", model
|
||||||
|
|
||||||
|
|
||||||
|
def _build_call_kwargs(
|
||||||
|
provider: str,
|
||||||
|
model: str,
|
||||||
|
messages: list,
|
||||||
|
temperature: Optional[float] = None,
|
||||||
|
max_tokens: Optional[int] = None,
|
||||||
|
tools: Optional[list] = None,
|
||||||
|
timeout: float = 30.0,
|
||||||
|
extra_body: Optional[dict] = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Build kwargs for .chat.completions.create() with model/provider adjustments."""
|
||||||
|
kwargs: Dict[str, Any] = {
|
||||||
|
"model": model,
|
||||||
|
"messages": messages,
|
||||||
|
"timeout": timeout,
|
||||||
|
}
|
||||||
|
|
||||||
|
if temperature is not None:
|
||||||
|
kwargs["temperature"] = temperature
|
||||||
|
|
||||||
|
if max_tokens is not None:
|
||||||
|
# Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens.
|
||||||
|
# Direct OpenAI api.openai.com with newer models needs max_completion_tokens.
|
||||||
|
if provider == "custom":
|
||||||
|
custom_base = os.getenv("OPENAI_BASE_URL", "")
|
||||||
|
if "api.openai.com" in custom_base.lower():
|
||||||
|
kwargs["max_completion_tokens"] = max_tokens
|
||||||
|
else:
|
||||||
|
kwargs["max_tokens"] = max_tokens
|
||||||
|
else:
|
||||||
|
kwargs["max_tokens"] = max_tokens
|
||||||
|
|
||||||
|
if tools:
|
||||||
|
kwargs["tools"] = tools
|
||||||
|
|
||||||
|
# Provider-specific extra_body
|
||||||
|
merged_extra = dict(extra_body or {})
|
||||||
|
if provider == "nous" or auxiliary_is_nous:
|
||||||
|
merged_extra.setdefault("tags", []).extend(["product=hermes-agent"])
|
||||||
|
if merged_extra:
|
||||||
|
kwargs["extra_body"] = merged_extra
|
||||||
|
|
||||||
|
return kwargs
|
||||||
|
|
||||||
|
|
||||||
|
def call_llm(
    task: Optional[str] = None,
    *,
    provider: Optional[str] = None,
    model: Optional[str] = None,
    messages: list,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    tools: Optional[list] = None,
    timeout: float = 30.0,
    extra_body: Optional[dict] = None,
) -> Any:
    """Centralized synchronous LLM call.

    Resolves provider + model (from task config, explicit args, or auto-detect),
    handles auth, request formatting, and model-specific arg adjustments.
    If the resolved provider has no usable client, OpenRouter is tried as a
    fallback and the request is then formatted for OpenRouter.

    Args:
        task: Auxiliary task name ("compression", "vision", "web_extract",
              "session_search", "skills_hub", "mcp", "flush_memories").
              Reads provider:model from config/env. Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
        messages: Chat messages list.
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
        tools: Tool definitions (for function calling).
        timeout: Request timeout in seconds.
        extra_body: Additional request body fields.

    Returns:
        Response object with .choices[0].message.content

    Raises:
        RuntimeError: If no provider is configured.
        Exception: Any error raised by the underlying client call is
            propagated, except for the single ``max_tokens`` →
            ``max_completion_tokens`` retry described below.
    """
    resolved_provider, resolved_model = _resolve_task_provider_model(
        task, provider, model)

    # Tracks which provider actually serves the request so the kwargs are
    # formatted for it rather than for the provider that was unavailable.
    effective_provider = resolved_provider
    client, final_model = _get_cached_client(resolved_provider, resolved_model)
    if client is None and resolved_provider != "openrouter":
        # Fallback: try openrouter
        logger.warning("Provider %s unavailable, falling back to openrouter",
                       resolved_provider)
        client, final_model = _get_cached_client(
            "openrouter", resolved_model or _OPENROUTER_MODEL)
        effective_provider = "openrouter"
    if client is None:
        raise RuntimeError(
            f"No LLM provider configured for task={task} provider={resolved_provider}. "
            f"Run: hermes setup")

    kwargs = _build_call_kwargs(
        effective_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
        tools=tools, timeout=timeout, extra_body=extra_body)

    # Handle max_tokens vs max_completion_tokens retry
    try:
        return client.chat.completions.create(**kwargs)
    except Exception as first_err:
        err_str = str(first_err)
        # Only retry when a max_tokens value was actually sent; otherwise the
        # retry would be the same request plus max_completion_tokens=None.
        should_retry = "max_tokens" in kwargs and (
            "max_tokens" in err_str or "unsupported_parameter" in err_str)
        if not should_retry:
            raise
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return client.chat.completions.create(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
async def async_call_llm(
    task: Optional[str] = None,
    *,
    provider: Optional[str] = None,
    model: Optional[str] = None,
    messages: list,
    temperature: Optional[float] = None,
    max_tokens: Optional[int] = None,
    tools: Optional[list] = None,
    timeout: float = 30.0,
    extra_body: Optional[dict] = None,
) -> Any:
    """Centralized asynchronous LLM call.

    Resolves provider + model from explicit args, task config/env, or
    auto-detection, obtains an async client, and awaits
    ``client.chat.completions.create()``.  If the resolved provider has no
    usable client, OpenRouter is tried as a fallback and the request is then
    formatted for OpenRouter.

    Args:
        task: Auxiliary task name used to look up provider/model settings.
              Ignored if provider is set.
        provider: Explicit provider override.
        model: Explicit model override.
        messages: Chat messages list.
        temperature: Sampling temperature (None = provider default).
        max_tokens: Max output tokens (handles max_tokens vs max_completion_tokens).
        tools: Tool definitions (for function calling).
        timeout: Request timeout in seconds.
        extra_body: Additional request body fields.

    Returns:
        Response object with .choices[0].message.content

    Raises:
        RuntimeError: If no provider is configured.
        Exception: Any error raised by the underlying client call is
            propagated, except for the single ``max_tokens`` →
            ``max_completion_tokens`` retry described below.
    """
    resolved_provider, resolved_model = _resolve_task_provider_model(
        task, provider, model)

    # Tracks which provider actually serves the request so the kwargs are
    # formatted for it rather than for the provider that was unavailable.
    effective_provider = resolved_provider
    client, final_model = _get_cached_client(
        resolved_provider, resolved_model, async_mode=True)
    if client is None and resolved_provider != "openrouter":
        logger.warning("Provider %s unavailable, falling back to openrouter",
                       resolved_provider)
        client, final_model = _get_cached_client(
            "openrouter", resolved_model or _OPENROUTER_MODEL,
            async_mode=True)
        effective_provider = "openrouter"
    if client is None:
        raise RuntimeError(
            f"No LLM provider configured for task={task} provider={resolved_provider}. "
            f"Run: hermes setup")

    kwargs = _build_call_kwargs(
        effective_provider, final_model, messages,
        temperature=temperature, max_tokens=max_tokens,
        tools=tools, timeout=timeout, extra_body=extra_body)

    try:
        return await client.chat.completions.create(**kwargs)
    except Exception as first_err:
        err_str = str(first_err)
        # Only retry when a max_tokens value was actually sent; otherwise the
        # retry would be the same request plus max_completion_tokens=None.
        should_retry = "max_tokens" in kwargs and (
            "max_tokens" in err_str or "unsupported_parameter" in err_str)
        if not should_retry:
            raise
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return await client.chat.completions.create(**kwargs)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@ import logging
|
||||||
import os
|
import os
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from agent.auxiliary_client import get_text_auxiliary_client
|
from agent.auxiliary_client import call_llm
|
||||||
from agent.model_metadata import (
|
from agent.model_metadata import (
|
||||||
get_model_context_length,
|
get_model_context_length,
|
||||||
estimate_messages_tokens_rough,
|
estimate_messages_tokens_rough,
|
||||||
|
|
@ -53,8 +53,7 @@ class ContextCompressor:
|
||||||
self.last_completion_tokens = 0
|
self.last_completion_tokens = 0
|
||||||
self.last_total_tokens = 0
|
self.last_total_tokens = 0
|
||||||
|
|
||||||
self.client, default_model = get_text_auxiliary_client("compression")
|
self.summary_model = summary_model_override or ""
|
||||||
self.summary_model = summary_model_override or default_model
|
|
||||||
|
|
||||||
def update_from_response(self, usage: Dict[str, Any]):
|
def update_from_response(self, usage: Dict[str, Any]):
|
||||||
"""Update tracked token usage from API response."""
|
"""Update tracked token usage from API response."""
|
||||||
|
|
@ -120,84 +119,30 @@ TURNS TO SUMMARIZE:
|
||||||
|
|
||||||
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||||
|
|
||||||
# 1. Try the auxiliary model (cheap/fast)
|
# Use the centralized LLM router — handles provider resolution,
|
||||||
if self.client:
|
# auth, and fallback internally.
|
||||||
try:
|
|
||||||
return self._call_summary_model(self.client, self.summary_model, prompt)
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
|
|
||||||
|
|
||||||
# 2. Fallback: try the user's main model endpoint
|
|
||||||
fallback_client, fallback_model = self._get_fallback_client()
|
|
||||||
if fallback_client is not None:
|
|
||||||
try:
|
|
||||||
logger.info("Retrying context summary with main model (%s)", fallback_model)
|
|
||||||
summary = self._call_summary_model(fallback_client, fallback_model, prompt)
|
|
||||||
self.client = fallback_client
|
|
||||||
self.summary_model = fallback_model
|
|
||||||
return summary
|
|
||||||
except Exception as fallback_err:
|
|
||||||
logging.warning(f"Main model summary also failed: {fallback_err}")
|
|
||||||
|
|
||||||
# 3. All models failed — return None so the caller drops turns without a summary
|
|
||||||
logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _call_summary_model(self, client, model: str, prompt: str) -> str:
|
|
||||||
"""Make the actual LLM call to generate a summary. Raises on failure."""
|
|
||||||
kwargs = {
|
|
||||||
"model": model,
|
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
|
||||||
"temperature": 0.3,
|
|
||||||
"timeout": 30.0,
|
|
||||||
}
|
|
||||||
# Most providers (OpenRouter, local models) use max_tokens.
|
|
||||||
# Direct OpenAI with newer models (gpt-4o, o-series, gpt-5+)
|
|
||||||
# requires max_completion_tokens instead.
|
|
||||||
try:
|
try:
|
||||||
kwargs["max_tokens"] = self.summary_target_tokens * 2
|
call_kwargs = {
|
||||||
response = client.chat.completions.create(**kwargs)
|
"task": "compression",
|
||||||
except Exception as first_err:
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
if "max_tokens" in str(first_err) or "unsupported_parameter" in str(first_err):
|
"temperature": 0.3,
|
||||||
kwargs.pop("max_tokens", None)
|
"max_tokens": self.summary_target_tokens * 2,
|
||||||
kwargs["max_completion_tokens"] = self.summary_target_tokens * 2
|
"timeout": 30.0,
|
||||||
response = client.chat.completions.create(**kwargs)
|
}
|
||||||
else:
|
if self.summary_model:
|
||||||
raise
|
call_kwargs["model"] = self.summary_model
|
||||||
|
response = call_llm(**call_kwargs)
|
||||||
summary = response.choices[0].message.content.strip()
|
summary = response.choices[0].message.content.strip()
|
||||||
if not summary.startswith("[CONTEXT SUMMARY]:"):
|
if not summary.startswith("[CONTEXT SUMMARY]:"):
|
||||||
summary = "[CONTEXT SUMMARY]: " + summary
|
summary = "[CONTEXT SUMMARY]: " + summary
|
||||||
return summary
|
return summary
|
||||||
|
except RuntimeError:
|
||||||
def _get_fallback_client(self):
|
logging.warning("Context compression: no provider available for "
|
||||||
"""Try to build a fallback client from the main model's endpoint config.
|
"summary. Middle turns will be dropped without summary.")
|
||||||
|
return None
|
||||||
When the primary auxiliary client fails (e.g. stale OpenRouter key), this
|
except Exception as e:
|
||||||
creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
|
logging.warning("Failed to generate context summary: %s", e)
|
||||||
so compression can still produce a real summary instead of a static string.
|
return None
|
||||||
|
|
||||||
Returns (client, model) or (None, None).
|
|
||||||
"""
|
|
||||||
custom_base = os.getenv("OPENAI_BASE_URL")
|
|
||||||
custom_key = os.getenv("OPENAI_API_KEY")
|
|
||||||
if not custom_base or not custom_key:
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
# Don't fallback to the same provider that just failed
|
|
||||||
from hermes_constants import OPENROUTER_BASE_URL
|
|
||||||
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
|
|
||||||
try:
|
|
||||||
from openai import OpenAI as _OpenAI
|
|
||||||
client = _OpenAI(api_key=custom_key, base_url=custom_base)
|
|
||||||
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
|
|
||||||
return client, model
|
|
||||||
except Exception as exc:
|
|
||||||
logger.debug("Could not build fallback auxiliary client: %s", exc)
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
# ------------------------------------------------------------------
|
# ------------------------------------------------------------------
|
||||||
# Tool-call / tool-result pair integrity helpers
|
# Tool-call / tool-result pair integrity helpers
|
||||||
|
|
|
||||||
143
cli.py
143
cli.py
|
|
@ -1129,12 +1129,17 @@ class HermesCLI:
|
||||||
self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
|
self.verbose = verbose if verbose is not None else (self.tool_progress_mode == "verbose")
|
||||||
|
|
||||||
# Configuration - priority: CLI args > env vars > config file
|
# Configuration - priority: CLI args > env vars > config file
|
||||||
# Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config
|
# Model comes from: CLI arg or config.yaml (single source of truth).
|
||||||
self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"]
|
# LLM_MODEL/OPENAI_MODEL env vars are NOT checked — config.yaml is
|
||||||
|
# authoritative. This avoids conflicts in multi-agent setups where
|
||||||
|
# env vars would stomp each other.
|
||||||
|
_model_config = CLI_CONFIG.get("model", {})
|
||||||
|
_config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||||
|
self.model = model or _config_model or "anthropic/claude-opus-4.6"
|
||||||
# Track whether model was explicitly chosen by the user or fell back
|
# Track whether model was explicitly chosen by the user or fell back
|
||||||
# to the global default. Provider-specific normalisation may override
|
# to the global default. Provider-specific normalisation may override
|
||||||
# the default silently but should warn when overriding an explicit choice.
|
# the default silently but should warn when overriding an explicit choice.
|
||||||
self._model_is_default = not (model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL"))
|
self._model_is_default = not model
|
||||||
|
|
||||||
self._explicit_api_key = api_key
|
self._explicit_api_key = api_key
|
||||||
self._explicit_base_url = base_url
|
self._explicit_base_url = base_url
|
||||||
|
|
@ -2260,6 +2265,72 @@ class HermesCLI:
|
||||||
remaining = len(self.conversation_history)
|
remaining = len(self.conversation_history)
|
||||||
print(f" {remaining} message(s) remaining in history.")
|
print(f" {remaining} message(s) remaining in history.")
|
||||||
|
|
||||||
|
def _show_model_and_providers(self):
|
||||||
|
"""Unified /model and /provider display.
|
||||||
|
|
||||||
|
Shows current model + provider, then lists all authenticated
|
||||||
|
providers with their available models so users can switch easily.
|
||||||
|
"""
|
||||||
|
from hermes_cli.models import (
|
||||||
|
curated_models_for_provider, list_available_providers,
|
||||||
|
normalize_provider, _PROVIDER_LABELS,
|
||||||
|
)
|
||||||
|
from hermes_cli.auth import resolve_provider as _resolve_provider
|
||||||
|
|
||||||
|
# Resolve current provider
|
||||||
|
raw_provider = normalize_provider(self.provider)
|
||||||
|
if raw_provider == "auto":
|
||||||
|
try:
|
||||||
|
current = _resolve_provider(
|
||||||
|
self.requested_provider,
|
||||||
|
explicit_api_key=self._explicit_api_key,
|
||||||
|
explicit_base_url=self._explicit_base_url,
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
current = "openrouter"
|
||||||
|
else:
|
||||||
|
current = raw_provider
|
||||||
|
current_label = _PROVIDER_LABELS.get(current, current)
|
||||||
|
|
||||||
|
print(f"\n Current: {self.model} via {current_label}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
# Show all authenticated providers with their models
|
||||||
|
providers = list_available_providers()
|
||||||
|
authed = [p for p in providers if p["authenticated"]]
|
||||||
|
unauthed = [p for p in providers if not p["authenticated"]]
|
||||||
|
|
||||||
|
if authed:
|
||||||
|
print(" Authenticated providers & models:")
|
||||||
|
for p in authed:
|
||||||
|
is_active = p["id"] == current
|
||||||
|
marker = " ← active" if is_active else ""
|
||||||
|
print(f" [{p['id']}]{marker}")
|
||||||
|
curated = curated_models_for_provider(p["id"])
|
||||||
|
if curated:
|
||||||
|
for mid, desc in curated:
|
||||||
|
current_marker = " ← current" if (is_active and mid == self.model) else ""
|
||||||
|
print(f" {mid}{current_marker}")
|
||||||
|
else:
|
||||||
|
print(f" (use /model {p['id']}:<model-name>)")
|
||||||
|
print()
|
||||||
|
|
||||||
|
if unauthed:
|
||||||
|
names = ", ".join(p["label"] for p in unauthed)
|
||||||
|
print(f" Not configured: {names}")
|
||||||
|
print(f" Run: hermes setup")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print(" Switch model: /model <model-name>")
|
||||||
|
print(" Switch provider: /model <provider>:<model-name>")
|
||||||
|
if authed and len(authed) > 1:
|
||||||
|
# Show a concrete example with a non-active provider
|
||||||
|
other = next((p for p in authed if p["id"] != current), authed[0])
|
||||||
|
other_models = curated_models_for_provider(other["id"])
|
||||||
|
if other_models:
|
||||||
|
example_model = other_models[0][0]
|
||||||
|
print(f" Example: /model {other['id']}:{example_model}")
|
||||||
|
|
||||||
def _handle_prompt_command(self, cmd: str):
|
def _handle_prompt_command(self, cmd: str):
|
||||||
"""Handle the /prompt command to view or set system prompt."""
|
"""Handle the /prompt command to view or set system prompt."""
|
||||||
parts = cmd.split(maxsplit=1)
|
parts = cmd.split(maxsplit=1)
|
||||||
|
|
@ -2724,7 +2795,11 @@ class HermesCLI:
|
||||||
base_url_for_probe = runtime.get("base_url", "")
|
base_url_for_probe = runtime.get("base_url", "")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
|
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
|
||||||
print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}")
|
if target_provider == "custom":
|
||||||
|
print(f"(>_<) Custom endpoint not configured. Set OPENAI_BASE_URL and OPENAI_API_KEY,")
|
||||||
|
print(f" or run: hermes setup → Custom OpenAI-compatible endpoint")
|
||||||
|
else:
|
||||||
|
print(f"(>_<) Could not resolve credentials for provider '{provider_label}': {e}")
|
||||||
print(f"(^_^) Current model unchanged: {self.model}")
|
print(f"(^_^) Current model unchanged: {self.model}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
@ -2771,65 +2846,9 @@ class HermesCLI:
|
||||||
print(f" Reason: {message}")
|
print(f" Reason: {message}")
|
||||||
print(" Note: Model will revert on restart. Use a verified model to save to config.")
|
print(" Note: Model will revert on restart. Use a verified model to save to config.")
|
||||||
else:
|
else:
|
||||||
from hermes_cli.models import curated_models_for_provider, normalize_provider, _PROVIDER_LABELS
|
self._show_model_and_providers()
|
||||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
|
||||||
# Resolve "auto" to the actual provider using credential detection
|
|
||||||
raw_provider = normalize_provider(self.provider)
|
|
||||||
if raw_provider == "auto":
|
|
||||||
try:
|
|
||||||
display_provider = _resolve_provider(
|
|
||||||
self.requested_provider,
|
|
||||||
explicit_api_key=self._explicit_api_key,
|
|
||||||
explicit_base_url=self._explicit_base_url,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
display_provider = "openrouter"
|
|
||||||
else:
|
|
||||||
display_provider = raw_provider
|
|
||||||
provider_label = _PROVIDER_LABELS.get(display_provider, display_provider)
|
|
||||||
print(f"\n Current model: {self.model}")
|
|
||||||
print(f" Current provider: {provider_label}")
|
|
||||||
print()
|
|
||||||
curated = curated_models_for_provider(display_provider)
|
|
||||||
if curated:
|
|
||||||
print(f" Available models ({provider_label}):")
|
|
||||||
for mid, desc in curated:
|
|
||||||
marker = " ←" if mid == self.model else ""
|
|
||||||
label = f" {desc}" if desc else ""
|
|
||||||
print(f" {mid}{label}{marker}")
|
|
||||||
print()
|
|
||||||
print(" Usage: /model <model-name>")
|
|
||||||
print(" /model provider:model-name (to switch provider)")
|
|
||||||
print(" Example: /model openrouter:anthropic/claude-sonnet-4.5")
|
|
||||||
print(" See /provider for available providers")
|
|
||||||
elif cmd_lower == "/provider":
|
elif cmd_lower == "/provider":
|
||||||
from hermes_cli.models import list_available_providers, normalize_provider, _PROVIDER_LABELS
|
self._show_model_and_providers()
|
||||||
from hermes_cli.auth import resolve_provider as _resolve_provider
|
|
||||||
# Resolve current provider
|
|
||||||
raw_provider = normalize_provider(self.provider)
|
|
||||||
if raw_provider == "auto":
|
|
||||||
try:
|
|
||||||
current = _resolve_provider(
|
|
||||||
self.requested_provider,
|
|
||||||
explicit_api_key=self._explicit_api_key,
|
|
||||||
explicit_base_url=self._explicit_base_url,
|
|
||||||
)
|
|
||||||
except Exception:
|
|
||||||
current = "openrouter"
|
|
||||||
else:
|
|
||||||
current = raw_provider
|
|
||||||
current_label = _PROVIDER_LABELS.get(current, current)
|
|
||||||
print(f"\n Current provider: {current_label} ({current})\n")
|
|
||||||
providers = list_available_providers()
|
|
||||||
print(" Available providers:")
|
|
||||||
for p in providers:
|
|
||||||
marker = " ← active" if p["id"] == current else ""
|
|
||||||
auth = "✓" if p["authenticated"] else "✗"
|
|
||||||
aliases = f" (also: {', '.join(p['aliases'])})" if p["aliases"] else ""
|
|
||||||
print(f" [{auth}] {p['id']:<14} {p['label']}{aliases}{marker}")
|
|
||||||
print()
|
|
||||||
print(" Switch: /model provider:model-name")
|
|
||||||
print(" Setup: hermes setup")
|
|
||||||
elif cmd_lower.startswith("/prompt"):
|
elif cmd_lower.startswith("/prompt"):
|
||||||
# Use original case so prompt text isn't lowercased
|
# Use original case so prompt text isn't lowercased
|
||||||
self._handle_prompt_command(cmd_original)
|
self._handle_prompt_command(cmd_original)
|
||||||
|
|
|
||||||
|
|
@ -180,7 +180,7 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]:
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
|
load_dotenv(str(_hermes_home / ".env"), override=True, encoding="latin-1")
|
||||||
|
|
||||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
model = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
|
||||||
|
|
||||||
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
|
# Load config.yaml for model, reasoning, prefill, toolsets, provider routing
|
||||||
_cfg = {}
|
_cfg = {}
|
||||||
|
|
|
||||||
|
|
@ -1575,7 +1575,7 @@ class GatewayRunner:
|
||||||
config_path = _hermes_home / 'config.yaml'
|
config_path = _hermes_home / 'config.yaml'
|
||||||
|
|
||||||
# Resolve current model and provider from config
|
# Resolve current model and provider from config
|
||||||
current = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
current = os.getenv("HERMES_MODEL") or "anthropic/claude-opus-4.6"
|
||||||
current_provider = "openrouter"
|
current_provider = "openrouter"
|
||||||
try:
|
try:
|
||||||
if config_path.exists():
|
if config_path.exists():
|
||||||
|
|
|
||||||
|
|
@ -108,14 +108,6 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||||
auth_type="oauth_external",
|
auth_type="oauth_external",
|
||||||
inference_base_url=DEFAULT_CODEX_BASE_URL,
|
inference_base_url=DEFAULT_CODEX_BASE_URL,
|
||||||
),
|
),
|
||||||
"nous-api": ProviderConfig(
|
|
||||||
id="nous-api",
|
|
||||||
name="Nous Portal (API Key)",
|
|
||||||
auth_type="api_key",
|
|
||||||
inference_base_url="https://inference-api.nousresearch.com/v1",
|
|
||||||
api_key_env_vars=("NOUS_API_KEY",),
|
|
||||||
base_url_env_var="NOUS_BASE_URL",
|
|
||||||
),
|
|
||||||
"zai": ProviderConfig(
|
"zai": ProviderConfig(
|
||||||
id="zai",
|
id="zai",
|
||||||
name="Z.AI / GLM",
|
name="Z.AI / GLM",
|
||||||
|
|
@ -521,7 +513,6 @@ def resolve_provider(
|
||||||
|
|
||||||
# Normalize provider aliases
|
# Normalize provider aliases
|
||||||
_PROVIDER_ALIASES = {
|
_PROVIDER_ALIASES = {
|
||||||
"nous_api": "nous-api", "nousapi": "nous-api", "nous-portal-api": "nous-api",
|
|
||||||
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
"glm": "zai", "z-ai": "zai", "z.ai": "zai", "zhipu": "zai",
|
||||||
"kimi": "kimi-coding", "moonshot": "kimi-coding",
|
"kimi": "kimi-coding", "moonshot": "kimi-coding",
|
||||||
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
"minimax-china": "minimax-cn", "minimax_cn": "minimax-cn",
|
||||||
|
|
@ -1680,8 +1671,12 @@ def _prompt_model_selection(model_ids: List[str], current_model: str = "") -> Op
|
||||||
|
|
||||||
|
|
||||||
def _save_model_choice(model_id: str) -> None:
|
def _save_model_choice(model_id: str) -> None:
|
||||||
"""Save the selected model to config.yaml and .env."""
|
"""Save the selected model to config.yaml (single source of truth).
|
||||||
from hermes_cli.config import save_config, load_config, save_env_value
|
|
||||||
|
The model is stored in config.yaml only — NOT in .env. This avoids
|
||||||
|
conflicts in multi-agent setups where env vars would stomp each other.
|
||||||
|
"""
|
||||||
|
from hermes_cli.config import save_config, load_config
|
||||||
|
|
||||||
config = load_config()
|
config = load_config()
|
||||||
# Always use dict format so provider/base_url can be stored alongside
|
# Always use dict format so provider/base_url can be stored alongside
|
||||||
|
|
@ -1690,7 +1685,6 @@ def _save_model_choice(model_id: str) -> None:
|
||||||
else:
|
else:
|
||||||
config["model"] = {"default": model_id}
|
config["model"] = {"default": model_id}
|
||||||
save_config(config)
|
save_config(config)
|
||||||
save_env_value("LLM_MODEL", model_id)
|
|
||||||
|
|
||||||
|
|
||||||
def login_command(args) -> None:
|
def login_command(args) -> None:
|
||||||
|
|
|
||||||
|
|
@ -126,17 +126,41 @@ DEFAULT_CONFIG = {
|
||||||
"summary_provider": "auto",
|
"summary_provider": "auto",
|
||||||
},
|
},
|
||||||
|
|
||||||
# Auxiliary model overrides (advanced). By default Hermes auto-selects
|
# Auxiliary model config — provider:model for each side task.
|
||||||
# the provider and model for each side task. Set these to override.
|
# Format: provider is the provider name, model is the model slug.
|
||||||
|
# "auto" for provider = auto-detect best available provider.
|
||||||
|
# Empty model = use provider's default auxiliary model.
|
||||||
|
# All tasks fall back to openrouter:google/gemini-3-flash-preview if
|
||||||
|
# the configured provider is unavailable.
|
||||||
"auxiliary": {
|
"auxiliary": {
|
||||||
"vision": {
|
"vision": {
|
||||||
"provider": "auto", # auto | openrouter | nous | main
|
"provider": "auto", # auto | openrouter | nous | codex | custom
|
||||||
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
"model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o"
|
||||||
},
|
},
|
||||||
"web_extract": {
|
"web_extract": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
"model": "",
|
"model": "",
|
||||||
},
|
},
|
||||||
|
"compression": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
},
|
||||||
|
"session_search": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
},
|
||||||
|
"skills_hub": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
},
|
||||||
|
"mcp": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
},
|
||||||
|
"flush_memories": {
|
||||||
|
"provider": "auto",
|
||||||
|
"model": "",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
"display": {
|
"display": {
|
||||||
|
|
@ -224,7 +248,7 @@ DEFAULT_CONFIG = {
|
||||||
"personalities": {},
|
"personalities": {},
|
||||||
|
|
||||||
# Config schema version - bump this when adding new required fields
|
# Config schema version - bump this when adding new required fields
|
||||||
"_config_version": 6,
|
"_config_version": 7,
|
||||||
}
|
}
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -249,14 +273,6 @@ REQUIRED_ENV_VARS = {}
|
||||||
# Optional environment variables that enhance functionality
|
# Optional environment variables that enhance functionality
|
||||||
OPTIONAL_ENV_VARS = {
|
OPTIONAL_ENV_VARS = {
|
||||||
# ── Provider (handled in provider selection, not shown in checklists) ──
|
# ── Provider (handled in provider selection, not shown in checklists) ──
|
||||||
"NOUS_API_KEY": {
|
|
||||||
"description": "Nous Portal API key (direct API key access to Nous inference)",
|
|
||||||
"prompt": "Nous Portal API key",
|
|
||||||
"url": "https://portal.nousresearch.com",
|
|
||||||
"password": True,
|
|
||||||
"category": "provider",
|
|
||||||
"advanced": True,
|
|
||||||
},
|
|
||||||
"NOUS_BASE_URL": {
|
"NOUS_BASE_URL": {
|
||||||
"description": "Nous Portal base URL override",
|
"description": "Nous Portal base URL override",
|
||||||
"prompt": "Nous Portal base URL (leave empty for default)",
|
"prompt": "Nous Portal base URL (leave empty for default)",
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,19 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [
|
||||||
]
|
]
|
||||||
|
|
||||||
_PROVIDER_MODELS: dict[str, list[str]] = {
|
_PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
|
"nous": [
|
||||||
|
"claude-opus-4-6",
|
||||||
|
"claude-sonnet-4-6",
|
||||||
|
"gpt-5.4",
|
||||||
|
"gemini-3-flash",
|
||||||
|
"gemini-3.0-pro-preview",
|
||||||
|
"deepseek-v3.2",
|
||||||
|
],
|
||||||
|
"openai-codex": [
|
||||||
|
"gpt-5.2-codex",
|
||||||
|
"gpt-5.1-codex-mini",
|
||||||
|
"gpt-5.1-codex-max",
|
||||||
|
],
|
||||||
"zai": [
|
"zai": [
|
||||||
"glm-5",
|
"glm-5",
|
||||||
"glm-4.7",
|
"glm-4.7",
|
||||||
|
|
@ -263,6 +276,15 @@ def validate_requested_model(
|
||||||
"message": "Model names cannot contain spaces.",
|
"message": "Model names cannot contain spaces.",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Custom endpoints can serve any model — skip validation
|
||||||
|
if normalized == "custom":
|
||||||
|
return {
|
||||||
|
"accepted": True,
|
||||||
|
"persist": True,
|
||||||
|
"recognized": False,
|
||||||
|
"message": None,
|
||||||
|
}
|
||||||
|
|
||||||
# Probe the live API to check if the model actually exists
|
# Probe the live API to check if the model actually exists
|
||||||
api_models = fetch_api_models(api_key, base_url)
|
api_models = fetch_api_models(api_key, base_url)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -618,7 +618,6 @@ def setup_model_provider(config: dict):
|
||||||
keep_label = None # No provider configured — don't show "Keep current"
|
keep_label = None # No provider configured — don't show "Keep current"
|
||||||
|
|
||||||
provider_choices = [
|
provider_choices = [
|
||||||
"Nous Portal API key (direct API key access)",
|
|
||||||
"Login with Nous Portal (Nous Research subscription — OAuth)",
|
"Login with Nous Portal (Nous Research subscription — OAuth)",
|
||||||
"Login with OpenAI Codex",
|
"Login with OpenAI Codex",
|
||||||
"OpenRouter API key (100+ models, pay-per-use)",
|
"OpenRouter API key (100+ models, pay-per-use)",
|
||||||
|
|
@ -632,7 +631,7 @@ def setup_model_provider(config: dict):
|
||||||
provider_choices.append(keep_label)
|
provider_choices.append(keep_label)
|
||||||
|
|
||||||
# Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
|
# Default to "Keep current" if a provider exists, otherwise OpenRouter (most common)
|
||||||
default_provider = len(provider_choices) - 1 if has_any_provider else 3
|
default_provider = len(provider_choices) - 1 if has_any_provider else 2
|
||||||
|
|
||||||
if not has_any_provider:
|
if not has_any_provider:
|
||||||
print_warning("An inference provider is required for Hermes to work.")
|
print_warning("An inference provider is required for Hermes to work.")
|
||||||
|
|
@ -648,42 +647,7 @@ def setup_model_provider(config: dict):
|
||||||
)
|
)
|
||||||
nous_models = [] # populated if Nous login succeeds
|
nous_models = [] # populated if Nous login succeeds
|
||||||
|
|
||||||
if provider_idx == 0: # Nous Portal API Key (direct)
|
if provider_idx == 0: # Nous Portal (OAuth)
|
||||||
selected_provider = "nous-api"
|
|
||||||
print()
|
|
||||||
print_header("Nous Portal API Key")
|
|
||||||
print_info("Use a Nous Portal API key for direct access to Nous inference.")
|
|
||||||
print_info("Get your API key at: https://portal.nousresearch.com")
|
|
||||||
print()
|
|
||||||
|
|
||||||
existing_key = get_env_value("NOUS_API_KEY")
|
|
||||||
if existing_key:
|
|
||||||
print_info(f"Current: {existing_key[:8]}... (configured)")
|
|
||||||
if prompt_yes_no("Update Nous API key?", False):
|
|
||||||
api_key = prompt(" Nous API key", password=True)
|
|
||||||
if api_key:
|
|
||||||
save_env_value("NOUS_API_KEY", api_key)
|
|
||||||
print_success("Nous API key updated")
|
|
||||||
else:
|
|
||||||
api_key = prompt(" Nous API key", password=True)
|
|
||||||
if api_key:
|
|
||||||
save_env_value("NOUS_API_KEY", api_key)
|
|
||||||
print_success("Nous API key saved")
|
|
||||||
else:
|
|
||||||
print_warning("Skipped - agent won't work without an API key")
|
|
||||||
|
|
||||||
# Clear custom endpoint vars if switching
|
|
||||||
if existing_custom:
|
|
||||||
save_env_value("OPENAI_BASE_URL", "")
|
|
||||||
save_env_value("OPENAI_API_KEY", "")
|
|
||||||
_update_config_for_provider(
|
|
||||||
"nous-api", "https://inference-api.nousresearch.com/v1"
|
|
||||||
)
|
|
||||||
_set_model_provider(
|
|
||||||
config, "nous-api", "https://inference-api.nousresearch.com/v1"
|
|
||||||
)
|
|
||||||
|
|
||||||
elif provider_idx == 1: # Nous Portal
|
|
||||||
selected_provider = "nous"
|
selected_provider = "nous"
|
||||||
print()
|
print()
|
||||||
print_header("Nous Portal Login")
|
print_header("Nous Portal Login")
|
||||||
|
|
@ -731,7 +695,7 @@ def setup_model_provider(config: dict):
|
||||||
print_info("You can try again later with: hermes model")
|
print_info("You can try again later with: hermes model")
|
||||||
selected_provider = None
|
selected_provider = None
|
||||||
|
|
||||||
elif provider_idx == 2: # OpenAI Codex
|
elif provider_idx == 1: # OpenAI Codex
|
||||||
selected_provider = "openai-codex"
|
selected_provider = "openai-codex"
|
||||||
print()
|
print()
|
||||||
print_header("OpenAI Codex Login")
|
print_header("OpenAI Codex Login")
|
||||||
|
|
@ -757,7 +721,7 @@ def setup_model_provider(config: dict):
|
||||||
print_info("You can try again later with: hermes model")
|
print_info("You can try again later with: hermes model")
|
||||||
selected_provider = None
|
selected_provider = None
|
||||||
|
|
||||||
elif provider_idx == 3: # OpenRouter
|
elif provider_idx == 2: # OpenRouter
|
||||||
selected_provider = "openrouter"
|
selected_provider = "openrouter"
|
||||||
print()
|
print()
|
||||||
print_header("OpenRouter API Key")
|
print_header("OpenRouter API Key")
|
||||||
|
|
@ -812,7 +776,7 @@ def setup_model_provider(config: dict):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Could not save provider to config.yaml: %s", e)
|
logger.debug("Could not save provider to config.yaml: %s", e)
|
||||||
|
|
||||||
elif provider_idx == 4: # Custom endpoint
|
elif provider_idx == 3: # Custom endpoint
|
||||||
selected_provider = "custom"
|
selected_provider = "custom"
|
||||||
print()
|
print()
|
||||||
print_header("Custom OpenAI-Compatible Endpoint")
|
print_header("Custom OpenAI-Compatible Endpoint")
|
||||||
|
|
@ -844,7 +808,6 @@ def setup_model_provider(config: dict):
|
||||||
save_env_value("OPENAI_API_KEY", api_key)
|
save_env_value("OPENAI_API_KEY", api_key)
|
||||||
if model_name:
|
if model_name:
|
||||||
_set_default_model(config, model_name)
|
_set_default_model(config, model_name)
|
||||||
save_env_value("LLM_MODEL", model_name)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from hermes_cli.auth import deactivate_provider
|
from hermes_cli.auth import deactivate_provider
|
||||||
|
|
@ -882,7 +845,7 @@ def setup_model_provider(config: dict):
|
||||||
|
|
||||||
print_success("Custom endpoint configured")
|
print_success("Custom endpoint configured")
|
||||||
|
|
||||||
elif provider_idx == 5: # Z.AI / GLM
|
elif provider_idx == 4: # Z.AI / GLM
|
||||||
selected_provider = "zai"
|
selected_provider = "zai"
|
||||||
print()
|
print()
|
||||||
print_header("Z.AI / GLM API Key")
|
print_header("Z.AI / GLM API Key")
|
||||||
|
|
@ -942,7 +905,7 @@ def setup_model_provider(config: dict):
|
||||||
_update_config_for_provider("zai", zai_base_url)
|
_update_config_for_provider("zai", zai_base_url)
|
||||||
_set_model_provider(config, "zai", zai_base_url)
|
_set_model_provider(config, "zai", zai_base_url)
|
||||||
|
|
||||||
elif provider_idx == 6: # Kimi / Moonshot
|
elif provider_idx == 5: # Kimi / Moonshot
|
||||||
selected_provider = "kimi-coding"
|
selected_provider = "kimi-coding"
|
||||||
print()
|
print()
|
||||||
print_header("Kimi / Moonshot API Key")
|
print_header("Kimi / Moonshot API Key")
|
||||||
|
|
@ -975,7 +938,7 @@ def setup_model_provider(config: dict):
|
||||||
_update_config_for_provider("kimi-coding", pconfig.inference_base_url)
|
_update_config_for_provider("kimi-coding", pconfig.inference_base_url)
|
||||||
_set_model_provider(config, "kimi-coding", pconfig.inference_base_url)
|
_set_model_provider(config, "kimi-coding", pconfig.inference_base_url)
|
||||||
|
|
||||||
elif provider_idx == 7: # MiniMax
|
elif provider_idx == 6: # MiniMax
|
||||||
selected_provider = "minimax"
|
selected_provider = "minimax"
|
||||||
print()
|
print()
|
||||||
print_header("MiniMax API Key")
|
print_header("MiniMax API Key")
|
||||||
|
|
@ -1008,7 +971,7 @@ def setup_model_provider(config: dict):
|
||||||
_update_config_for_provider("minimax", pconfig.inference_base_url)
|
_update_config_for_provider("minimax", pconfig.inference_base_url)
|
||||||
_set_model_provider(config, "minimax", pconfig.inference_base_url)
|
_set_model_provider(config, "minimax", pconfig.inference_base_url)
|
||||||
|
|
||||||
elif provider_idx == 8: # MiniMax China
|
elif provider_idx == 7: # MiniMax China
|
||||||
selected_provider = "minimax-cn"
|
selected_provider = "minimax-cn"
|
||||||
print()
|
print()
|
||||||
print_header("MiniMax China API Key")
|
print_header("MiniMax China API Key")
|
||||||
|
|
@ -1041,14 +1004,13 @@ def setup_model_provider(config: dict):
|
||||||
_update_config_for_provider("minimax-cn", pconfig.inference_base_url)
|
_update_config_for_provider("minimax-cn", pconfig.inference_base_url)
|
||||||
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
|
_set_model_provider(config, "minimax-cn", pconfig.inference_base_url)
|
||||||
|
|
||||||
# else: provider_idx == 9 (Keep current) — only shown when a provider already exists
|
# else: provider_idx == 8 (Keep current) — only shown when a provider already exists
|
||||||
|
|
||||||
# ── OpenRouter API Key for tools (if not already set) ──
|
# ── OpenRouter API Key for tools (if not already set) ──
|
||||||
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
|
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
|
||||||
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
|
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
|
||||||
if selected_provider in (
|
if selected_provider in (
|
||||||
"nous",
|
"nous",
|
||||||
"nous-api",
|
|
||||||
"openai-codex",
|
"openai-codex",
|
||||||
"custom",
|
"custom",
|
||||||
"zai",
|
"zai",
|
||||||
|
|
@ -1121,15 +1083,6 @@ def setup_model_provider(config: dict):
|
||||||
custom = prompt(f" Model name (Enter to keep '{current_model}')")
|
custom = prompt(f" Model name (Enter to keep '{current_model}')")
|
||||||
if custom:
|
if custom:
|
||||||
_set_default_model(config, custom)
|
_set_default_model(config, custom)
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
elif selected_provider == "nous-api":
|
|
||||||
# Nous API key provider — prompt for model manually
|
|
||||||
print_info("Enter a model name available on Nous inference API.")
|
|
||||||
print_info("Examples: anthropic/claude-opus-4.6, deepseek/deepseek-r1")
|
|
||||||
custom = prompt(f" Model name (Enter to keep '{current_model}')")
|
|
||||||
if custom:
|
|
||||||
_set_default_model(config, custom)
|
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
elif selected_provider == "openai-codex":
|
elif selected_provider == "openai-codex":
|
||||||
from hermes_cli.codex_models import get_codex_model_ids
|
from hermes_cli.codex_models import get_codex_model_ids
|
||||||
|
|
||||||
|
|
@ -1146,12 +1099,10 @@ def setup_model_provider(config: dict):
|
||||||
)
|
)
|
||||||
if model_idx < len(codex_models):
|
if model_idx < len(codex_models):
|
||||||
_set_default_model(config, codex_models[model_idx])
|
_set_default_model(config, codex_models[model_idx])
|
||||||
save_env_value("LLM_MODEL", codex_models[model_idx])
|
|
||||||
elif model_idx == len(codex_models):
|
elif model_idx == len(codex_models):
|
||||||
custom = prompt("Enter model name")
|
custom = prompt("Enter model name")
|
||||||
if custom:
|
if custom:
|
||||||
_set_default_model(config, custom)
|
_set_default_model(config, custom)
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
|
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
|
||||||
_set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
|
_set_model_provider(config, "openai-codex", DEFAULT_CODEX_BASE_URL)
|
||||||
elif selected_provider == "zai":
|
elif selected_provider == "zai":
|
||||||
|
|
@ -1172,12 +1123,10 @@ def setup_model_provider(config: dict):
|
||||||
|
|
||||||
if model_idx < len(zai_models):
|
if model_idx < len(zai_models):
|
||||||
_set_default_model(config, zai_models[model_idx])
|
_set_default_model(config, zai_models[model_idx])
|
||||||
save_env_value("LLM_MODEL", zai_models[model_idx])
|
|
||||||
elif model_idx == len(zai_models):
|
elif model_idx == len(zai_models):
|
||||||
custom = prompt("Enter model name")
|
custom = prompt("Enter model name")
|
||||||
if custom:
|
if custom:
|
||||||
_set_default_model(config, custom)
|
_set_default_model(config, custom)
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
# else: keep current
|
# else: keep current
|
||||||
elif selected_provider == "kimi-coding":
|
elif selected_provider == "kimi-coding":
|
||||||
kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]
|
kimi_models = ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]
|
||||||
|
|
@ -1190,12 +1139,10 @@ def setup_model_provider(config: dict):
|
||||||
|
|
||||||
if model_idx < len(kimi_models):
|
if model_idx < len(kimi_models):
|
||||||
_set_default_model(config, kimi_models[model_idx])
|
_set_default_model(config, kimi_models[model_idx])
|
||||||
save_env_value("LLM_MODEL", kimi_models[model_idx])
|
|
||||||
elif model_idx == len(kimi_models):
|
elif model_idx == len(kimi_models):
|
||||||
custom = prompt("Enter model name")
|
custom = prompt("Enter model name")
|
||||||
if custom:
|
if custom:
|
||||||
_set_default_model(config, custom)
|
_set_default_model(config, custom)
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
# else: keep current
|
# else: keep current
|
||||||
elif selected_provider in ("minimax", "minimax-cn"):
|
elif selected_provider in ("minimax", "minimax-cn"):
|
||||||
minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]
|
minimax_models = ["MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]
|
||||||
|
|
@ -1208,12 +1155,10 @@ def setup_model_provider(config: dict):
|
||||||
|
|
||||||
if model_idx < len(minimax_models):
|
if model_idx < len(minimax_models):
|
||||||
_set_default_model(config, minimax_models[model_idx])
|
_set_default_model(config, minimax_models[model_idx])
|
||||||
save_env_value("LLM_MODEL", minimax_models[model_idx])
|
|
||||||
elif model_idx == len(minimax_models):
|
elif model_idx == len(minimax_models):
|
||||||
custom = prompt("Enter model name")
|
custom = prompt("Enter model name")
|
||||||
if custom:
|
if custom:
|
||||||
_set_default_model(config, custom)
|
_set_default_model(config, custom)
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
# else: keep current
|
# else: keep current
|
||||||
else:
|
else:
|
||||||
# Static list for OpenRouter / fallback (from canonical list)
|
# Static list for OpenRouter / fallback (from canonical list)
|
||||||
|
|
@ -1230,12 +1175,10 @@ def setup_model_provider(config: dict):
|
||||||
|
|
||||||
if model_idx < len(ids):
|
if model_idx < len(ids):
|
||||||
_set_default_model(config, ids[model_idx])
|
_set_default_model(config, ids[model_idx])
|
||||||
save_env_value("LLM_MODEL", ids[model_idx])
|
|
||||||
elif model_idx == len(ids): # Custom
|
elif model_idx == len(ids): # Custom
|
||||||
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
|
custom = prompt("Enter model name (e.g., anthropic/claude-opus-4.6)")
|
||||||
if custom:
|
if custom:
|
||||||
_set_default_model(config, custom)
|
_set_default_model(config, custom)
|
||||||
save_env_value("LLM_MODEL", custom)
|
|
||||||
# else: Keep current
|
# else: Keep current
|
||||||
|
|
||||||
_final_model = config.get("model", "")
|
_final_model = config.get("model", "")
|
||||||
|
|
|
||||||
|
|
@ -189,29 +189,30 @@ class MiniSWERunner:
|
||||||
)
|
)
|
||||||
self.logger = logging.getLogger(__name__)
|
self.logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Initialize OpenAI client - defaults to OpenRouter
|
# Initialize LLM client via centralized provider router.
|
||||||
from openai import OpenAI
|
# If explicit api_key/base_url are provided (e.g. from CLI args),
|
||||||
|
# construct directly. Otherwise use the router for OpenRouter.
|
||||||
client_kwargs = {}
|
if api_key or base_url:
|
||||||
|
from openai import OpenAI
|
||||||
# Default to OpenRouter if no base_url provided
|
client_kwargs = {
|
||||||
if base_url:
|
"base_url": base_url or "https://openrouter.ai/api/v1",
|
||||||
client_kwargs["base_url"] = base_url
|
"api_key": api_key or os.getenv(
|
||||||
|
"OPENROUTER_API_KEY",
|
||||||
|
os.getenv("ANTHROPIC_API_KEY",
|
||||||
|
os.getenv("OPENAI_API_KEY", ""))),
|
||||||
|
}
|
||||||
|
self.client = OpenAI(**client_kwargs)
|
||||||
else:
|
else:
|
||||||
client_kwargs["base_url"] = "https://openrouter.ai/api/v1"
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
|
self.client, _ = resolve_provider_client("openrouter", model=model)
|
||||||
|
if self.client is None:
|
||||||
|
# Fallback: try auto-detection
|
||||||
# Handle API key - OpenRouter is the primary provider
|
self.client, _ = resolve_provider_client("auto", model=model)
|
||||||
if api_key:
|
if self.client is None:
|
||||||
client_kwargs["api_key"] = api_key
|
from openai import OpenAI
|
||||||
else:
|
self.client = OpenAI(
|
||||||
client_kwargs["api_key"] = os.getenv(
|
base_url="https://openrouter.ai/api/v1",
|
||||||
"OPENROUTER_API_KEY",
|
api_key=os.getenv("OPENROUTER_API_KEY", ""))
|
||||||
os.getenv("ANTHROPIC_API_KEY", os.getenv("OPENAI_API_KEY", ""))
|
|
||||||
)
|
|
||||||
|
|
||||||
self.client = OpenAI(**client_kwargs)
|
|
||||||
|
|
||||||
# Environment will be created per-task
|
# Environment will be created per-task
|
||||||
self.env = None
|
self.env = None
|
||||||
|
|
|
||||||
234
run_agent.py
234
run_agent.py
|
|
@ -418,36 +418,50 @@ class AIAgent:
|
||||||
]:
|
]:
|
||||||
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
logging.getLogger(quiet_logger).setLevel(logging.ERROR)
|
||||||
|
|
||||||
# Initialize OpenAI client - defaults to OpenRouter
|
# Initialize OpenAI client via centralized provider router.
|
||||||
client_kwargs = {}
|
# The router handles auth resolution, base URL, headers, and
|
||||||
|
# Codex wrapping for all known providers.
|
||||||
# Default to OpenRouter if no base_url provided
|
# raw_codex=True because the main agent needs direct responses.stream()
|
||||||
if base_url:
|
# access for Codex Responses API streaming.
|
||||||
client_kwargs["base_url"] = base_url
|
if api_key and base_url:
|
||||||
|
# Explicit credentials from CLI/gateway — construct directly.
|
||||||
|
# The runtime provider resolver already handled auth for us.
|
||||||
|
client_kwargs = {"api_key": api_key, "base_url": base_url}
|
||||||
|
effective_base = base_url
|
||||||
|
if "openrouter" in effective_base.lower():
|
||||||
|
client_kwargs["default_headers"] = {
|
||||||
|
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||||
|
"X-OpenRouter-Title": "Hermes Agent",
|
||||||
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||||
|
}
|
||||||
|
elif "api.kimi.com" in effective_base.lower():
|
||||||
|
client_kwargs["default_headers"] = {
|
||||||
|
"User-Agent": "KimiCLI/1.0",
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
client_kwargs["base_url"] = OPENROUTER_BASE_URL
|
# No explicit creds — use the centralized provider router
|
||||||
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
# Handle API key - OpenRouter is the primary provider
|
_routed_client, _ = resolve_provider_client(
|
||||||
if api_key:
|
self.provider or "auto", model=self.model, raw_codex=True)
|
||||||
client_kwargs["api_key"] = api_key
|
if _routed_client is not None:
|
||||||
else:
|
client_kwargs = {
|
||||||
# Primary: OPENROUTER_API_KEY, fallback to direct provider keys
|
"api_key": _routed_client.api_key,
|
||||||
client_kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY", "")
|
"base_url": str(_routed_client.base_url),
|
||||||
|
}
|
||||||
# OpenRouter app attribution — shows hermes-agent in rankings/analytics
|
# Preserve any default_headers the router set
|
||||||
effective_base = client_kwargs.get("base_url", "")
|
if hasattr(_routed_client, '_default_headers') and _routed_client._default_headers:
|
||||||
if "openrouter" in effective_base.lower():
|
client_kwargs["default_headers"] = dict(_routed_client._default_headers)
|
||||||
client_kwargs["default_headers"] = {
|
else:
|
||||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
# Final fallback: try raw OpenRouter key
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
client_kwargs = {
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
"api_key": os.getenv("OPENROUTER_API_KEY", ""),
|
||||||
}
|
"base_url": OPENROUTER_BASE_URL,
|
||||||
elif "api.kimi.com" in effective_base.lower():
|
"default_headers": {
|
||||||
# Kimi Code API requires a recognized coding-agent User-Agent
|
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||||
# (see https://github.com/MoonshotAI/kimi-cli)
|
"X-OpenRouter-Title": "Hermes Agent",
|
||||||
client_kwargs["default_headers"] = {
|
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||||
"User-Agent": "KimiCLI/1.0",
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
self._client_kwargs = client_kwargs # stored for rebuilding after interrupt
|
||||||
try:
|
try:
|
||||||
|
|
@ -2243,75 +2257,6 @@ class AIAgent:
|
||||||
|
|
||||||
# ── Provider fallback ──────────────────────────────────────────────────
|
# ── Provider fallback ──────────────────────────────────────────────────
|
||||||
|
|
||||||
# API-key providers: provider → (base_url, [env_var_names])
|
|
||||||
_FALLBACK_API_KEY_PROVIDERS = {
|
|
||||||
"openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]),
|
|
||||||
"zai": ("https://api.z.ai/api/paas/v4", ["ZAI_API_KEY", "Z_AI_API_KEY"]),
|
|
||||||
"kimi-coding": ("https://api.moonshot.ai/v1", ["KIMI_API_KEY"]),
|
|
||||||
"minimax": ("https://api.minimax.io/v1", ["MINIMAX_API_KEY"]),
|
|
||||||
"minimax-cn": ("https://api.minimaxi.com/v1", ["MINIMAX_CN_API_KEY"]),
|
|
||||||
}
|
|
||||||
|
|
||||||
# OAuth providers: provider → (resolver_import_path, api_mode)
|
|
||||||
# Each resolver returns {"api_key": ..., "base_url": ...}.
|
|
||||||
_FALLBACK_OAUTH_PROVIDERS = {
|
|
||||||
"openai-codex": ("resolve_codex_runtime_credentials", "codex_responses"),
|
|
||||||
"nous": ("resolve_nous_runtime_credentials", "chat_completions"),
|
|
||||||
}
|
|
||||||
|
|
||||||
def _resolve_fallback_credentials(
|
|
||||||
self, fb_provider: str, fb_config: dict
|
|
||||||
) -> Optional[tuple]:
|
|
||||||
"""Resolve credentials for a fallback provider.
|
|
||||||
|
|
||||||
Returns (api_key, base_url, api_mode) on success, or None on failure.
|
|
||||||
Handles three cases:
|
|
||||||
1. OAuth providers (openai-codex, nous) — call credential resolver
|
|
||||||
2. API-key providers (openrouter, zai, etc.) — read env var
|
|
||||||
3. Custom endpoints — use base_url + api_key_env from config
|
|
||||||
"""
|
|
||||||
# ── 1. OAuth providers ────────────────────────────────────────
|
|
||||||
if fb_provider in self._FALLBACK_OAUTH_PROVIDERS:
|
|
||||||
resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider]
|
|
||||||
try:
|
|
||||||
import hermes_cli.auth as _auth
|
|
||||||
resolver = getattr(_auth, resolver_name)
|
|
||||||
creds = resolver()
|
|
||||||
return creds["api_key"], creds["base_url"], api_mode
|
|
||||||
except Exception as e:
|
|
||||||
logging.warning(
|
|
||||||
"Fallback to %s failed (credential resolution): %s",
|
|
||||||
fb_provider, e,
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# ── 2. API-key providers ──────────────────────────────────────
|
|
||||||
fb_key = (fb_config.get("api_key") or "").strip()
|
|
||||||
if not fb_key:
|
|
||||||
key_env = (fb_config.get("api_key_env") or "").strip()
|
|
||||||
if key_env:
|
|
||||||
fb_key = os.getenv(key_env, "")
|
|
||||||
elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
|
|
||||||
for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]:
|
|
||||||
fb_key = os.getenv(env_var, "")
|
|
||||||
if fb_key:
|
|
||||||
break
|
|
||||||
if not fb_key:
|
|
||||||
logging.warning(
|
|
||||||
"Fallback model configured but no API key found for provider '%s'",
|
|
||||||
fb_provider,
|
|
||||||
)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# ── 3. Resolve base URL ───────────────────────────────────────
|
|
||||||
fb_base_url = (fb_config.get("base_url") or "").strip()
|
|
||||||
if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS:
|
|
||||||
fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0]
|
|
||||||
if not fb_base_url:
|
|
||||||
fb_base_url = OPENROUTER_BASE_URL
|
|
||||||
|
|
||||||
return fb_key, fb_base_url, "chat_completions"
|
|
||||||
|
|
||||||
def _try_activate_fallback(self) -> bool:
|
def _try_activate_fallback(self) -> bool:
|
||||||
"""Switch to the configured fallback model/provider.
|
"""Switch to the configured fallback model/provider.
|
||||||
|
|
||||||
|
|
@ -2319,6 +2264,10 @@ class AIAgent:
|
||||||
OpenAI client, model slug, and provider in-place so the retry loop
|
OpenAI client, model slug, and provider in-place so the retry loop
|
||||||
can continue with the new backend. One-shot: returns False if
|
can continue with the new backend. One-shot: returns False if
|
||||||
already activated or not configured.
|
already activated or not configured.
|
||||||
|
|
||||||
|
Uses the centralized provider router (resolve_provider_client) for
|
||||||
|
auth resolution and client construction — no duplicated provider→key
|
||||||
|
mappings.
|
||||||
"""
|
"""
|
||||||
if self._fallback_activated or not self._fallback_model:
|
if self._fallback_activated or not self._fallback_model:
|
||||||
return False
|
return False
|
||||||
|
|
@ -2329,25 +2278,31 @@ class AIAgent:
|
||||||
if not fb_provider or not fb_model:
|
if not fb_provider or not fb_model:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
resolved = self._resolve_fallback_credentials(fb_provider, fb)
|
# Use centralized router for client construction.
|
||||||
if resolved is None:
|
# raw_codex=True because the main agent needs direct responses.stream()
|
||||||
return False
|
# access for Codex providers.
|
||||||
fb_key, fb_base_url, fb_api_mode = resolved
|
|
||||||
|
|
||||||
# Build new client
|
|
||||||
try:
|
try:
|
||||||
client_kwargs = {"api_key": fb_key, "base_url": fb_base_url}
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
if "openrouter" in fb_base_url.lower():
|
fb_client, _ = resolve_provider_client(
|
||||||
client_kwargs["default_headers"] = {
|
fb_provider, model=fb_model, raw_codex=True)
|
||||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
if fb_client is None:
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
logging.warning(
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
"Fallback to %s failed: provider not configured",
|
||||||
}
|
fb_provider)
|
||||||
elif "api.kimi.com" in fb_base_url.lower():
|
return False
|
||||||
client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
|
||||||
|
|
||||||
self.client = OpenAI(**client_kwargs)
|
# Determine api_mode from provider
|
||||||
self._client_kwargs = client_kwargs
|
fb_api_mode = "chat_completions"
|
||||||
|
if fb_provider == "openai-codex":
|
||||||
|
fb_api_mode = "codex_responses"
|
||||||
|
fb_base_url = str(fb_client.base_url)
|
||||||
|
|
||||||
|
# Swap client and config in-place
|
||||||
|
self.client = fb_client
|
||||||
|
self._client_kwargs = {
|
||||||
|
"api_key": fb_client.api_key,
|
||||||
|
"base_url": fb_base_url,
|
||||||
|
}
|
||||||
old_model = self.model
|
old_model = self.model
|
||||||
self.model = fb_model
|
self.model = fb_model
|
||||||
self.provider = fb_provider
|
self.provider = fb_provider
|
||||||
|
|
@ -2444,16 +2399,26 @@ class AIAgent:
|
||||||
|
|
||||||
extra_body = {}
|
extra_body = {}
|
||||||
|
|
||||||
if provider_preferences:
|
|
||||||
extra_body["provider"] = provider_preferences
|
|
||||||
|
|
||||||
_is_openrouter = "openrouter" in self.base_url.lower()
|
_is_openrouter = "openrouter" in self.base_url.lower()
|
||||||
|
|
||||||
|
# Provider preferences (only, ignore, order, sort) are OpenRouter-
|
||||||
|
# specific. Only send to OpenRouter-compatible endpoints.
|
||||||
|
# TODO: Nous Portal will add transparent proxy support — re-enable
|
||||||
|
# for _is_nous when their backend is updated.
|
||||||
|
if provider_preferences and _is_openrouter:
|
||||||
|
extra_body["provider"] = provider_preferences
|
||||||
_is_nous = "nousresearch" in self.base_url.lower()
|
_is_nous = "nousresearch" in self.base_url.lower()
|
||||||
|
|
||||||
_is_mistral = "api.mistral.ai" in self.base_url.lower()
|
_is_mistral = "api.mistral.ai" in self.base_url.lower()
|
||||||
if (_is_openrouter or _is_nous) and not _is_mistral:
|
if (_is_openrouter or _is_nous) and not _is_mistral:
|
||||||
if self.reasoning_config is not None:
|
if self.reasoning_config is not None:
|
||||||
extra_body["reasoning"] = self.reasoning_config
|
rc = dict(self.reasoning_config)
|
||||||
|
# Nous Portal requires reasoning enabled — don't send
|
||||||
|
# enabled=false to it (would cause 400).
|
||||||
|
if _is_nous and rc.get("enabled") is False:
|
||||||
|
pass # omit reasoning entirely for Nous when disabled
|
||||||
|
else:
|
||||||
|
extra_body["reasoning"] = rc
|
||||||
else:
|
else:
|
||||||
extra_body["reasoning"] = {
|
extra_body["reasoning"] = {
|
||||||
"enabled": True,
|
"enabled": True,
|
||||||
|
|
@ -2630,19 +2595,22 @@ class AIAgent:
|
||||||
|
|
||||||
# Use auxiliary client for the flush call when available --
|
# Use auxiliary client for the flush call when available --
|
||||||
# it's cheaper and avoids Codex Responses API incompatibility.
|
# it's cheaper and avoids Codex Responses API incompatibility.
|
||||||
from agent.auxiliary_client import get_text_auxiliary_client
|
from agent.auxiliary_client import call_llm as _call_llm
|
||||||
aux_client, aux_model = get_text_auxiliary_client()
|
_aux_available = True
|
||||||
|
try:
|
||||||
|
response = _call_llm(
|
||||||
|
task="flush_memories",
|
||||||
|
messages=api_messages,
|
||||||
|
tools=[memory_tool_def],
|
||||||
|
temperature=0.3,
|
||||||
|
max_tokens=5120,
|
||||||
|
timeout=30.0,
|
||||||
|
)
|
||||||
|
except RuntimeError:
|
||||||
|
_aux_available = False
|
||||||
|
response = None
|
||||||
|
|
||||||
if aux_client:
|
if not _aux_available and self.api_mode == "codex_responses":
|
||||||
api_kwargs = {
|
|
||||||
"model": aux_model,
|
|
||||||
"messages": api_messages,
|
|
||||||
"tools": [memory_tool_def],
|
|
||||||
"temperature": 0.3,
|
|
||||||
"max_tokens": 5120,
|
|
||||||
}
|
|
||||||
response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
|
|
||||||
elif self.api_mode == "codex_responses":
|
|
||||||
# No auxiliary client -- use the Codex Responses path directly
|
# No auxiliary client -- use the Codex Responses path directly
|
||||||
codex_kwargs = self._build_api_kwargs(api_messages)
|
codex_kwargs = self._build_api_kwargs(api_messages)
|
||||||
codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
|
codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
|
||||||
|
|
@ -2650,7 +2618,7 @@ class AIAgent:
|
||||||
if "max_output_tokens" in codex_kwargs:
|
if "max_output_tokens" in codex_kwargs:
|
||||||
codex_kwargs["max_output_tokens"] = 5120
|
codex_kwargs["max_output_tokens"] = 5120
|
||||||
response = self._run_codex_stream(codex_kwargs)
|
response = self._run_codex_stream(codex_kwargs)
|
||||||
else:
|
elif not _aux_available:
|
||||||
api_kwargs = {
|
api_kwargs = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": api_messages,
|
"messages": api_messages,
|
||||||
|
|
@ -2662,7 +2630,7 @@ class AIAgent:
|
||||||
|
|
||||||
# Extract tool calls from the response, handling both API formats
|
# Extract tool calls from the response, handling both API formats
|
||||||
tool_calls = []
|
tool_calls = []
|
||||||
if self.api_mode == "codex_responses" and not aux_client:
|
if self.api_mode == "codex_responses" and not _aux_available:
|
||||||
assistant_msg, _ = self._normalize_codex_response(response)
|
assistant_msg, _ = self._normalize_codex_response(response)
|
||||||
if assistant_msg and assistant_msg.tool_calls:
|
if assistant_msg and assistant_msg.tool_calls:
|
||||||
tool_calls = assistant_msg.tool_calls
|
tool_calls = assistant_msg.tool_calls
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,7 @@ from agent.context_compressor import ContextCompressor
|
||||||
@pytest.fixture()
|
@pytest.fixture()
|
||||||
def compressor():
|
def compressor():
|
||||||
"""Create a ContextCompressor with mocked dependencies."""
|
"""Create a ContextCompressor with mocked dependencies."""
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(None, None)):
|
|
||||||
c = ContextCompressor(
|
c = ContextCompressor(
|
||||||
model="test/model",
|
model="test/model",
|
||||||
threshold_percent=0.85,
|
threshold_percent=0.85,
|
||||||
|
|
@ -119,14 +118,11 @@ class TestGenerateSummaryNoneContent:
|
||||||
"""Regression: content=None (from tool-call-only assistant messages) must not crash."""
|
"""Regression: content=None (from tool-call-only assistant messages) must not crash."""
|
||||||
|
|
||||||
def test_none_content_does_not_crash(self):
|
def test_none_content_does_not_crash(self):
|
||||||
mock_client = MagicMock()
|
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_response.choices = [MagicMock()]
|
mock_response.choices = [MagicMock()]
|
||||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: tool calls happened"
|
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: tool calls happened"
|
||||||
mock_client.chat.completions.create.return_value = mock_response
|
|
||||||
|
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
|
|
||||||
c = ContextCompressor(model="test", quiet_mode=True)
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
|
|
@ -139,14 +135,14 @@ class TestGenerateSummaryNoneContent:
|
||||||
{"role": "user", "content": "thanks"},
|
{"role": "user", "content": "thanks"},
|
||||||
]
|
]
|
||||||
|
|
||||||
summary = c._generate_summary(messages)
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
|
summary = c._generate_summary(messages)
|
||||||
assert isinstance(summary, str)
|
assert isinstance(summary, str)
|
||||||
assert "CONTEXT SUMMARY" in summary
|
assert "CONTEXT SUMMARY" in summary
|
||||||
|
|
||||||
def test_none_content_in_system_message_compress(self):
|
def test_none_content_in_system_message_compress(self):
|
||||||
"""System message with content=None should not crash during compress."""
|
"""System message with content=None should not crash during compress."""
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(None, None)):
|
|
||||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||||
|
|
||||||
msgs = [{"role": "system", "content": None}] + [
|
msgs = [{"role": "system", "content": None}] + [
|
||||||
|
|
@ -165,12 +161,12 @@ class TestCompressWithClient:
|
||||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
|
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
|
||||||
mock_client.chat.completions.create.return_value = mock_response
|
mock_client.chat.completions.create.return_value = mock_response
|
||||||
|
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
|
|
||||||
c = ContextCompressor(model="test", quiet_mode=True)
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
|
|
||||||
msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
|
msgs = [{"role": "user" if i % 2 == 0 else "assistant", "content": f"msg {i}"} for i in range(10)]
|
||||||
result = c.compress(msgs)
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
|
result = c.compress(msgs)
|
||||||
|
|
||||||
# Should have summary message in the middle
|
# Should have summary message in the middle
|
||||||
contents = [m.get("content", "") for m in result]
|
contents = [m.get("content", "") for m in result]
|
||||||
|
|
@ -184,8 +180,7 @@ class TestCompressWithClient:
|
||||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
|
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
|
||||||
mock_client.chat.completions.create.return_value = mock_response
|
mock_client.chat.completions.create.return_value = mock_response
|
||||||
|
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
|
|
||||||
c = ContextCompressor(
|
c = ContextCompressor(
|
||||||
model="test",
|
model="test",
|
||||||
quiet_mode=True,
|
quiet_mode=True,
|
||||||
|
|
@ -212,7 +207,8 @@ class TestCompressWithClient:
|
||||||
{"role": "user", "content": "later 4"},
|
{"role": "user", "content": "later 4"},
|
||||||
]
|
]
|
||||||
|
|
||||||
result = c.compress(msgs)
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
|
result = c.compress(msgs)
|
||||||
|
|
||||||
answered_ids = {
|
answered_ids = {
|
||||||
msg.get("tool_call_id")
|
msg.get("tool_call_id")
|
||||||
|
|
@ -232,8 +228,7 @@ class TestCompressWithClient:
|
||||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
|
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
|
||||||
mock_client.chat.completions.create.return_value = mock_response
|
mock_client.chat.completions.create.return_value = mock_response
|
||||||
|
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
|
|
||||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||||
|
|
||||||
# Last head message (index 1) is "assistant" → summary should be "user"
|
# Last head message (index 1) is "assistant" → summary should be "user"
|
||||||
|
|
@ -245,7 +240,8 @@ class TestCompressWithClient:
|
||||||
{"role": "user", "content": "msg 4"},
|
{"role": "user", "content": "msg 4"},
|
||||||
{"role": "assistant", "content": "msg 5"},
|
{"role": "assistant", "content": "msg 5"},
|
||||||
]
|
]
|
||||||
result = c.compress(msgs)
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
|
result = c.compress(msgs)
|
||||||
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
||||||
assert len(summary_msg) == 1
|
assert len(summary_msg) == 1
|
||||||
assert summary_msg[0]["role"] == "user"
|
assert summary_msg[0]["role"] == "user"
|
||||||
|
|
@ -258,8 +254,7 @@ class TestCompressWithClient:
|
||||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
|
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened"
|
||||||
mock_client.chat.completions.create.return_value = mock_response
|
mock_client.chat.completions.create.return_value = mock_response
|
||||||
|
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
|
|
||||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2)
|
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2)
|
||||||
|
|
||||||
# Last head message (index 2) is "user" → summary should be "assistant"
|
# Last head message (index 2) is "user" → summary should be "assistant"
|
||||||
|
|
@ -273,20 +268,18 @@ class TestCompressWithClient:
|
||||||
{"role": "user", "content": "msg 6"},
|
{"role": "user", "content": "msg 6"},
|
||||||
{"role": "assistant", "content": "msg 7"},
|
{"role": "assistant", "content": "msg 7"},
|
||||||
]
|
]
|
||||||
result = c.compress(msgs)
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
|
result = c.compress(msgs)
|
||||||
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
||||||
assert len(summary_msg) == 1
|
assert len(summary_msg) == 1
|
||||||
assert summary_msg[0]["role"] == "assistant"
|
assert summary_msg[0]["role"] == "assistant"
|
||||||
|
|
||||||
def test_summarization_does_not_start_tail_with_tool_outputs(self):
|
def test_summarization_does_not_start_tail_with_tool_outputs(self):
|
||||||
mock_client = MagicMock()
|
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_response.choices = [MagicMock()]
|
mock_response.choices = [MagicMock()]
|
||||||
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
|
mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compressed middle"
|
||||||
mock_client.chat.completions.create.return_value = mock_response
|
|
||||||
|
|
||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000), \
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")):
|
|
||||||
c = ContextCompressor(
|
c = ContextCompressor(
|
||||||
model="test",
|
model="test",
|
||||||
quiet_mode=True,
|
quiet_mode=True,
|
||||||
|
|
@ -309,7 +302,8 @@ class TestCompressWithClient:
|
||||||
{"role": "user", "content": "latest user"},
|
{"role": "user", "content": "latest user"},
|
||||||
]
|
]
|
||||||
|
|
||||||
result = c.compress(msgs)
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
|
result = c.compress(msgs)
|
||||||
|
|
||||||
called_ids = {
|
called_ids = {
|
||||||
tc["id"]
|
tc["id"]
|
||||||
|
|
|
||||||
|
|
@ -103,7 +103,7 @@ def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch):
|
||||||
|
|
||||||
config = load_config()
|
config = load_config()
|
||||||
|
|
||||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
|
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 3)
|
||||||
|
|
||||||
prompt_values = iter(
|
prompt_values = iter(
|
||||||
[
|
[
|
||||||
|
|
|
||||||
|
|
@ -579,7 +579,7 @@ class WebToolsTester:
|
||||||
"results": self.test_results,
|
"results": self.test_results,
|
||||||
"environment": {
|
"environment": {
|
||||||
"firecrawl_api_key": check_firecrawl_api_key(),
|
"firecrawl_api_key": check_firecrawl_api_key(),
|
||||||
"nous_api_key": check_auxiliary_model(),
|
"auxiliary_model": check_auxiliary_model(),
|
||||||
"debug_mode": get_debug_session_info()["enabled"]
|
"debug_mode": get_debug_session_info()["enabled"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -229,13 +229,14 @@ class TestVisionModelOverride:
|
||||||
|
|
||||||
def test_default_model_when_no_override(self, monkeypatch):
|
def test_default_model_when_no_override(self, monkeypatch):
|
||||||
monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
|
monkeypatch.delenv("AUXILIARY_VISION_MODEL", raising=False)
|
||||||
from tools.vision_tools import _handle_vision_analyze, DEFAULT_VISION_MODEL
|
from tools.vision_tools import _handle_vision_analyze
|
||||||
with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
|
with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool:
|
||||||
mock_tool.return_value = '{"success": true}'
|
mock_tool.return_value = '{"success": true}'
|
||||||
_handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
|
_handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"})
|
||||||
call_args = mock_tool.call_args
|
call_args = mock_tool.call_args
|
||||||
expected = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
|
# With no AUXILIARY_VISION_MODEL env var, model should be None
|
||||||
assert call_args[0][2] == expected
|
# (the centralized call_llm router picks the provider default)
|
||||||
|
assert call_args[0][2] is None
|
||||||
|
|
||||||
|
|
||||||
# ── DEFAULT_CONFIG shape tests ───────────────────────────────────────────────
|
# ── DEFAULT_CONFIG shape tests ───────────────────────────────────────────────
|
||||||
|
|
|
||||||
|
|
@ -93,8 +93,8 @@ class TestModelCommand:
|
||||||
output = capsys.readouterr().out
|
output = capsys.readouterr().out
|
||||||
assert "anthropic/claude-opus-4.6" in output
|
assert "anthropic/claude-opus-4.6" in output
|
||||||
assert "OpenRouter" in output
|
assert "OpenRouter" in output
|
||||||
assert "Available models" in output
|
assert "Authenticated providers" in output or "Switch model" in output
|
||||||
assert "provider:model-name" in output
|
assert "provider" in output and "model" in output
|
||||||
|
|
||||||
# -- provider switching tests -------------------------------------------
|
# -- provider switching tests -------------------------------------------
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -197,21 +197,28 @@ def test_codex_provider_replaces_incompatible_default_model(monkeypatch):
|
||||||
assert shell.model == "gpt-5.2-codex"
|
assert shell.model == "gpt-5.2-codex"
|
||||||
|
|
||||||
|
|
||||||
def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
|
def test_codex_provider_uses_config_model(monkeypatch):
|
||||||
"""When the user explicitly sets LLM_MODEL, we trust their choice and
|
"""Model comes from config.yaml, not LLM_MODEL env var.
|
||||||
let the API be the judge — even if it's a non-OpenAI model. Only
|
Config.yaml is the single source of truth to avoid multi-agent conflicts."""
|
||||||
provider prefixes are stripped; the bare model passes through."""
|
|
||||||
cli = _import_cli()
|
cli = _import_cli()
|
||||||
|
|
||||||
monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6")
|
# LLM_MODEL env var should be IGNORED (even if set)
|
||||||
|
monkeypatch.setenv("LLM_MODEL", "should-be-ignored")
|
||||||
monkeypatch.delenv("OPENAI_MODEL", raising=False)
|
monkeypatch.delenv("OPENAI_MODEL", raising=False)
|
||||||
|
|
||||||
|
# Set model via config
|
||||||
|
monkeypatch.setitem(cli.CLI_CONFIG, "model", {
|
||||||
|
"default": "gpt-5.2-codex",
|
||||||
|
"provider": "openai-codex",
|
||||||
|
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||||
|
})
|
||||||
|
|
||||||
def _runtime_resolve(**kwargs):
|
def _runtime_resolve(**kwargs):
|
||||||
return {
|
return {
|
||||||
"provider": "openai-codex",
|
"provider": "openai-codex",
|
||||||
"api_mode": "codex_responses",
|
"api_mode": "codex_responses",
|
||||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||||
"api_key": "test-key",
|
"api_key": "fake-codex-token",
|
||||||
"source": "env/config",
|
"source": "env/config",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -220,11 +227,12 @@ def test_codex_provider_trusts_explicit_envvar_model(monkeypatch):
|
||||||
|
|
||||||
shell = cli.HermesCLI(compact=True, max_turns=1)
|
shell = cli.HermesCLI(compact=True, max_turns=1)
|
||||||
|
|
||||||
assert shell._model_is_default is False
|
|
||||||
assert shell._ensure_runtime_credentials() is True
|
assert shell._ensure_runtime_credentials() is True
|
||||||
assert shell.provider == "openai-codex"
|
assert shell.provider == "openai-codex"
|
||||||
# User explicitly chose this model — it passes through untouched
|
# Model from config (may be normalized by codex provider logic)
|
||||||
assert shell.model == "claude-opus-4-6"
|
assert "codex" in shell.model.lower()
|
||||||
|
# LLM_MODEL env var is NOT used
|
||||||
|
assert shell.model != "should-be-ignored"
|
||||||
|
|
||||||
|
|
||||||
def test_codex_provider_preserves_explicit_codex_model(monkeypatch):
|
def test_codex_provider_preserves_explicit_codex_model(monkeypatch):
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ def _make_agent(fallback_model=None):
|
||||||
patch("run_agent.OpenAI"),
|
patch("run_agent.OpenAI"),
|
||||||
):
|
):
|
||||||
agent = AIAgent(
|
agent = AIAgent(
|
||||||
api_key="test-key-primary",
|
api_key="test-key",
|
||||||
quiet_mode=True,
|
quiet_mode=True,
|
||||||
skip_context_files=True,
|
skip_context_files=True,
|
||||||
skip_memory=True,
|
skip_memory=True,
|
||||||
|
|
@ -45,6 +45,14 @@ def _make_agent(fallback_model=None):
|
||||||
return agent
|
return agent
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_resolve(base_url="https://openrouter.ai/api/v1", api_key="test-key"):
|
||||||
|
"""Helper to create a mock client for resolve_provider_client."""
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.api_key = api_key
|
||||||
|
mock_client.base_url = base_url
|
||||||
|
return mock_client
|
||||||
|
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# _try_activate_fallback()
|
# _try_activate_fallback()
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
@ -71,9 +79,13 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-fallback-key"}),
|
api_key="sk-or-fallback-key",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "anthropic/claude-sonnet-4"),
|
||||||
):
|
):
|
||||||
result = agent._try_activate_fallback()
|
result = agent._try_activate_fallback()
|
||||||
assert result is True
|
assert result is True
|
||||||
|
|
@ -81,36 +93,37 @@ class TestTryActivateFallback:
|
||||||
assert agent.model == "anthropic/claude-sonnet-4"
|
assert agent.model == "anthropic/claude-sonnet-4"
|
||||||
assert agent.provider == "openrouter"
|
assert agent.provider == "openrouter"
|
||||||
assert agent.api_mode == "chat_completions"
|
assert agent.api_mode == "chat_completions"
|
||||||
mock_openai.assert_called_once()
|
assert agent.client is mock_client
|
||||||
call_kwargs = mock_openai.call_args[1]
|
|
||||||
assert call_kwargs["api_key"] == "sk-or-fallback-key"
|
|
||||||
assert "openrouter" in call_kwargs["base_url"].lower()
|
|
||||||
# OpenRouter should get attribution headers
|
|
||||||
assert "default_headers" in call_kwargs
|
|
||||||
|
|
||||||
def test_activates_zai_fallback(self):
|
def test_activates_zai_fallback(self):
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "zai", "model": "glm-5"},
|
fallback_model={"provider": "zai", "model": "glm-5"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}),
|
api_key="sk-zai-key",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
base_url="https://open.z.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "glm-5"),
|
||||||
):
|
):
|
||||||
result = agent._try_activate_fallback()
|
result = agent._try_activate_fallback()
|
||||||
assert result is True
|
assert result is True
|
||||||
assert agent.model == "glm-5"
|
assert agent.model == "glm-5"
|
||||||
assert agent.provider == "zai"
|
assert agent.provider == "zai"
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert call_kwargs["api_key"] == "sk-zai-key"
|
|
||||||
assert "z.ai" in call_kwargs["base_url"].lower()
|
|
||||||
|
|
||||||
def test_activates_kimi_fallback(self):
|
def test_activates_kimi_fallback(self):
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"},
|
fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"KIMI_API_KEY": "sk-kimi-key"}),
|
api_key="sk-kimi-key",
|
||||||
patch("run_agent.OpenAI"),
|
base_url="https://api.moonshot.ai/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "kimi-k2.5"),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is True
|
assert agent._try_activate_fallback() is True
|
||||||
assert agent.model == "kimi-k2.5"
|
assert agent.model == "kimi-k2.5"
|
||||||
|
|
@ -120,23 +133,30 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
|
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"MINIMAX_API_KEY": "sk-mm-key"}),
|
api_key="sk-mm-key",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
base_url="https://api.minimax.io/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "MiniMax-M2.5"),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is True
|
assert agent._try_activate_fallback() is True
|
||||||
assert agent.model == "MiniMax-M2.5"
|
assert agent.model == "MiniMax-M2.5"
|
||||||
assert agent.provider == "minimax"
|
assert agent.provider == "minimax"
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert "minimax.io" in call_kwargs["base_url"]
|
|
||||||
|
|
||||||
def test_only_fires_once(self):
|
def test_only_fires_once(self):
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
|
api_key="sk-or-key",
|
||||||
patch("run_agent.OpenAI"),
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "anthropic/claude-sonnet-4"),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is True
|
assert agent._try_activate_fallback() is True
|
||||||
# Second attempt should return False
|
# Second attempt should return False
|
||||||
|
|
@ -147,9 +167,10 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
|
fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"},
|
||||||
)
|
)
|
||||||
# Ensure MINIMAX_API_KEY is not in the environment
|
with patch(
|
||||||
env = {k: v for k, v in os.environ.items() if k != "MINIMAX_API_KEY"}
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
with patch.dict("os.environ", env, clear=True):
|
return_value=(None, None),
|
||||||
|
):
|
||||||
assert agent._try_activate_fallback() is False
|
assert agent._try_activate_fallback() is False
|
||||||
assert agent._fallback_activated is False
|
assert agent._fallback_activated is False
|
||||||
|
|
||||||
|
|
@ -163,22 +184,29 @@ class TestTryActivateFallback:
|
||||||
"api_key_env": "MY_CUSTOM_KEY",
|
"api_key_env": "MY_CUSTOM_KEY",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"MY_CUSTOM_KEY": "custom-secret"}),
|
api_key="custom-secret",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
base_url="http://localhost:8080/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "my-model"),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is True
|
assert agent._try_activate_fallback() is True
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert call_kwargs["base_url"] == "http://localhost:8080/v1"
|
assert agent.model == "my-model"
|
||||||
assert call_kwargs["api_key"] == "custom-secret"
|
|
||||||
|
|
||||||
def test_prompt_caching_enabled_for_claude_on_openrouter(self):
|
def test_prompt_caching_enabled_for_claude_on_openrouter(self):
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
|
api_key="sk-or-key",
|
||||||
patch("run_agent.OpenAI"),
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "anthropic/claude-sonnet-4"),
|
||||||
):
|
):
|
||||||
agent._try_activate_fallback()
|
agent._try_activate_fallback()
|
||||||
assert agent._use_prompt_caching is True
|
assert agent._use_prompt_caching is True
|
||||||
|
|
@ -187,9 +215,13 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
|
fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}),
|
api_key="sk-or-key",
|
||||||
patch("run_agent.OpenAI"),
|
base_url="https://openrouter.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "google/gemini-2.5-flash"),
|
||||||
):
|
):
|
||||||
agent._try_activate_fallback()
|
agent._try_activate_fallback()
|
||||||
assert agent._use_prompt_caching is False
|
assert agent._use_prompt_caching is False
|
||||||
|
|
@ -198,9 +230,13 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "zai", "model": "glm-5"},
|
fallback_model={"provider": "zai", "model": "glm-5"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}),
|
api_key="sk-zai-key",
|
||||||
patch("run_agent.OpenAI"),
|
base_url="https://open.z.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "glm-5"),
|
||||||
):
|
):
|
||||||
agent._try_activate_fallback()
|
agent._try_activate_fallback()
|
||||||
assert agent._use_prompt_caching is False
|
assert agent._use_prompt_caching is False
|
||||||
|
|
@ -210,35 +246,36 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "zai", "model": "glm-5"},
|
fallback_model={"provider": "zai", "model": "glm-5"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = _mock_resolve(
|
||||||
patch.dict("os.environ", {"Z_AI_API_KEY": "sk-alt-key"}),
|
api_key="sk-alt-key",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
base_url="https://open.z.ai/api/v1",
|
||||||
|
)
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "glm-5"),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is True
|
assert agent._try_activate_fallback() is True
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert call_kwargs["api_key"] == "sk-alt-key"
|
|
||||||
|
|
||||||
def test_activates_codex_fallback(self):
|
def test_activates_codex_fallback(self):
|
||||||
"""OpenAI Codex fallback should use OAuth credentials and codex_responses mode."""
|
"""OpenAI Codex fallback should use OAuth credentials and codex_responses mode."""
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
|
fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
|
||||||
)
|
)
|
||||||
mock_creds = {
|
mock_client = _mock_resolve(
|
||||||
"api_key": "codex-oauth-token",
|
api_key="codex-oauth-token",
|
||||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
base_url="https://chatgpt.com/backend-api/codex",
|
||||||
}
|
)
|
||||||
with (
|
with patch(
|
||||||
patch("hermes_cli.auth.resolve_codex_runtime_credentials", return_value=mock_creds),
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
return_value=(mock_client, "gpt-5.3-codex"),
|
||||||
):
|
):
|
||||||
result = agent._try_activate_fallback()
|
result = agent._try_activate_fallback()
|
||||||
assert result is True
|
assert result is True
|
||||||
assert agent.model == "gpt-5.3-codex"
|
assert agent.model == "gpt-5.3-codex"
|
||||||
assert agent.provider == "openai-codex"
|
assert agent.provider == "openai-codex"
|
||||||
assert agent.api_mode == "codex_responses"
|
assert agent.api_mode == "codex_responses"
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert call_kwargs["api_key"] == "codex-oauth-token"
|
|
||||||
assert "chatgpt.com" in call_kwargs["base_url"]
|
|
||||||
|
|
||||||
def test_codex_fallback_fails_gracefully_without_credentials(self):
|
def test_codex_fallback_fails_gracefully_without_credentials(self):
|
||||||
"""Codex fallback should return False if no OAuth credentials available."""
|
"""Codex fallback should return False if no OAuth credentials available."""
|
||||||
|
|
@ -246,8 +283,8 @@ class TestTryActivateFallback:
|
||||||
fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
|
fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"},
|
||||||
)
|
)
|
||||||
with patch(
|
with patch(
|
||||||
"hermes_cli.auth.resolve_codex_runtime_credentials",
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
side_effect=Exception("No Codex credentials"),
|
return_value=(None, None),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is False
|
assert agent._try_activate_fallback() is False
|
||||||
assert agent._fallback_activated is False
|
assert agent._fallback_activated is False
|
||||||
|
|
@ -257,22 +294,20 @@ class TestTryActivateFallback:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": "nous", "model": "nous-hermes-3"},
|
fallback_model={"provider": "nous", "model": "nous-hermes-3"},
|
||||||
)
|
)
|
||||||
mock_creds = {
|
mock_client = _mock_resolve(
|
||||||
"api_key": "nous-agent-key-abc",
|
api_key="nous-agent-key-abc",
|
||||||
"base_url": "https://inference-api.nousresearch.com/v1",
|
base_url="https://inference-api.nousresearch.com/v1",
|
||||||
}
|
)
|
||||||
with (
|
with patch(
|
||||||
patch("hermes_cli.auth.resolve_nous_runtime_credentials", return_value=mock_creds),
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
return_value=(mock_client, "nous-hermes-3"),
|
||||||
):
|
):
|
||||||
result = agent._try_activate_fallback()
|
result = agent._try_activate_fallback()
|
||||||
assert result is True
|
assert result is True
|
||||||
assert agent.model == "nous-hermes-3"
|
assert agent.model == "nous-hermes-3"
|
||||||
assert agent.provider == "nous"
|
assert agent.provider == "nous"
|
||||||
assert agent.api_mode == "chat_completions"
|
assert agent.api_mode == "chat_completions"
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert call_kwargs["api_key"] == "nous-agent-key-abc"
|
|
||||||
assert "nousresearch.com" in call_kwargs["base_url"]
|
|
||||||
|
|
||||||
def test_nous_fallback_fails_gracefully_without_login(self):
|
def test_nous_fallback_fails_gracefully_without_login(self):
|
||||||
"""Nous fallback should return False if not logged in."""
|
"""Nous fallback should return False if not logged in."""
|
||||||
|
|
@ -280,8 +315,8 @@ class TestTryActivateFallback:
|
||||||
fallback_model={"provider": "nous", "model": "nous-hermes-3"},
|
fallback_model={"provider": "nous", "model": "nous-hermes-3"},
|
||||||
)
|
)
|
||||||
with patch(
|
with patch(
|
||||||
"hermes_cli.auth.resolve_nous_runtime_credentials",
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
side_effect=Exception("Not logged in to Nous Portal"),
|
return_value=(None, None),
|
||||||
):
|
):
|
||||||
assert agent._try_activate_fallback() is False
|
assert agent._try_activate_fallback() is False
|
||||||
assert agent._fallback_activated is False
|
assert agent._fallback_activated is False
|
||||||
|
|
@ -315,7 +350,7 @@ class TestFallbackInit:
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
|
|
||||||
class TestProviderCredentials:
|
class TestProviderCredentials:
|
||||||
"""Verify that each supported provider resolves its API key correctly."""
|
"""Verify that each supported provider resolves via the centralized router."""
|
||||||
|
|
||||||
@pytest.mark.parametrize("provider,env_var,base_url_fragment", [
|
@pytest.mark.parametrize("provider,env_var,base_url_fragment", [
|
||||||
("openrouter", "OPENROUTER_API_KEY", "openrouter"),
|
("openrouter", "OPENROUTER_API_KEY", "openrouter"),
|
||||||
|
|
@ -328,12 +363,15 @@ class TestProviderCredentials:
|
||||||
agent = _make_agent(
|
agent = _make_agent(
|
||||||
fallback_model={"provider": provider, "model": "test-model"},
|
fallback_model={"provider": provider, "model": "test-model"},
|
||||||
)
|
)
|
||||||
with (
|
mock_client = MagicMock()
|
||||||
patch.dict("os.environ", {env_var: "test-key-123"}),
|
mock_client.api_key = "test-api-key"
|
||||||
patch("run_agent.OpenAI") as mock_openai,
|
mock_client.base_url = f"https://{base_url_fragment}/v1"
|
||||||
|
with patch(
|
||||||
|
"agent.auxiliary_client.resolve_provider_client",
|
||||||
|
return_value=(mock_client, "test-model"),
|
||||||
):
|
):
|
||||||
result = agent._try_activate_fallback()
|
result = agent._try_activate_fallback()
|
||||||
assert result is True, f"Failed to activate fallback for {provider}"
|
assert result is True, f"Failed to activate fallback for {provider}"
|
||||||
call_kwargs = mock_openai.call_args[1]
|
assert agent.client is mock_client
|
||||||
assert call_kwargs["api_key"] == "test-key-123"
|
assert agent.model == "test-model"
|
||||||
assert base_url_fragment in call_kwargs["base_url"].lower()
|
assert agent.provider == provider
|
||||||
|
|
|
||||||
|
|
@ -98,10 +98,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||||
def test_flush_uses_auxiliary_when_available(self, monkeypatch):
|
def test_flush_uses_auxiliary_when_available(self, monkeypatch):
|
||||||
agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
|
agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
|
||||||
|
|
||||||
mock_aux_client = MagicMock()
|
mock_response = _chat_response_with_memory_call()
|
||||||
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
||||||
|
|
||||||
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
|
with patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_call:
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "user", "content": "Hello"},
|
{"role": "user", "content": "Hello"},
|
||||||
{"role": "assistant", "content": "Hi there"},
|
{"role": "assistant", "content": "Hi there"},
|
||||||
|
|
@ -110,9 +109,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||||
with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
||||||
agent.flush_memories(messages)
|
agent.flush_memories(messages)
|
||||||
|
|
||||||
mock_aux_client.chat.completions.create.assert_called_once()
|
mock_call.assert_called_once()
|
||||||
call_kwargs = mock_aux_client.chat.completions.create.call_args
|
call_kwargs = mock_call.call_args
|
||||||
assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"
|
assert call_kwargs.kwargs.get("task") == "flush_memories"
|
||||||
|
|
||||||
def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
|
def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
|
||||||
"""Non-Codex mode with no auxiliary falls back to self.client."""
|
"""Non-Codex mode with no auxiliary falls back to self.client."""
|
||||||
|
|
@ -120,7 +119,7 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||||
agent.client = MagicMock()
|
agent.client = MagicMock()
|
||||||
agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
||||||
|
|
||||||
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
|
with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "user", "content": "Hello"},
|
{"role": "user", "content": "Hello"},
|
||||||
{"role": "assistant", "content": "Hi there"},
|
{"role": "assistant", "content": "Hi there"},
|
||||||
|
|
@ -135,10 +134,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||||
"""Verify that memory tool calls from the flush response actually get executed."""
|
"""Verify that memory tool calls from the flush response actually get executed."""
|
||||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||||
|
|
||||||
mock_aux_client = MagicMock()
|
mock_response = _chat_response_with_memory_call()
|
||||||
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
||||||
|
|
||||||
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
|
with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "user", "content": "Hello"},
|
{"role": "user", "content": "Hello"},
|
||||||
{"role": "assistant", "content": "Hi"},
|
{"role": "assistant", "content": "Hi"},
|
||||||
|
|
@ -157,10 +155,9 @@ class TestFlushMemoriesUsesAuxiliaryClient:
|
||||||
"""After flush, the flush prompt and any response should be removed from messages."""
|
"""After flush, the flush prompt and any response should be removed from messages."""
|
||||||
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
|
||||||
|
|
||||||
mock_aux_client = MagicMock()
|
mock_response = _chat_response_with_memory_call()
|
||||||
mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
|
|
||||||
|
|
||||||
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
|
with patch("agent.auxiliary_client.call_llm", return_value=mock_response):
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "user", "content": "Hello"},
|
{"role": "user", "content": "Hello"},
|
||||||
{"role": "assistant", "content": "Hi"},
|
{"role": "assistant", "content": "Hi"},
|
||||||
|
|
@ -202,7 +199,7 @@ class TestFlushMemoriesCodexFallback:
|
||||||
model="gpt-5-codex",
|
model="gpt-5-codex",
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
|
with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
|
||||||
patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
|
patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
|
||||||
patch.object(agent, "_build_api_kwargs") as mock_build, \
|
patch.object(agent, "_build_api_kwargs") as mock_build, \
|
||||||
patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
|
||||||
|
|
|
||||||
|
|
@ -959,7 +959,7 @@ class TestFlushSentinelNotLeaked:
|
||||||
agent.client.chat.completions.create.return_value = mock_response
|
agent.client.chat.completions.create.return_value = mock_response
|
||||||
|
|
||||||
# Bypass auxiliary client so flush uses agent.client directly
|
# Bypass auxiliary client so flush uses agent.client directly
|
||||||
with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
|
with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")):
|
||||||
agent.flush_memories(messages, min_turns=0)
|
agent.flush_memories(messages, min_turns=0)
|
||||||
|
|
||||||
# Check what was actually sent to the API
|
# Check what was actually sent to the API
|
||||||
|
|
|
||||||
|
|
@ -158,29 +158,6 @@ def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):
|
||||||
assert resolved["api_key"] == "sk-vllm-key"
|
assert resolved["api_key"] == "sk-vllm-key"
|
||||||
|
|
||||||
|
|
||||||
def test_resolve_runtime_provider_nous_api(monkeypatch):
|
|
||||||
"""Nous Portal API key provider resolves via the api_key path."""
|
|
||||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "nous-api")
|
|
||||||
monkeypatch.setattr(
|
|
||||||
rp,
|
|
||||||
"resolve_api_key_provider_credentials",
|
|
||||||
lambda pid: {
|
|
||||||
"provider": "nous-api",
|
|
||||||
"api_key": "nous-test-key",
|
|
||||||
"base_url": "https://inference-api.nousresearch.com/v1",
|
|
||||||
"source": "NOUS_API_KEY",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
resolved = rp.resolve_runtime_provider(requested="nous-api")
|
|
||||||
|
|
||||||
assert resolved["provider"] == "nous-api"
|
|
||||||
assert resolved["api_mode"] == "chat_completions"
|
|
||||||
assert resolved["base_url"] == "https://inference-api.nousresearch.com/v1"
|
|
||||||
assert resolved["api_key"] == "nous-test-key"
|
|
||||||
assert resolved["requested_provider"] == "nous-api"
|
|
||||||
|
|
||||||
|
|
||||||
def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
|
def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
|
||||||
"""When the user explicitly requests openrouter, OPENAI_BASE_URL
|
"""When the user explicitly requests openrouter, OPENAI_BASE_URL
|
||||||
(which may point to a custom endpoint) must not override the
|
(which may point to a custom endpoint) must not override the
|
||||||
|
|
|
||||||
|
|
@ -137,8 +137,7 @@ class TestBrowserVisionAnnotate:
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch("tools.browser_tool._run_browser_command") as mock_cmd,
|
patch("tools.browser_tool._run_browser_command") as mock_cmd,
|
||||||
patch("tools.browser_tool._aux_vision_client") as mock_client,
|
patch("tools.browser_tool.call_llm") as mock_call_llm,
|
||||||
patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"),
|
|
||||||
patch("tools.browser_tool._get_vision_model", return_value="test-model"),
|
patch("tools.browser_tool._get_vision_model", return_value="test-model"),
|
||||||
):
|
):
|
||||||
mock_cmd.return_value = {"success": True, "data": {}}
|
mock_cmd.return_value = {"success": True, "data": {}}
|
||||||
|
|
@ -159,8 +158,7 @@ class TestBrowserVisionAnnotate:
|
||||||
|
|
||||||
with (
|
with (
|
||||||
patch("tools.browser_tool._run_browser_command") as mock_cmd,
|
patch("tools.browser_tool._run_browser_command") as mock_cmd,
|
||||||
patch("tools.browser_tool._aux_vision_client") as mock_client,
|
patch("tools.browser_tool.call_llm") as mock_call_llm,
|
||||||
patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"),
|
|
||||||
patch("tools.browser_tool._get_vision_model", return_value="test-model"),
|
patch("tools.browser_tool._get_vision_model", return_value="test-model"),
|
||||||
):
|
):
|
||||||
mock_cmd.return_value = {"success": True, "data": {}}
|
mock_cmd.return_value = {"success": True, "data": {}}
|
||||||
|
|
|
||||||
|
|
@ -1828,8 +1828,8 @@ class TestSamplingCallbackText:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
params = _make_sampling_params()
|
params = _make_sampling_params()
|
||||||
result = asyncio.run(self.handler(None, params))
|
result = asyncio.run(self.handler(None, params))
|
||||||
|
|
@ -1847,13 +1847,13 @@ class TestSamplingCallbackText:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
) as mock_call:
|
||||||
params = _make_sampling_params(system_prompt="Be helpful")
|
params = _make_sampling_params(system_prompt="Be helpful")
|
||||||
asyncio.run(self.handler(None, params))
|
asyncio.run(self.handler(None, params))
|
||||||
|
|
||||||
call_args = fake_client.chat.completions.create.call_args
|
call_args = mock_call.call_args
|
||||||
messages = call_args.kwargs["messages"]
|
messages = call_args.kwargs["messages"]
|
||||||
assert messages[0] == {"role": "system", "content": "Be helpful"}
|
assert messages[0] == {"role": "system", "content": "Be helpful"}
|
||||||
|
|
||||||
|
|
@ -1865,8 +1865,8 @@ class TestSamplingCallbackText:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
params = _make_sampling_params()
|
params = _make_sampling_params()
|
||||||
result = asyncio.run(self.handler(None, params))
|
result = asyncio.run(self.handler(None, params))
|
||||||
|
|
@ -1889,8 +1889,8 @@ class TestSamplingCallbackToolUse:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
params = _make_sampling_params()
|
params = _make_sampling_params()
|
||||||
result = asyncio.run(self.handler(None, params))
|
result = asyncio.run(self.handler(None, params))
|
||||||
|
|
@ -1916,8 +1916,8 @@ class TestSamplingCallbackToolUse:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(self.handler(None, _make_sampling_params()))
|
result = asyncio.run(self.handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -1939,8 +1939,8 @@ class TestToolLoopGovernance:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
params = _make_sampling_params()
|
params = _make_sampling_params()
|
||||||
# Round 1, 2: allowed
|
# Round 1, 2: allowed
|
||||||
|
|
@ -1956,24 +1956,26 @@ class TestToolLoopGovernance:
|
||||||
def test_text_response_resets_counter(self):
|
def test_text_response_resets_counter(self):
|
||||||
"""A text response resets the tool loop counter."""
|
"""A text response resets the tool loop counter."""
|
||||||
handler = SamplingHandler("tl2", {"max_tool_rounds": 1})
|
handler = SamplingHandler("tl2", {"max_tool_rounds": 1})
|
||||||
fake_client = MagicMock()
|
|
||||||
|
# Use a list to hold the current response, so the side_effect can
|
||||||
|
# pick up changes between calls.
|
||||||
|
responses = [_make_llm_tool_response()]
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
side_effect=lambda **kw: responses[0],
|
||||||
):
|
):
|
||||||
# Tool response (round 1 of 1 allowed)
|
# Tool response (round 1 of 1 allowed)
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
|
||||||
r1 = asyncio.run(handler(None, _make_sampling_params()))
|
r1 = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(r1, CreateMessageResultWithTools)
|
assert isinstance(r1, CreateMessageResultWithTools)
|
||||||
|
|
||||||
# Text response resets counter
|
# Text response resets counter
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
responses[0] = _make_llm_response()
|
||||||
r2 = asyncio.run(handler(None, _make_sampling_params()))
|
r2 = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(r2, CreateMessageResult)
|
assert isinstance(r2, CreateMessageResult)
|
||||||
|
|
||||||
# Tool response again (should succeed since counter was reset)
|
# Tool response again (should succeed since counter was reset)
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
responses[0] = _make_llm_tool_response()
|
||||||
r3 = asyncio.run(handler(None, _make_sampling_params()))
|
r3 = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(r3, CreateMessageResultWithTools)
|
assert isinstance(r3, CreateMessageResultWithTools)
|
||||||
|
|
||||||
|
|
@ -1984,8 +1986,8 @@ class TestToolLoopGovernance:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(result, ErrorData)
|
assert isinstance(result, ErrorData)
|
||||||
|
|
@ -2003,8 +2005,8 @@ class TestSamplingErrors:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
# First call succeeds
|
# First call succeeds
|
||||||
r1 = asyncio.run(handler(None, _make_sampling_params()))
|
r1 = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
@ -2017,20 +2019,16 @@ class TestSamplingErrors:
|
||||||
|
|
||||||
def test_timeout_error(self):
|
def test_timeout_error(self):
|
||||||
handler = SamplingHandler("to", {"timeout": 0.05})
|
handler = SamplingHandler("to", {"timeout": 0.05})
|
||||||
fake_client = MagicMock()
|
|
||||||
|
|
||||||
def slow_call(**kwargs):
|
def slow_call(**kwargs):
|
||||||
import threading
|
import threading
|
||||||
# Use an event to ensure the thread truly blocks long enough
|
|
||||||
evt = threading.Event()
|
evt = threading.Event()
|
||||||
evt.wait(5) # blocks for up to 5 seconds (cancelled by timeout)
|
evt.wait(5) # blocks for up to 5 seconds (cancelled by timeout)
|
||||||
return _make_llm_response()
|
return _make_llm_response()
|
||||||
|
|
||||||
fake_client.chat.completions.create.side_effect = slow_call
|
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
side_effect=slow_call,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(result, ErrorData)
|
assert isinstance(result, ErrorData)
|
||||||
|
|
@ -2041,12 +2039,11 @@ class TestSamplingErrors:
|
||||||
handler = SamplingHandler("np", {})
|
handler = SamplingHandler("np", {})
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(None, None),
|
side_effect=RuntimeError("No LLM provider configured"),
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(result, ErrorData)
|
assert isinstance(result, ErrorData)
|
||||||
assert "No LLM provider" in result.message
|
|
||||||
assert handler.metrics["errors"] == 1
|
assert handler.metrics["errors"] == 1
|
||||||
|
|
||||||
def test_empty_choices_returns_error(self):
|
def test_empty_choices_returns_error(self):
|
||||||
|
|
@ -2060,8 +2057,8 @@ class TestSamplingErrors:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2080,8 +2077,8 @@ class TestSamplingErrors:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2099,8 +2096,8 @@ class TestSamplingErrors:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2120,19 +2117,19 @@ class TestModelWhitelist:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "test-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(result, CreateMessageResult)
|
assert isinstance(result, CreateMessageResult)
|
||||||
|
|
||||||
def test_disallowed_model_rejected(self):
|
def test_disallowed_model_rejected(self):
|
||||||
handler = SamplingHandler("wl2", {"allowed_models": ["gpt-4o"]})
|
handler = SamplingHandler("wl2", {"allowed_models": ["gpt-4o"], "model": "test-model"})
|
||||||
fake_client = MagicMock()
|
fake_client = MagicMock()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "gpt-3.5-turbo"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(result, ErrorData)
|
assert isinstance(result, ErrorData)
|
||||||
|
|
@ -2145,8 +2142,8 @@ class TestModelWhitelist:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "any-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
assert isinstance(result, CreateMessageResult)
|
assert isinstance(result, CreateMessageResult)
|
||||||
|
|
@ -2166,8 +2163,8 @@ class TestMalformedToolCallArgs:
|
||||||
)
|
)
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2194,8 +2191,8 @@ class TestMalformedToolCallArgs:
|
||||||
fake_client.chat.completions.create.return_value = response
|
fake_client.chat.completions.create.return_value = response
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
result = asyncio.run(handler(None, _make_sampling_params()))
|
result = asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2214,8 +2211,8 @@ class TestMetricsTracking:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_response()
|
fake_client.chat.completions.create.return_value = _make_llm_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
asyncio.run(handler(None, _make_sampling_params()))
|
asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2229,8 +2226,8 @@ class TestMetricsTracking:
|
||||||
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
fake_client.chat.completions.create.return_value = _make_llm_tool_response()
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(fake_client, "default-model"),
|
return_value=fake_client.chat.completions.create.return_value,
|
||||||
):
|
):
|
||||||
asyncio.run(handler(None, _make_sampling_params()))
|
asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
@ -2241,8 +2238,8 @@ class TestMetricsTracking:
|
||||||
handler = SamplingHandler("met3", {})
|
handler = SamplingHandler("met3", {})
|
||||||
|
|
||||||
with patch(
|
with patch(
|
||||||
"agent.auxiliary_client.get_text_auxiliary_client",
|
"agent.auxiliary_client.call_llm",
|
||||||
return_value=(None, None),
|
side_effect=RuntimeError("No LLM provider configured"),
|
||||||
):
|
):
|
||||||
asyncio.run(handler(None, _make_sampling_params()))
|
asyncio.run(handler(None, _make_sampling_params()))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -189,16 +189,14 @@ class TestSessionSearch:
|
||||||
{"role": "assistant", "content": "hi there"},
|
{"role": "assistant", "content": "hi there"},
|
||||||
]
|
]
|
||||||
|
|
||||||
# Mock the summarizer to return a simple summary
|
# Mock async_call_llm to raise RuntimeError → summarizer returns None
|
||||||
import tools.session_search_tool as sst
|
from unittest.mock import AsyncMock, patch as _patch
|
||||||
original_client = sst._async_aux_client
|
with _patch("tools.session_search_tool.async_call_llm",
|
||||||
sst._async_aux_client = None # Disable summarizer → returns None
|
new_callable=AsyncMock,
|
||||||
|
side_effect=RuntimeError("no provider")):
|
||||||
result = json.loads(session_search(
|
result = json.loads(session_search(
|
||||||
query="test", db=mock_db, current_session_id=current_sid,
|
query="test", db=mock_db, current_session_id=current_sid,
|
||||||
))
|
))
|
||||||
|
|
||||||
sst._async_aux_client = original_client
|
|
||||||
|
|
||||||
assert result["success"] is True
|
assert result["success"] is True
|
||||||
# Current session should be skipped, only other_sid should appear
|
# Current session should be skipped, only other_sid should appear
|
||||||
|
|
|
||||||
|
|
@ -202,7 +202,7 @@ class TestHandleVisionAnalyze:
|
||||||
assert model == "custom/model-v1"
|
assert model == "custom/model-v1"
|
||||||
|
|
||||||
def test_falls_back_to_default_model(self):
|
def test_falls_back_to_default_model(self):
|
||||||
"""Without AUXILIARY_VISION_MODEL, should use DEFAULT_VISION_MODEL or fallback."""
|
"""Without AUXILIARY_VISION_MODEL, model should be None (let call_llm resolve default)."""
|
||||||
with (
|
with (
|
||||||
patch(
|
patch(
|
||||||
"tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
|
"tools.vision_tools.vision_analyze_tool", new_callable=AsyncMock
|
||||||
|
|
@ -218,9 +218,9 @@ class TestHandleVisionAnalyze:
|
||||||
coro.close()
|
coro.close()
|
||||||
call_args = mock_tool.call_args
|
call_args = mock_tool.call_args
|
||||||
model = call_args[0][2]
|
model = call_args[0][2]
|
||||||
# Should be DEFAULT_VISION_MODEL or the hardcoded fallback
|
# With no AUXILIARY_VISION_MODEL set, model should be None
|
||||||
assert model is not None
|
# (the centralized call_llm router picks the default)
|
||||||
assert len(model) > 0
|
assert model is None
|
||||||
|
|
||||||
def test_empty_args_graceful(self):
|
def test_empty_args_graceful(self):
|
||||||
"""Missing keys should default to empty strings, not raise."""
|
"""Missing keys should default to empty strings, not raise."""
|
||||||
|
|
@ -277,8 +277,6 @@ class TestErrorLoggingExcInfo:
|
||||||
new_callable=AsyncMock,
|
new_callable=AsyncMock,
|
||||||
side_effect=Exception("download boom"),
|
side_effect=Exception("download boom"),
|
||||||
),
|
),
|
||||||
patch("tools.vision_tools._aux_async_client", MagicMock()),
|
|
||||||
patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"),
|
|
||||||
caplog.at_level(logging.ERROR, logger="tools.vision_tools"),
|
caplog.at_level(logging.ERROR, logger="tools.vision_tools"),
|
||||||
):
|
):
|
||||||
result = await vision_analyze_tool(
|
result = await vision_analyze_tool(
|
||||||
|
|
@ -311,25 +309,16 @@ class TestErrorLoggingExcInfo:
|
||||||
"tools.vision_tools._image_to_base64_data_url",
|
"tools.vision_tools._image_to_base64_data_url",
|
||||||
return_value="data:image/jpeg;base64,abc",
|
return_value="data:image/jpeg;base64,abc",
|
||||||
),
|
),
|
||||||
patch("agent.auxiliary_client.get_auxiliary_extra_body", return_value=None),
|
|
||||||
patch(
|
|
||||||
"agent.auxiliary_client.auxiliary_max_tokens_param",
|
|
||||||
return_value={"max_tokens": 2000},
|
|
||||||
),
|
|
||||||
caplog.at_level(logging.WARNING, logger="tools.vision_tools"),
|
caplog.at_level(logging.WARNING, logger="tools.vision_tools"),
|
||||||
):
|
):
|
||||||
# Mock the vision client
|
# Mock the async_call_llm function to return a mock response
|
||||||
mock_client = AsyncMock()
|
|
||||||
mock_response = MagicMock()
|
mock_response = MagicMock()
|
||||||
mock_choice = MagicMock()
|
mock_choice = MagicMock()
|
||||||
mock_choice.message.content = "A test image description"
|
mock_choice.message.content = "A test image description"
|
||||||
mock_response.choices = [mock_choice]
|
mock_response.choices = [mock_choice]
|
||||||
mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
|
|
||||||
|
|
||||||
# Patch module-level _aux_async_client so the tool doesn't bail early
|
|
||||||
with (
|
with (
|
||||||
patch("tools.vision_tools._aux_async_client", mock_client),
|
patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock, return_value=mock_response),
|
||||||
patch("tools.vision_tools.DEFAULT_VISION_MODEL", "test/model"),
|
|
||||||
):
|
):
|
||||||
# Make unlink fail to trigger cleanup warning
|
# Make unlink fail to trigger cleanup warning
|
||||||
original_unlink = Path.unlink
|
original_unlink = Path.unlink
|
||||||
|
|
|
||||||
|
|
@ -63,7 +63,7 @@ import time
|
||||||
import requests
|
import requests
|
||||||
from typing import Dict, Any, Optional, List
|
from typing import Dict, Any, Optional, List
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client
|
from agent.auxiliary_client import call_llm
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -80,38 +80,15 @@ DEFAULT_SESSION_TIMEOUT = 300
|
||||||
# Max tokens for snapshot content before summarization
|
# Max tokens for snapshot content before summarization
|
||||||
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
|
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
|
||||||
|
|
||||||
# Vision client — for browser_vision (screenshot analysis)
|
|
||||||
# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
|
|
||||||
# browser_tool module from importing (which would disable all 10 browser tools).
|
|
||||||
try:
|
|
||||||
_aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
|
|
||||||
except Exception as _init_err:
|
|
||||||
logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
|
|
||||||
_aux_vision_client, _DEFAULT_VISION_MODEL = None, None
|
|
||||||
|
|
||||||
# Text client — for page snapshot summarization (same config as web_extract)
|
def _get_vision_model() -> Optional[str]:
|
||||||
try:
|
|
||||||
_aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
|
|
||||||
except Exception as _init_err:
|
|
||||||
logger.debug("Could not initialise text auxiliary client: %s", _init_err)
|
|
||||||
_aux_text_client, _DEFAULT_TEXT_MODEL = None, None
|
|
||||||
|
|
||||||
# Module-level alias for availability checks
|
|
||||||
EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
|
|
||||||
|
|
||||||
|
|
||||||
def _get_vision_model() -> str:
|
|
||||||
"""Model for browser_vision (screenshot analysis — multimodal)."""
|
"""Model for browser_vision (screenshot analysis — multimodal)."""
|
||||||
return (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
|
return os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
|
||||||
or _DEFAULT_VISION_MODEL
|
|
||||||
or "google/gemini-3-flash-preview")
|
|
||||||
|
|
||||||
|
|
||||||
def _get_extraction_model() -> str:
|
def _get_extraction_model() -> Optional[str]:
|
||||||
"""Model for page snapshot text summarization — same as web_extract."""
|
"""Model for page snapshot text summarization — same as web_extract."""
|
||||||
return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
|
return os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
|
||||||
or _DEFAULT_TEXT_MODEL
|
|
||||||
or "google/gemini-3-flash-preview")
|
|
||||||
|
|
||||||
|
|
||||||
def _is_local_mode() -> bool:
|
def _is_local_mode() -> bool:
|
||||||
|
|
@ -941,9 +918,6 @@ def _extract_relevant_content(
|
||||||
|
|
||||||
Falls back to simple truncation when no auxiliary text model is configured.
|
Falls back to simple truncation when no auxiliary text model is configured.
|
||||||
"""
|
"""
|
||||||
if _aux_text_client is None:
|
|
||||||
return _truncate_snapshot(snapshot_text)
|
|
||||||
|
|
||||||
if user_task:
|
if user_task:
|
||||||
extraction_prompt = (
|
extraction_prompt = (
|
||||||
f"You are a content extractor for a browser automation agent.\n\n"
|
f"You are a content extractor for a browser automation agent.\n\n"
|
||||||
|
|
@ -968,13 +942,16 @@ def _extract_relevant_content(
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
call_kwargs = {
|
||||||
response = _aux_text_client.chat.completions.create(
|
"task": "web_extract",
|
||||||
model=_get_extraction_model(),
|
"messages": [{"role": "user", "content": extraction_prompt}],
|
||||||
messages=[{"role": "user", "content": extraction_prompt}],
|
"max_tokens": 4000,
|
||||||
**auxiliary_max_tokens_param(4000),
|
"temperature": 0.1,
|
||||||
temperature=0.1,
|
}
|
||||||
)
|
model = _get_extraction_model()
|
||||||
|
if model:
|
||||||
|
call_kwargs["model"] = model
|
||||||
|
response = call_llm(**call_kwargs)
|
||||||
return response.choices[0].message.content
|
return response.choices[0].message.content
|
||||||
except Exception:
|
except Exception:
|
||||||
return _truncate_snapshot(snapshot_text)
|
return _truncate_snapshot(snapshot_text)
|
||||||
|
|
@ -1497,14 +1474,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||||
|
|
||||||
effective_task_id = task_id or "default"
|
effective_task_id = task_id or "default"
|
||||||
|
|
||||||
# Check auxiliary vision client
|
|
||||||
if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
|
|
||||||
return json.dumps({
|
|
||||||
"success": False,
|
|
||||||
"error": "Browser vision unavailable: no auxiliary vision model configured. "
|
|
||||||
"Set OPENROUTER_API_KEY or configure Nous Portal to enable browser vision."
|
|
||||||
}, ensure_ascii=False)
|
|
||||||
|
|
||||||
# Save screenshot to persistent location so it can be shared with users
|
# Save screenshot to persistent location so it can be shared with users
|
||||||
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||||
screenshots_dir = hermes_home / "browser_screenshots"
|
screenshots_dir = hermes_home / "browser_screenshots"
|
||||||
|
|
@ -1562,14 +1531,13 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||||
f"Focus on answering the user's specific question."
|
f"Focus on answering the user's specific question."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Use the sync auxiliary vision client directly
|
# Use the centralized LLM router
|
||||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
|
||||||
vision_model = _get_vision_model()
|
vision_model = _get_vision_model()
|
||||||
logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
|
logger.debug("browser_vision: analysing screenshot (%d bytes)",
|
||||||
len(image_data), vision_model)
|
len(image_data))
|
||||||
response = _aux_vision_client.chat.completions.create(
|
call_kwargs = {
|
||||||
model=vision_model,
|
"task": "vision",
|
||||||
messages=[
|
"messages": [
|
||||||
{
|
{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": [
|
"content": [
|
||||||
|
|
@ -1578,9 +1546,12 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
**auxiliary_max_tokens_param(2000),
|
"max_tokens": 2000,
|
||||||
temperature=0.1,
|
"temperature": 0.1,
|
||||||
)
|
}
|
||||||
|
if vision_model:
|
||||||
|
call_kwargs["model"] = vision_model
|
||||||
|
response = call_llm(**call_kwargs)
|
||||||
|
|
||||||
analysis = response.choices[0].message.content
|
analysis = response.choices[0].message.content
|
||||||
response_data = {
|
response_data = {
|
||||||
|
|
|
||||||
|
|
@ -456,17 +456,13 @@ class SamplingHandler:
|
||||||
# Resolve model
|
# Resolve model
|
||||||
model = self._resolve_model(getattr(params, "modelPreferences", None))
|
model = self._resolve_model(getattr(params, "modelPreferences", None))
|
||||||
|
|
||||||
# Get auxiliary LLM client
|
# Get auxiliary LLM client via centralized router
|
||||||
from agent.auxiliary_client import get_text_auxiliary_client
|
from agent.auxiliary_client import call_llm
|
||||||
client, default_model = get_text_auxiliary_client()
|
|
||||||
if client is None:
|
|
||||||
self.metrics["errors"] += 1
|
|
||||||
return self._error("No LLM provider available for sampling")
|
|
||||||
|
|
||||||
resolved_model = model or default_model
|
# Model whitelist check (we need to resolve model before calling)
|
||||||
|
resolved_model = model or self.model_override or ""
|
||||||
|
|
||||||
# Model whitelist check
|
if self.allowed_models and resolved_model and resolved_model not in self.allowed_models:
|
||||||
if self.allowed_models and resolved_model not in self.allowed_models:
|
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"MCP server '%s' requested model '%s' not in allowed_models",
|
"MCP server '%s' requested model '%s' not in allowed_models",
|
||||||
self.server_name, resolved_model,
|
self.server_name, resolved_model,
|
||||||
|
|
@ -484,20 +480,15 @@ class SamplingHandler:
|
||||||
|
|
||||||
# Build LLM call kwargs
|
# Build LLM call kwargs
|
||||||
max_tokens = min(params.maxTokens, self.max_tokens_cap)
|
max_tokens = min(params.maxTokens, self.max_tokens_cap)
|
||||||
call_kwargs: dict = {
|
call_temperature = None
|
||||||
"model": resolved_model,
|
|
||||||
"messages": messages,
|
|
||||||
"max_tokens": max_tokens,
|
|
||||||
}
|
|
||||||
if hasattr(params, "temperature") and params.temperature is not None:
|
if hasattr(params, "temperature") and params.temperature is not None:
|
||||||
call_kwargs["temperature"] = params.temperature
|
call_temperature = params.temperature
|
||||||
if stop := getattr(params, "stopSequences", None):
|
|
||||||
call_kwargs["stop"] = stop
|
|
||||||
|
|
||||||
# Forward server-provided tools
|
# Forward server-provided tools
|
||||||
|
call_tools = None
|
||||||
server_tools = getattr(params, "tools", None)
|
server_tools = getattr(params, "tools", None)
|
||||||
if server_tools:
|
if server_tools:
|
||||||
call_kwargs["tools"] = [
|
call_tools = [
|
||||||
{
|
{
|
||||||
"type": "function",
|
"type": "function",
|
||||||
"function": {
|
"function": {
|
||||||
|
|
@ -508,9 +499,6 @@ class SamplingHandler:
|
||||||
}
|
}
|
||||||
for t in server_tools
|
for t in server_tools
|
||||||
]
|
]
|
||||||
if tool_choice := getattr(params, "toolChoice", None):
|
|
||||||
mode = getattr(tool_choice, "mode", "auto")
|
|
||||||
call_kwargs["tool_choice"] = {"auto": "auto", "required": "required", "none": "none"}.get(mode, "auto")
|
|
||||||
|
|
||||||
logger.log(
|
logger.log(
|
||||||
self.audit_level,
|
self.audit_level,
|
||||||
|
|
@ -520,7 +508,15 @@ class SamplingHandler:
|
||||||
|
|
||||||
# Offload sync LLM call to thread (non-blocking)
|
# Offload sync LLM call to thread (non-blocking)
|
||||||
def _sync_call():
|
def _sync_call():
|
||||||
return client.chat.completions.create(**call_kwargs)
|
return call_llm(
|
||||||
|
task="mcp",
|
||||||
|
model=resolved_model or None,
|
||||||
|
messages=messages,
|
||||||
|
temperature=call_temperature,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
tools=call_tools,
|
||||||
|
timeout=self.timeout,
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await asyncio.wait_for(
|
response = await asyncio.wait_for(
|
||||||
|
|
|
||||||
|
|
@ -1,39 +1,30 @@
|
||||||
"""Shared OpenRouter API client for Hermes tools.
|
"""Shared OpenRouter API client for Hermes tools.
|
||||||
|
|
||||||
Provides a single lazy-initialized AsyncOpenAI client that all tool modules
|
Provides a single lazy-initialized AsyncOpenAI client that all tool modules
|
||||||
can share, eliminating the duplicated _get_openrouter_client() /
|
can share. Routes through the centralized provider router in
|
||||||
_get_summarizer_client() pattern previously copy-pasted across web_tools,
|
agent/auxiliary_client.py so auth, headers, and API format are handled
|
||||||
vision_tools, mixture_of_agents_tool, and session_search_tool.
|
consistently.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from openai import AsyncOpenAI
|
_client = None
|
||||||
from hermes_constants import OPENROUTER_BASE_URL
|
|
||||||
|
|
||||||
_client: AsyncOpenAI | None = None
|
|
||||||
|
|
||||||
|
|
||||||
def get_async_client() -> AsyncOpenAI:
|
def get_async_client():
|
||||||
"""Return a shared AsyncOpenAI client pointed at OpenRouter.
|
"""Return a shared async OpenAI-compatible client for OpenRouter.
|
||||||
|
|
||||||
The client is created lazily on first call and reused thereafter.
|
The client is created lazily on first call and reused thereafter.
|
||||||
|
Uses the centralized provider router for auth and client construction.
|
||||||
Raises ValueError if OPENROUTER_API_KEY is not set.
|
Raises ValueError if OPENROUTER_API_KEY is not set.
|
||||||
"""
|
"""
|
||||||
global _client
|
global _client
|
||||||
if _client is None:
|
if _client is None:
|
||||||
api_key = os.getenv("OPENROUTER_API_KEY")
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
if not api_key:
|
client, _model = resolve_provider_client("openrouter", async_mode=True)
|
||||||
|
if client is None:
|
||||||
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
raise ValueError("OPENROUTER_API_KEY environment variable not set")
|
||||||
_client = AsyncOpenAI(
|
_client = client
|
||||||
api_key=api_key,
|
|
||||||
base_url=OPENROUTER_BASE_URL,
|
|
||||||
default_headers={
|
|
||||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
return _client
|
return _client
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,13 +22,7 @@ import os
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List, Optional, Union
|
from typing import Dict, Any, List, Optional, Union
|
||||||
|
|
||||||
from openai import AsyncOpenAI, OpenAI
|
from agent.auxiliary_client import async_call_llm
|
||||||
|
|
||||||
from agent.auxiliary_client import get_async_text_auxiliary_client
|
|
||||||
|
|
||||||
# Resolve the async auxiliary client at import time so we have the model slug.
|
|
||||||
# Handles Codex Responses API adapter transparently.
|
|
||||||
_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
|
|
||||||
MAX_SESSION_CHARS = 100_000
|
MAX_SESSION_CHARS = 100_000
|
||||||
MAX_SUMMARY_TOKENS = 10000
|
MAX_SUMMARY_TOKENS = 10000
|
||||||
|
|
||||||
|
|
@ -156,26 +150,22 @@ async def _summarize_session(
|
||||||
f"Summarize this conversation with focus on: {query}"
|
f"Summarize this conversation with focus on: {query}"
|
||||||
)
|
)
|
||||||
|
|
||||||
if _async_aux_client is None or _SUMMARIZER_MODEL is None:
|
|
||||||
logging.warning("No auxiliary model available for session summarization")
|
|
||||||
return None
|
|
||||||
|
|
||||||
max_retries = 3
|
max_retries = 3
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
try:
|
try:
|
||||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
response = await async_call_llm(
|
||||||
_extra = get_auxiliary_extra_body()
|
task="session_search",
|
||||||
response = await _async_aux_client.chat.completions.create(
|
|
||||||
model=_SUMMARIZER_MODEL,
|
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
{"role": "user", "content": user_prompt},
|
{"role": "user", "content": user_prompt},
|
||||||
],
|
],
|
||||||
**({} if not _extra else {"extra_body": _extra}),
|
|
||||||
temperature=0.1,
|
temperature=0.1,
|
||||||
**auxiliary_max_tokens_param(MAX_SUMMARY_TOKENS),
|
max_tokens=MAX_SUMMARY_TOKENS,
|
||||||
)
|
)
|
||||||
return response.choices[0].message.content.strip()
|
return response.choices[0].message.content.strip()
|
||||||
|
except RuntimeError:
|
||||||
|
logging.warning("No auxiliary model available for session summarization")
|
||||||
|
return None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
await asyncio.sleep(1 * (attempt + 1))
|
await asyncio.sleep(1 * (attempt + 1))
|
||||||
|
|
@ -333,8 +323,6 @@ def session_search(
|
||||||
|
|
||||||
def check_session_search_requirements() -> bool:
|
def check_session_search_requirements() -> bool:
|
||||||
"""Requires SQLite state database and an auxiliary text model."""
|
"""Requires SQLite state database and an auxiliary text model."""
|
||||||
if _async_aux_client is None:
|
|
||||||
return False
|
|
||||||
try:
|
try:
|
||||||
from hermes_state import DEFAULT_DB_PATH
|
from hermes_state import DEFAULT_DB_PATH
|
||||||
return DEFAULT_DB_PATH.parent.exists()
|
return DEFAULT_DB_PATH.parent.exists()
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from hermes_constants import OPENROUTER_BASE_URL
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
@ -934,25 +934,12 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
|
||||||
if not model:
|
if not model:
|
||||||
return static_result
|
return static_result
|
||||||
|
|
||||||
# Call the LLM via the OpenAI SDK (same pattern as run_agent.py)
|
# Call the LLM via the centralized provider router
|
||||||
try:
|
try:
|
||||||
from openai import OpenAI
|
from agent.auxiliary_client import call_llm
|
||||||
import os
|
|
||||||
|
|
||||||
api_key = os.getenv("OPENROUTER_API_KEY", "")
|
response = call_llm(
|
||||||
if not api_key:
|
provider="openrouter",
|
||||||
return static_result
|
|
||||||
|
|
||||||
client = OpenAI(
|
|
||||||
base_url=OPENROUTER_BASE_URL,
|
|
||||||
api_key=api_key,
|
|
||||||
default_headers={
|
|
||||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
|
||||||
},
|
|
||||||
)
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=model,
|
model=model,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
|
|
|
||||||
|
|
@ -37,28 +37,11 @@ from pathlib import Path
|
||||||
from typing import Any, Awaitable, Dict, Optional
|
from typing import Any, Awaitable, Dict, Optional
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
import httpx
|
import httpx
|
||||||
from openai import AsyncOpenAI
|
from agent.auxiliary_client import async_call_llm
|
||||||
from agent.auxiliary_client import get_vision_auxiliary_client
|
|
||||||
from tools.debug_helpers import DebugSession
|
from tools.debug_helpers import DebugSession
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# Resolve vision auxiliary client at module level; build an async wrapper.
|
|
||||||
_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
|
|
||||||
_aux_async_client: AsyncOpenAI | None = None
|
|
||||||
if _aux_sync_client is not None:
|
|
||||||
_async_kwargs = {
|
|
||||||
"api_key": _aux_sync_client.api_key,
|
|
||||||
"base_url": str(_aux_sync_client.base_url),
|
|
||||||
}
|
|
||||||
if "openrouter" in str(_aux_sync_client.base_url).lower():
|
|
||||||
_async_kwargs["default_headers"] = {
|
|
||||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
|
||||||
}
|
|
||||||
_aux_async_client = AsyncOpenAI(**_async_kwargs)
|
|
||||||
|
|
||||||
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
|
_debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -197,7 +180,7 @@ def _image_to_base64_data_url(image_path: Path, mime_type: Optional[str] = None)
|
||||||
async def vision_analyze_tool(
|
async def vision_analyze_tool(
|
||||||
image_url: str,
|
image_url: str,
|
||||||
user_prompt: str,
|
user_prompt: str,
|
||||||
model: str = DEFAULT_VISION_MODEL,
|
model: str = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Analyze an image from a URL or local file path using vision AI.
|
Analyze an image from a URL or local file path using vision AI.
|
||||||
|
|
@ -257,15 +240,6 @@ async def vision_analyze_tool(
|
||||||
logger.info("Analyzing image: %s", image_url[:60])
|
logger.info("Analyzing image: %s", image_url[:60])
|
||||||
logger.info("User prompt: %s", user_prompt[:100])
|
logger.info("User prompt: %s", user_prompt[:100])
|
||||||
|
|
||||||
# Check auxiliary vision client availability
|
|
||||||
if _aux_async_client is None or DEFAULT_VISION_MODEL is None:
|
|
||||||
logger.error("Vision analysis unavailable: no auxiliary vision model configured")
|
|
||||||
return json.dumps({
|
|
||||||
"success": False,
|
|
||||||
"analysis": "Vision analysis unavailable: no auxiliary vision model configured. "
|
|
||||||
"Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools."
|
|
||||||
}, indent=2, ensure_ascii=False)
|
|
||||||
|
|
||||||
# Determine if this is a local file path or a remote URL
|
# Determine if this is a local file path or a remote URL
|
||||||
local_path = Path(image_url)
|
local_path = Path(image_url)
|
||||||
if local_path.is_file():
|
if local_path.is_file():
|
||||||
|
|
@ -321,18 +295,18 @@ async def vision_analyze_tool(
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
logger.info("Processing image with %s...", model)
|
logger.info("Processing image with vision model...")
|
||||||
|
|
||||||
# Call the vision API
|
# Call the vision API via centralized router
|
||||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
call_kwargs = {
|
||||||
_extra = get_auxiliary_extra_body()
|
"task": "vision",
|
||||||
response = await _aux_async_client.chat.completions.create(
|
"messages": messages,
|
||||||
model=model,
|
"temperature": 0.1,
|
||||||
messages=messages,
|
"max_tokens": 2000,
|
||||||
temperature=0.1,
|
}
|
||||||
**auxiliary_max_tokens_param(2000),
|
if model:
|
||||||
**({} if not _extra else {"extra_body": _extra}),
|
call_kwargs["model"] = model
|
||||||
)
|
response = await async_call_llm(**call_kwargs)
|
||||||
|
|
||||||
# Extract the analysis
|
# Extract the analysis
|
||||||
analysis = response.choices[0].message.content.strip()
|
analysis = response.choices[0].message.content.strip()
|
||||||
|
|
@ -359,10 +333,28 @@ async def vision_analyze_tool(
|
||||||
error_msg = f"Error analyzing image: {str(e)}"
|
error_msg = f"Error analyzing image: {str(e)}"
|
||||||
logger.error("%s", error_msg, exc_info=True)
|
logger.error("%s", error_msg, exc_info=True)
|
||||||
|
|
||||||
|
# Detect vision capability errors — give the model a clear message
|
||||||
|
# so it can inform the user instead of a cryptic API error.
|
||||||
|
err_str = str(e).lower()
|
||||||
|
if any(hint in err_str for hint in (
|
||||||
|
"does not support", "not support image", "invalid_request",
|
||||||
|
"content_policy", "image_url", "multimodal",
|
||||||
|
"unrecognized request argument", "image input",
|
||||||
|
)):
|
||||||
|
analysis = (
|
||||||
|
f"{model} does not support vision or our request was not "
|
||||||
|
f"accepted by the server. Error: {e}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
analysis = (
|
||||||
|
"There was a problem with the request and the image could not "
|
||||||
|
f"be analyzed. Error: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
# Prepare error response
|
# Prepare error response
|
||||||
result = {
|
result = {
|
||||||
"success": False,
|
"success": False,
|
||||||
"analysis": "There was a problem with the request and the image could not be analyzed."
|
"analysis": analysis,
|
||||||
}
|
}
|
||||||
|
|
||||||
debug_call_data["error"] = error_msg
|
debug_call_data["error"] = error_msg
|
||||||
|
|
@ -385,7 +377,18 @@ async def vision_analyze_tool(
|
||||||
|
|
||||||
def check_vision_requirements() -> bool:
|
def check_vision_requirements() -> bool:
|
||||||
"""Check if an auxiliary vision model is available."""
|
"""Check if an auxiliary vision model is available."""
|
||||||
return _aux_async_client is not None
|
try:
|
||||||
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
|
client, _ = resolve_provider_client("openrouter")
|
||||||
|
if client is not None:
|
||||||
|
return True
|
||||||
|
client, _ = resolve_provider_client("nous")
|
||||||
|
if client is not None:
|
||||||
|
return True
|
||||||
|
client, _ = resolve_provider_client("custom")
|
||||||
|
return client is not None
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def get_debug_session_info() -> Dict[str, Any]:
|
def get_debug_session_info() -> Dict[str, Any]:
|
||||||
|
|
@ -413,10 +416,9 @@ if __name__ == "__main__":
|
||||||
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
|
print("Set OPENROUTER_API_KEY or configure Nous Portal to enable vision tools.")
|
||||||
exit(1)
|
exit(1)
|
||||||
else:
|
else:
|
||||||
print(f"✅ Vision model available: {DEFAULT_VISION_MODEL}")
|
print("✅ Vision model available")
|
||||||
|
|
||||||
print("🛠️ Vision tools ready for use!")
|
print("🛠️ Vision tools ready for use!")
|
||||||
print(f"🧠 Using model: {DEFAULT_VISION_MODEL}")
|
|
||||||
|
|
||||||
# Show debug mode status
|
# Show debug mode status
|
||||||
if _debug.active:
|
if _debug.active:
|
||||||
|
|
@ -483,9 +485,7 @@ def _handle_vision_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]:
|
||||||
"Fully describe and explain everything about this image, then answer the "
|
"Fully describe and explain everything about this image, then answer the "
|
||||||
f"following question:\n\n{question}"
|
f"following question:\n\n{question}"
|
||||||
)
|
)
|
||||||
model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
|
model = os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None
|
||||||
or DEFAULT_VISION_MODEL
|
|
||||||
or "google/gemini-3-flash-preview")
|
|
||||||
return vision_analyze_tool(image_url, full_prompt, model)
|
return vision_analyze_tool(image_url, full_prompt, model)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,8 +47,7 @@ import re
|
||||||
import asyncio
|
import asyncio
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any, Optional
|
||||||
from firecrawl import Firecrawl
|
from firecrawl import Firecrawl
|
||||||
from openai import AsyncOpenAI
|
from agent.auxiliary_client import async_call_llm
|
||||||
from agent.auxiliary_client import get_async_text_auxiliary_client
|
|
||||||
from tools.debug_helpers import DebugSession
|
from tools.debug_helpers import DebugSession
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@ -83,15 +82,8 @@ def _get_firecrawl_client():
|
||||||
|
|
||||||
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
||||||
|
|
||||||
# Resolve async auxiliary client at module level.
|
# Allow per-task override via env var
|
||||||
# Handles Codex Responses API adapter transparently.
|
DEFAULT_SUMMARIZER_MODEL = os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() or None
|
||||||
_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract")
|
|
||||||
|
|
||||||
# Allow per-task override via config.yaml auxiliary.web_extract_model
|
|
||||||
DEFAULT_SUMMARIZER_MODEL = (
|
|
||||||
os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
|
|
||||||
or _DEFAULT_SUMMARIZER_MODEL
|
|
||||||
)
|
|
||||||
|
|
||||||
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
|
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
|
||||||
|
|
||||||
|
|
@ -249,22 +241,22 @@ Create a markdown summary that captures all key information in a well-organized,
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
try:
|
try:
|
||||||
if _aux_async_client is None:
|
call_kwargs = {
|
||||||
logger.warning("No auxiliary model available for web content processing")
|
"task": "web_extract",
|
||||||
return None
|
"messages": [
|
||||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
|
||||||
_extra = get_auxiliary_extra_body()
|
|
||||||
response = await _aux_async_client.chat.completions.create(
|
|
||||||
model=model,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": system_prompt},
|
{"role": "system", "content": system_prompt},
|
||||||
{"role": "user", "content": user_prompt}
|
{"role": "user", "content": user_prompt}
|
||||||
],
|
],
|
||||||
temperature=0.1,
|
"temperature": 0.1,
|
||||||
**auxiliary_max_tokens_param(max_tokens),
|
"max_tokens": max_tokens,
|
||||||
**({} if not _extra else {"extra_body": _extra}),
|
}
|
||||||
)
|
if model:
|
||||||
|
call_kwargs["model"] = model
|
||||||
|
response = await async_call_llm(**call_kwargs)
|
||||||
return response.choices[0].message.content.strip()
|
return response.choices[0].message.content.strip()
|
||||||
|
except RuntimeError:
|
||||||
|
logger.warning("No auxiliary model available for web content processing")
|
||||||
|
return None
|
||||||
except Exception as api_error:
|
except Exception as api_error:
|
||||||
last_error = api_error
|
last_error = api_error
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
|
|
@ -368,25 +360,18 @@ Synthesize these into ONE cohesive, comprehensive summary that:
|
||||||
Create a single, unified markdown summary."""
|
Create a single, unified markdown summary."""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if _aux_async_client is None:
|
call_kwargs = {
|
||||||
logger.warning("No auxiliary model for synthesis, concatenating summaries")
|
"task": "web_extract",
|
||||||
fallback = "\n\n".join(summaries)
|
"messages": [
|
||||||
if len(fallback) > max_output_size:
|
|
||||||
fallback = fallback[:max_output_size] + "\n\n[... truncated ...]"
|
|
||||||
return fallback
|
|
||||||
|
|
||||||
from agent.auxiliary_client import get_auxiliary_extra_body, auxiliary_max_tokens_param
|
|
||||||
_extra = get_auxiliary_extra_body()
|
|
||||||
response = await _aux_async_client.chat.completions.create(
|
|
||||||
model=model,
|
|
||||||
messages=[
|
|
||||||
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
|
{"role": "system", "content": "You synthesize multiple summaries into one cohesive, comprehensive summary. Be thorough but concise."},
|
||||||
{"role": "user", "content": synthesis_prompt}
|
{"role": "user", "content": synthesis_prompt}
|
||||||
],
|
],
|
||||||
temperature=0.1,
|
"temperature": 0.1,
|
||||||
**auxiliary_max_tokens_param(20000),
|
"max_tokens": 20000,
|
||||||
**({} if not _extra else {"extra_body": _extra}),
|
}
|
||||||
)
|
if model:
|
||||||
|
call_kwargs["model"] = model
|
||||||
|
response = await async_call_llm(**call_kwargs)
|
||||||
final_summary = response.choices[0].message.content.strip()
|
final_summary = response.choices[0].message.content.strip()
|
||||||
|
|
||||||
# Enforce hard cap
|
# Enforce hard cap
|
||||||
|
|
@ -713,8 +698,8 @@ async def web_extract_tool(
|
||||||
debug_call_data["pages_extracted"] = pages_extracted
|
debug_call_data["pages_extracted"] = pages_extracted
|
||||||
debug_call_data["original_response_size"] = len(json.dumps(response))
|
debug_call_data["original_response_size"] = len(json.dumps(response))
|
||||||
|
|
||||||
# Process each result with LLM if enabled and auxiliary client is available
|
# Process each result with LLM if enabled
|
||||||
if use_llm_processing and _aux_async_client is not None:
|
if use_llm_processing:
|
||||||
logger.info("Processing extracted content with LLM (parallel)...")
|
logger.info("Processing extracted content with LLM (parallel)...")
|
||||||
debug_call_data["processing_applied"].append("llm_processing")
|
debug_call_data["processing_applied"].append("llm_processing")
|
||||||
|
|
||||||
|
|
@ -780,10 +765,6 @@ async def web_extract_tool(
|
||||||
else:
|
else:
|
||||||
logger.warning("%s (no content to process)", url)
|
logger.warning("%s (no content to process)", url)
|
||||||
else:
|
else:
|
||||||
if use_llm_processing and _aux_async_client is None:
|
|
||||||
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
|
|
||||||
debug_call_data["processing_applied"].append("llm_processing_unavailable")
|
|
||||||
|
|
||||||
# Print summary of extracted pages for debugging (original behavior)
|
# Print summary of extracted pages for debugging (original behavior)
|
||||||
for result in response.get('results', []):
|
for result in response.get('results', []):
|
||||||
url = result.get('url', 'Unknown URL')
|
url = result.get('url', 'Unknown URL')
|
||||||
|
|
@ -1013,8 +994,8 @@ async def web_crawl_tool(
|
||||||
debug_call_data["pages_crawled"] = pages_crawled
|
debug_call_data["pages_crawled"] = pages_crawled
|
||||||
debug_call_data["original_response_size"] = len(json.dumps(response))
|
debug_call_data["original_response_size"] = len(json.dumps(response))
|
||||||
|
|
||||||
# Process each result with LLM if enabled and auxiliary client is available
|
# Process each result with LLM if enabled
|
||||||
if use_llm_processing and _aux_async_client is not None:
|
if use_llm_processing:
|
||||||
logger.info("Processing crawled content with LLM (parallel)...")
|
logger.info("Processing crawled content with LLM (parallel)...")
|
||||||
debug_call_data["processing_applied"].append("llm_processing")
|
debug_call_data["processing_applied"].append("llm_processing")
|
||||||
|
|
||||||
|
|
@ -1080,10 +1061,6 @@ async def web_crawl_tool(
|
||||||
else:
|
else:
|
||||||
logger.warning("%s (no content to process)", page_url)
|
logger.warning("%s (no content to process)", page_url)
|
||||||
else:
|
else:
|
||||||
if use_llm_processing and _aux_async_client is None:
|
|
||||||
logger.warning("LLM processing requested but no auxiliary model available, returning raw content")
|
|
||||||
debug_call_data["processing_applied"].append("llm_processing_unavailable")
|
|
||||||
|
|
||||||
# Print summary of crawled pages for debugging (original behavior)
|
# Print summary of crawled pages for debugging (original behavior)
|
||||||
for result in response.get('results', []):
|
for result in response.get('results', []):
|
||||||
page_url = result.get('url', 'Unknown URL')
|
page_url = result.get('url', 'Unknown URL')
|
||||||
|
|
@ -1138,7 +1115,15 @@ def check_firecrawl_api_key() -> bool:
|
||||||
|
|
||||||
def check_auxiliary_model() -> bool:
|
def check_auxiliary_model() -> bool:
|
||||||
"""Check if an auxiliary text model is available for LLM content processing."""
|
"""Check if an auxiliary text model is available for LLM content processing."""
|
||||||
return _aux_async_client is not None
|
try:
|
||||||
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
|
for p in ("openrouter", "nous", "custom", "codex"):
|
||||||
|
client, _ = resolve_provider_client(p)
|
||||||
|
if client is not None:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
except Exception:
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def get_debug_session_info() -> Dict[str, Any]:
|
def get_debug_session_info() -> Dict[str, Any]:
|
||||||
|
|
|
||||||
|
|
@ -344,38 +344,65 @@ class TrajectoryCompressor:
|
||||||
raise RuntimeError(f"Failed to load tokenizer '{self.config.tokenizer_name}': {e}")
|
raise RuntimeError(f"Failed to load tokenizer '{self.config.tokenizer_name}': {e}")
|
||||||
|
|
||||||
def _init_summarizer(self):
|
def _init_summarizer(self):
|
||||||
"""Initialize OpenRouter client for summarization (sync and async)."""
|
"""Initialize LLM routing for summarization (sync and async).
|
||||||
api_key = os.getenv(self.config.api_key_env)
|
|
||||||
if not api_key:
|
Uses call_llm/async_call_llm from the centralized provider router
|
||||||
raise RuntimeError(f"Missing API key. Set {self.config.api_key_env} environment variable.")
|
which handles auth, headers, and provider detection internally.
|
||||||
|
For custom endpoints, falls back to raw client construction.
|
||||||
from openai import OpenAI, AsyncOpenAI
|
"""
|
||||||
|
from agent.auxiliary_client import call_llm, async_call_llm
|
||||||
# OpenRouter app attribution headers (only for OpenRouter endpoints)
|
|
||||||
extra = {}
|
provider = self._detect_provider()
|
||||||
if "openrouter" in self.config.base_url.lower():
|
if provider:
|
||||||
extra["default_headers"] = {
|
# Store provider for use in _generate_summary calls
|
||||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
self._llm_provider = provider
|
||||||
"X-OpenRouter-Title": "Hermes Agent",
|
self._use_call_llm = True
|
||||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
# Verify the provider is available
|
||||||
}
|
from agent.auxiliary_client import resolve_provider_client
|
||||||
|
client, _ = resolve_provider_client(
|
||||||
# Sync client (for backwards compatibility)
|
provider, model=self.config.summarization_model)
|
||||||
self.client = OpenAI(
|
if client is None:
|
||||||
api_key=api_key,
|
raise RuntimeError(
|
||||||
base_url=self.config.base_url,
|
f"Provider '{provider}' is not configured. "
|
||||||
**extra,
|
f"Check your API key or run: hermes setup")
|
||||||
)
|
self.client = None # Not used directly
|
||||||
|
self.async_client = None # Not used directly
|
||||||
# Async client for parallel processing
|
else:
|
||||||
self.async_client = AsyncOpenAI(
|
# Custom endpoint — use config's raw base_url + api_key_env
|
||||||
api_key=api_key,
|
self._use_call_llm = False
|
||||||
base_url=self.config.base_url,
|
api_key = os.getenv(self.config.api_key_env)
|
||||||
**extra,
|
if not api_key:
|
||||||
)
|
raise RuntimeError(
|
||||||
|
f"Missing API key. Set {self.config.api_key_env} "
|
||||||
print(f"✅ Initialized OpenRouter client: {self.config.summarization_model}")
|
f"environment variable.")
|
||||||
|
from openai import OpenAI, AsyncOpenAI
|
||||||
|
self.client = OpenAI(
|
||||||
|
api_key=api_key, base_url=self.config.base_url)
|
||||||
|
self.async_client = AsyncOpenAI(
|
||||||
|
api_key=api_key, base_url=self.config.base_url)
|
||||||
|
|
||||||
|
print(f"✅ Initialized summarizer client: {self.config.summarization_model}")
|
||||||
print(f" Max concurrent requests: {self.config.max_concurrent_requests}")
|
print(f" Max concurrent requests: {self.config.max_concurrent_requests}")
|
||||||
|
|
||||||
|
def _detect_provider(self) -> str:
|
||||||
|
"""Detect the provider name from the configured base_url."""
|
||||||
|
url = self.config.base_url.lower()
|
||||||
|
if "openrouter" in url:
|
||||||
|
return "openrouter"
|
||||||
|
if "nousresearch.com" in url:
|
||||||
|
return "nous"
|
||||||
|
if "chatgpt.com/backend-api/codex" in url:
|
||||||
|
return "codex"
|
||||||
|
if "api.z.ai" in url:
|
||||||
|
return "zai"
|
||||||
|
if "moonshot.ai" in url or "api.kimi.com" in url:
|
||||||
|
return "kimi-coding"
|
||||||
|
if "minimaxi.com" in url:
|
||||||
|
return "minimax-cn"
|
||||||
|
if "minimax.io" in url:
|
||||||
|
return "minimax"
|
||||||
|
# Unknown base_url — not a known provider
|
||||||
|
return ""
|
||||||
|
|
||||||
def count_tokens(self, text: str) -> int:
|
def count_tokens(self, text: str) -> int:
|
||||||
"""Count tokens in text using the configured tokenizer."""
|
"""Count tokens in text using the configured tokenizer."""
|
||||||
|
|
@ -501,12 +528,22 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||||
try:
|
try:
|
||||||
metrics.summarization_api_calls += 1
|
metrics.summarization_api_calls += 1
|
||||||
|
|
||||||
response = self.client.chat.completions.create(
|
if getattr(self, '_use_call_llm', False):
|
||||||
model=self.config.summarization_model,
|
from agent.auxiliary_client import call_llm
|
||||||
messages=[{"role": "user", "content": prompt}],
|
response = call_llm(
|
||||||
temperature=self.config.temperature,
|
provider=self._llm_provider,
|
||||||
max_tokens=self.config.summary_target_tokens * 2,
|
model=self.config.summarization_model,
|
||||||
)
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
temperature=self.config.temperature,
|
||||||
|
max_tokens=self.config.summary_target_tokens * 2,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
response = self.client.chat.completions.create(
|
||||||
|
model=self.config.summarization_model,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
temperature=self.config.temperature,
|
||||||
|
max_tokens=self.config.summary_target_tokens * 2,
|
||||||
|
)
|
||||||
|
|
||||||
summary = response.choices[0].message.content.strip()
|
summary = response.choices[0].message.content.strip()
|
||||||
|
|
||||||
|
|
@ -558,12 +595,22 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||||
try:
|
try:
|
||||||
metrics.summarization_api_calls += 1
|
metrics.summarization_api_calls += 1
|
||||||
|
|
||||||
response = await self.async_client.chat.completions.create(
|
if getattr(self, '_use_call_llm', False):
|
||||||
model=self.config.summarization_model,
|
from agent.auxiliary_client import async_call_llm
|
||||||
messages=[{"role": "user", "content": prompt}],
|
response = await async_call_llm(
|
||||||
temperature=self.config.temperature,
|
provider=self._llm_provider,
|
||||||
max_tokens=self.config.summary_target_tokens * 2,
|
model=self.config.summarization_model,
|
||||||
)
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
temperature=self.config.temperature,
|
||||||
|
max_tokens=self.config.summary_target_tokens * 2,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
response = await self.async_client.chat.completions.create(
|
||||||
|
model=self.config.summarization_model,
|
||||||
|
messages=[{"role": "user", "content": prompt}],
|
||||||
|
temperature=self.config.temperature,
|
||||||
|
max_tokens=self.config.summary_target_tokens * 2,
|
||||||
|
)
|
||||||
|
|
||||||
summary = response.choices[0].message.content.strip()
|
summary = response.choices[0].message.content.strip()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue