refactor: unify vision backend gating

This commit is contained in:
teknium1 2026-03-14 20:22:13 -07:00
parent 799114ac8b
commit dc11b86e4b
7 changed files with 292 additions and 171 deletions

View file

@@ -460,33 +460,15 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status = []
# Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
_has_vision = False
if get_env_value("OPENROUTER_API_KEY"):
_has_vision = True
else:
try:
_vauth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
if _vauth_path.is_file():
import json as _vjson
# Vision — use the same runtime resolver as the actual vision tools
try:
from agent.auxiliary_client import get_available_vision_backends
_vauth = _vjson.loads(_vauth_path.read_text())
if _vauth.get("active_provider") == "nous":
_np = _vauth.get("providers", {}).get("nous", {})
if _np.get("agent_key") or _np.get("access_token"):
_has_vision = True
elif _vauth.get("active_provider") == "openai-codex":
_cp = _vauth.get("providers", {}).get("openai-codex", {})
if _cp.get("tokens", {}).get("access_token"):
_has_vision = True
except Exception:
pass
if not _has_vision:
_oai_base = get_env_value("OPENAI_BASE_URL") or ""
if get_env_value("OPENAI_API_KEY") and "api.openai.com" in _oai_base.lower():
_has_vision = True
_vision_backends = get_available_vision_backends()
except Exception:
_vision_backends = []
if _has_vision:
if _vision_backends:
tool_status.append(("Vision (image analysis)", True, None))
else:
tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
@@ -1276,58 +1258,22 @@ def setup_model_provider(config: dict):
selected_provider = "openrouter"
# ── Vision & Image Analysis Setup ──
# Vision requires a multimodal-capable provider. Check whether the user's
# chosen provider already covers it — if so, skip the prompt entirely.
_vision_needs_setup = True
# Keep setup aligned with the actual runtime resolver the vision tools use.
try:
from agent.auxiliary_client import get_available_vision_backends
if selected_provider == "openrouter":
# OpenRouter → Gemini for vision, already configured
_vision_needs_setup = False
elif selected_provider == "nous":
# Nous Portal OAuth → Gemini via Nous, already configured
_vision_needs_setup = False
elif selected_provider == "openai-codex":
# Codex OAuth → gpt-5.3-codex supports vision
_vision_needs_setup = False
elif selected_provider == "custom":
_custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
if "api.openai.com" in _custom_base:
# Direct OpenAI endpoint — show vision model picker
print()
print_header("Vision Model")
print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Keep default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
print_success(f"Vision model set to {_selected_vision_model}")
_vision_needs_setup = False
_vision_backends = set(get_available_vision_backends())
except Exception:
_vision_backends = set()
# Even for providers without native vision, check if existing credentials
# from a previous setup already cover it (e.g. user had OpenRouter before
# switching to z.ai)
if _vision_needs_setup:
if get_env_value("OPENROUTER_API_KEY"):
_vision_needs_setup = False
else:
# Check for Nous Portal OAuth in auth.json
try:
_auth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
if _auth_path.is_file():
import json as _json
_vision_needs_setup = not bool(_vision_backends)
_auth_data = _json.loads(_auth_path.read_text())
if _auth_data.get("active_provider") == "nous":
_nous_p = _auth_data.get("providers", {}).get("nous", {})
if _nous_p.get("agent_key") or _nous_p.get("access_token"):
_vision_needs_setup = False
except Exception:
pass
if selected_provider in {"openrouter", "nous", "openai-codex"}:
# If the user just selected one of our known-good vision backends during
# setup, treat vision as covered. Auth/setup failure returns earlier.
_vision_needs_setup = False
elif selected_provider == "custom" and "custom" in _vision_backends:
_vision_needs_setup = False
if _vision_needs_setup:
_prov_names = {
@@ -1343,44 +1289,54 @@ def setup_model_provider(config: dict):
print()
print_header("Vision & Image Analysis (optional)")
print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
print_info("doesn't natively support it. Choose how to enable vision,")
print_info("or skip to configure later.")
print_info(f"Vision uses a separate multimodal backend. {_prov_display}")
print_info("doesn't currently provide one Hermes can auto-use for vision,")
print_info("so choose a backend now or skip and configure later.")
print()
_vision_choices = [
"OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
"OpenAI — enter API key & choose a vision model",
"OpenAI-compatible endpoint — base URL, API key, and vision model",
"Skip for now",
]
_vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)
if _vision_idx == 0: # OpenRouter
_or_key = prompt(" OpenRouter API key", password=True)
_or_key = prompt(" OpenRouter API key", password=True).strip()
if _or_key:
save_env_value("OPENROUTER_API_KEY", _or_key)
print_success("OpenRouter key saved — vision will use Gemini")
else:
print_info("Skipped — vision won't be available")
elif _vision_idx == 1: # OpenAI
_oai_key = prompt(" OpenAI API key", password=True)
elif _vision_idx == 1: # OpenAI-compatible endpoint
_base_url = prompt(" Base URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
_api_key_label = " API key"
if "api.openai.com" in _base_url.lower():
_api_key_label = " OpenAI API key"
_oai_key = prompt(_api_key_label, password=True).strip()
if _oai_key:
save_env_value("OPENAI_API_KEY", _oai_key)
save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
save_env_value("OPENAI_BASE_URL", _base_url)
if "api.openai.com" in _base_url.lower():
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
else:
_selected_vision_model = prompt(" Vision model (blank = use main/custom default)").strip()
save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
print_success(f"Vision configured with OpenAI ({_selected_vision_model})")
print_success(
f"Vision configured with {_base_url}"
+ (f" ({_selected_vision_model})" if _selected_vision_model else "")
)
else:
print_info("Skipped — vision won't be available")
else:
print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
print_info("Skipped — add later with 'hermes setup' or configure AUXILIARY_VISION_* settings")
# ── Model Selection (adapts based on provider) ──
if selected_provider != "custom": # Custom already prompted for model name

View file

@@ -362,14 +362,21 @@ def _save_platform_tools(config: dict, platform: str, enabled_toolset_keys: Set[
def _toolset_has_keys(ts_key: str) -> bool:
"""Check if a toolset's required API keys are configured."""
if ts_key == "vision":
try:
from agent.auxiliary_client import resolve_vision_provider_client
_provider, client, _model = resolve_vision_provider_client()
return client is not None
except Exception:
return False
# Check TOOL_CATEGORIES first (provider-aware)
cat = TOOL_CATEGORIES.get(ts_key)
if cat:
for provider in cat["providers"]:
for provider in cat.get("providers", []):
env_vars = provider.get("env_vars", [])
if not env_vars:
return True # Free provider (e.g., Edge TTS)
if all(get_env_value(v["key"]) for v in env_vars):
if env_vars and all(get_env_value(e["key"]) for e in env_vars):
return True
return False
@@ -628,6 +635,39 @@ def _configure_provider(provider: dict, config: dict):
def _configure_simple_requirements(ts_key: str):
"""Simple fallback for toolsets that just need env vars (no provider selection)."""
if ts_key == "vision":
if _toolset_has_keys("vision"):
return
print()
print(color(" Vision / Image Analysis requires a multimodal backend:", Colors.YELLOW))
choices = [
"OpenRouter — uses Gemini",
"OpenAI-compatible endpoint — base URL, API key, and vision model",
"Skip",
]
idx = _prompt_choice(" Configure vision backend", choices, 2)
if idx == 0:
_print_info(" Get key at: https://openrouter.ai/keys")
value = _prompt(" OPENROUTER_API_KEY", password=True)
if value and value.strip():
save_env_value("OPENROUTER_API_KEY", value.strip())
_print_success(" Saved")
else:
_print_warning(" Skipped")
elif idx == 1:
base_url = _prompt(" OPENAI_BASE_URL (blank for OpenAI)").strip() or "https://api.openai.com/v1"
key_label = " OPENAI_API_KEY" if "api.openai.com" in base_url.lower() else " API key"
api_key = _prompt(key_label, password=True)
if api_key and api_key.strip():
save_env_value("OPENAI_BASE_URL", base_url)
save_env_value("OPENAI_API_KEY", api_key.strip())
if "api.openai.com" in base_url.lower():
save_env_value("AUXILIARY_VISION_MODEL", "gpt-4o-mini")
_print_success(" Saved")
else:
_print_warning(" Skipped")
return
requirements = TOOLSET_ENV_REQUIREMENTS.get(ts_key, [])
if not requirements:
return