Merge pull request #1323 from NousResearch/hermes/hermes-1fc28d17

fix: smart vision setup that respects the user's chosen provider
This commit is contained in:
Teknium 2026-03-14 10:40:57 -07:00 committed by GitHub
commit 14738e0872
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 204 additions and 35 deletions

View file

@ -460,12 +460,41 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status = []
# OpenRouter (required for vision, moa)
# Vision — works with OpenRouter, Nous OAuth, Codex OAuth, or OpenAI endpoint
_has_vision = False
if get_env_value("OPENROUTER_API_KEY"):
_has_vision = True
else:
try:
_vauth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
if _vauth_path.is_file():
import json as _vjson
_vauth = _vjson.loads(_vauth_path.read_text())
if _vauth.get("active_provider") == "nous":
_np = _vauth.get("providers", {}).get("nous", {})
if _np.get("agent_key") or _np.get("access_token"):
_has_vision = True
elif _vauth.get("active_provider") == "openai-codex":
_cp = _vauth.get("providers", {}).get("openai-codex", {})
if _cp.get("tokens", {}).get("access_token"):
_has_vision = True
except Exception:
pass
if not _has_vision:
_oai_base = get_env_value("OPENAI_BASE_URL") or ""
if get_env_value("OPENAI_API_KEY") and "api.openai.com" in _oai_base.lower():
_has_vision = True
if _has_vision:
tool_status.append(("Vision (image analysis)", True, None))
else:
tool_status.append(("Vision (image analysis)", False, "run 'hermes setup' to configure"))
# Mixture of Agents — requires OpenRouter specifically (calls multiple models)
if get_env_value("OPENROUTER_API_KEY"):
tool_status.append(("Mixture of Agents", True, None))
else:
tool_status.append(("Vision (image analysis)", False, "OPENROUTER_API_KEY"))
tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
# Firecrawl (web tools)
@ -1246,35 +1275,112 @@ def setup_model_provider(config: dict):
elif existing_or:
selected_provider = "openrouter"
# ── OpenRouter API Key for tools (if not already set) ──
# Tools (vision, web, MoA) use OpenRouter independently of the main provider.
# Prompt for OpenRouter key if not set and a non-OpenRouter provider was chosen.
if selected_provider in (
"nous",
"openai-codex",
"custom",
"zai",
"kimi-coding",
"minimax",
"minimax-cn",
"anthropic",
) and not get_env_value("OPENROUTER_API_KEY"):
print()
print_header("OpenRouter API Key (for tools)")
print_info("Tools like vision analysis, web search, and MoA use OpenRouter")
print_info("independently of your main inference provider.")
print_info("Get your API key at: https://openrouter.ai/keys")
# ── Vision & Image Analysis Setup ──
# Vision requires a multimodal-capable provider. Check whether the user's
# chosen provider already covers it — if so, skip the prompt entirely.
_vision_needs_setup = True
api_key = prompt(
" OpenRouter API key (optional, press Enter to skip)", password=True
)
if api_key:
save_env_value("OPENROUTER_API_KEY", api_key)
print_success("OpenRouter API key saved (for tools)")
else:
print_info(
"Skipped - some tools (vision, web scraping) won't work without this"
if selected_provider == "openrouter":
# OpenRouter → Gemini for vision, already configured
_vision_needs_setup = False
elif selected_provider == "nous":
# Nous Portal OAuth → Gemini via Nous, already configured
_vision_needs_setup = False
elif selected_provider == "openai-codex":
# Codex OAuth → gpt-5.3-codex supports vision
_vision_needs_setup = False
elif selected_provider == "custom":
_custom_base = (get_env_value("OPENAI_BASE_URL") or "").lower()
if "api.openai.com" in _custom_base:
# Direct OpenAI endpoint — show vision model picker
print()
print_header("Vision Model")
print_info("Your OpenAI endpoint supports vision. Pick a model for image analysis:")
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Keep default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, len(_vm_choices) - 1)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
print_success(f"Vision model set to {_selected_vision_model}")
_vision_needs_setup = False
# Even for providers without native vision, check if existing credentials
# from a previous setup already cover it (e.g. user had OpenRouter before
# switching to z.ai)
if _vision_needs_setup:
if get_env_value("OPENROUTER_API_KEY"):
_vision_needs_setup = False
else:
# Check for Nous Portal OAuth in auth.json
try:
_auth_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "auth.json"
if _auth_path.is_file():
import json as _json
_auth_data = _json.loads(_auth_path.read_text())
if _auth_data.get("active_provider") == "nous":
_nous_p = _auth_data.get("providers", {}).get("nous", {})
if _nous_p.get("agent_key") or _nous_p.get("access_token"):
_vision_needs_setup = False
except Exception:
pass
if _vision_needs_setup:
_prov_names = {
"nous-api": "Nous Portal API key",
"zai": "Z.AI / GLM",
"kimi-coding": "Kimi / Moonshot",
"minimax": "MiniMax",
"minimax-cn": "MiniMax CN",
"anthropic": "Anthropic",
"custom": "your custom endpoint",
}
_prov_display = _prov_names.get(selected_provider, selected_provider or "your provider")
print()
print_header("Vision & Image Analysis (optional)")
print_info(f"Vision requires a multimodal-capable provider. {_prov_display}")
print_info("doesn't natively support it. Choose how to enable vision,")
print_info("or skip to configure later.")
print()
_vision_choices = [
"OpenRouter — uses Gemini (free tier at openrouter.ai/keys)",
"OpenAI — enter API key & choose a vision model",
"Skip for now",
]
_vision_idx = prompt_choice("Configure vision:", _vision_choices, 2)
if _vision_idx == 0: # OpenRouter
_or_key = prompt(" OpenRouter API key", password=True)
if _or_key:
save_env_value("OPENROUTER_API_KEY", _or_key)
print_success("OpenRouter key saved — vision will use Gemini")
else:
print_info("Skipped — vision won't be available")
elif _vision_idx == 1: # OpenAI
_oai_key = prompt(" OpenAI API key", password=True)
if _oai_key:
save_env_value("OPENAI_API_KEY", _oai_key)
save_env_value("OPENAI_BASE_URL", "https://api.openai.com/v1")
_oai_vision_models = ["gpt-4o", "gpt-4o-mini", "gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano"]
_vm_choices = _oai_vision_models + ["Use default (gpt-4o-mini)"]
_vm_idx = prompt_choice("Select vision model:", _vm_choices, 0)
_selected_vision_model = (
_oai_vision_models[_vm_idx]
if _vm_idx < len(_oai_vision_models)
else "gpt-4o-mini"
)
save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model)
print_success(f"Vision configured with OpenAI ({_selected_vision_model})")
else:
print_info("Skipped — vision won't be available")
else:
print_info("Skipped — add later with 'hermes config set OPENROUTER_API_KEY ...'")
# ── Model Selection (adapts based on provider) ──
if selected_provider != "custom": # Custom already prompted for model name

View file

@ -3,7 +3,7 @@
from __future__ import annotations
from hermes_cli.config import load_config, save_config, save_env_value
from hermes_cli.setup import setup_model_provider
from hermes_cli.setup import _print_setup_summary, setup_model_provider
def _read_env(home):
@ -50,11 +50,15 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
calls = {"count": 0}
def fake_prompt_choice(_question, choices, default=0):
def fake_prompt_choice(question, choices, default=0):
calls["count"] += 1
if calls["count"] == 1:
assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)"
return len(choices) - 1
if calls["count"] == 2:
assert question == "Configure vision:"
assert choices[-1] == "Skip for now"
return len(choices) - 1
raise AssertionError("Model menu should not appear for keep-current custom")
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
@ -70,7 +74,7 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
assert reloaded["model"]["provider"] == "custom"
assert reloaded["model"]["default"] == "custom/model"
assert reloaded["model"]["base_url"] == "https://example.invalid/v1"
assert calls["count"] == 1
assert calls["count"] == 2
def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
@ -88,13 +92,17 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
captured = {"provider_choices": None, "model_choices": None}
calls = {"count": 0}
def fake_prompt_choice(_question, choices, default=0):
def fake_prompt_choice(question, choices, default=0):
calls["count"] += 1
if calls["count"] == 1:
captured["provider_choices"] = list(choices)
assert choices[-1] == "Keep current (Anthropic)"
return len(choices) - 1
if calls["count"] == 2:
assert question == "Configure vision:"
assert choices[-1] == "Skip for now"
return len(choices) - 1
if calls["count"] == 3:
captured["model_choices"] = list(choices)
return len(choices) - 1 # keep current model
raise AssertionError("Unexpected extra prompt_choice call")
@ -113,7 +121,43 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
assert captured["model_choices"] is not None
assert captured["model_choices"][0] == "claude-opus-4-6"
assert "anthropic/claude-opus-4.6 (recommended)" not in captured["model_choices"]
assert calls["count"] == 2
assert calls["count"] == 3
def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_path, monkeypatch):
    """Keeping an Anthropic provider should still let the user wire up OpenAI vision.

    Walks the interactive setup with a scripted sequence of menu picks and
    verifies that the OpenAI key, base URL, and default vision model land in
    the env file.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)

    # Seed the saved config with an existing Anthropic selection.
    config = load_config()
    config["model"] = {
        "default": "claude-opus-4-6",
        "provider": "anthropic",
    }
    save_config(config)

    # Scripted prompt_choice answers, consumed in order:
    #   9 -> keep current provider
    #   1 -> configure vision with OpenAI
    #   5 -> use default gpt-4o-mini vision model
    #   4 -> keep current Anthropic model
    scripted_picks = [9, 1, 5, 4]

    def scripted_choice(*_args, **_kwargs):
        return scripted_picks.pop(0)

    def scripted_prompt(message, *_args, **_kwargs):
        # Only the OpenAI key prompt gets a non-empty answer.
        return "sk-openai" if "OpenAI API key" in message else ""

    monkeypatch.setattr("hermes_cli.setup.prompt_choice", scripted_choice)
    monkeypatch.setattr("hermes_cli.setup.prompt", scripted_prompt)
    monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *_a, **_k: False)
    # Neutralize external credential discovery so only the scripted flow runs.
    monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
    monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
    monkeypatch.setattr("hermes_cli.models.provider_model_ids", lambda provider: [])

    setup_model_provider(config)

    env = _read_env(tmp_path)
    assert env.get("OPENAI_API_KEY") == "sk-openai"
    assert env.get("OPENAI_BASE_URL") == "https://api.openai.com/v1"
    assert env.get("AUXILIARY_VISION_MODEL") == "gpt-4o-mini"
def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(tmp_path, monkeypatch):
@ -144,7 +188,7 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(
"hermes_cli.auth.resolve_codex_runtime_credentials",
lambda *args, **kwargs: {
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "codex-access-token",
"api_key": "codex-...oken",
},
)
monkeypatch.setattr(
@ -163,3 +207,22 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(
assert reloaded["model"]["provider"] == "openai-codex"
assert reloaded["model"]["default"] == "openai/gpt-5.3-codex"
assert reloaded["model"]["base_url"] == "https://chatgpt.com/backend-api/codex"
def test_setup_summary_marks_codex_auth_as_vision_available(tmp_path, monkeypatch, capsys):
    """A Codex OAuth token in auth.json should count as vision-capable in the summary.

    MoA still requires OpenRouter specifically, so it must remain flagged as
    missing OPENROUTER_API_KEY.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _clear_provider_env(monkeypatch)

    # Fake a stored Codex OAuth session on disk.
    auth_file = tmp_path / "auth.json"
    auth_file.write_text(
        '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"tok"}}}}'
    )
    # Pretend no external binaries are installed.
    monkeypatch.setattr("shutil.which", lambda _name: None)

    _print_setup_summary(load_config(), tmp_path)
    summary = capsys.readouterr().out

    assert "Vision (image analysis)" in summary
    assert "missing run 'hermes setup' to configure" not in summary
    assert "Mixture of Agents" in summary
    assert "missing OPENROUTER_API_KEY" in summary