fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini (#1189)

* fix: prevent model/provider mismatch when switching providers during active gateway

  When _update_config_for_provider() writes the new provider and base_url to config.yaml, the gateway (which re-reads config per-message) can pick up the change before model selection completes. This causes the old model name (e.g. 'anthropic/claude-opus-4.6') to be sent to the new provider's API (e.g. MiniMax), which fails.

  Changes:
  - _update_config_for_provider() now accepts an optional default_model parameter. When provided and the current model.default is empty or uses OpenRouter format (contains '/'), it sets a safe default model for the new provider.
  - All setup.py callers for direct-API providers (zai, kimi, minimax, minimax-cn, anthropic) now pass a provider-appropriate default model.
  - _setup_provider_model_selection() now validates the 'Keep current' choice: if the current model uses OpenRouter format and wouldn't work with the new provider, it warns and switches to the provider's first default model instead of silently keeping the incompatible name.

  Reported by a user on Home Assistant whose gateway started sending 'anthropic/claude-opus-4.6' to MiniMax's API after running hermes setup.

* fix: auxiliary client uses main model for custom/local endpoints instead of gpt-4o-mini

  When a user runs a local server (e.g. Qwen3.5-9B via OPENAI_BASE_URL), the auxiliary client (context compression, vision, session search) would send requests for 'gpt-4o-mini' or 'google/gemini-3-flash-preview' to the local server, which only serves one model — causing 404 errors mid-task.

  Changes:
  - _try_custom_endpoint() now reads the user's configured main model via _read_main_model() (checks OPENAI_MODEL → HERMES_MODEL → LLM_MODEL → config.yaml model.default) instead of hardcoding 'gpt-4o-mini'.
  - resolve_provider_client() auto mode now detects when an OpenRouter-formatted model override (containing '/') would be sent to a non-OpenRouter provider (like a local server) and drops it in favor of the provider's default model.
  - Test isolation fixes: properly clear env vars in 'nothing available' tests to prevent host environment leakage.
2026-03-13 10:02:16 -07:00 · 2026-03-13 10:02:16 -07:00 · 11b577671b
commit 11b577671b
parent 153ccbfd61
2 changed files with 53 additions and 8 deletions
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -129,6 +129,7 @@ class TestGetTextAuxiliaryClient:
    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        # Override the autouse monkeypatch for codex
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
@@ -137,7 +138,7 @@ class TestGetTextAuxiliaryClient:
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_text_auxiliary_client()
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"
        call_kwargs = mock_openai.call_args
        assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1"

@@ -150,9 +151,13 @@ class TestGetTextAuxiliaryClient:
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

-    def test_returns_none_when_nothing_available(self):
+    def test_returns_none_when_nothing_available(self, monkeypatch):
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
+        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None
@@ -209,17 +214,21 @@ class TestVisionClientFallback:
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = get_vision_auxiliary_client()
        assert client is not None
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"

    def test_vision_forced_main_returns_none_without_creds(self, monkeypatch):
        """Forced main with no credentials still returns None."""
        monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main")
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.delenv("OPENAI_API_KEY", raising=False)
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
-             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
+             patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \
+             patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None
@@ -305,21 +314,23 @@ class TestResolveForcedProvider:
    def test_forced_main_uses_custom(self, monkeypatch):
        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"

    def test_forced_main_skips_openrouter_nous(self, monkeypatch):
        """Even if OpenRouter key is set, 'main' skips it."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
+        monkeypatch.setenv("OPENAI_MODEL", "my-local-model")
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as mock_openai:
            client, model = _resolve_forced_provider("main")
        # Should use custom endpoint, not OpenRouter
-        assert model == "gpt-4o-mini"
+        assert model == "my-local-model"

    def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \