feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)

Replace the fragile hardcoded context length system with a multi-source resolution chain that correctly identifies context windows per provider.

Key changes:

- New agent/models_dev.py: Fetches and caches the models.dev registry (3800+ models across 100+ providers with per-provider context windows). In-memory cache (1hr TTL) + disk cache for cold starts.
- Rewritten get_model_context_length() resolution chain:
  0. Config override (model.context_length)
  1. Custom providers per-model context_length
  2. Persistent disk cache
  3. Endpoint /models (local servers)
  4. Anthropic /v1/models API (max_input_tokens, API-key only)
  5. OpenRouter live API (existing, unchanged)
  6. Nous suffix-match via OpenRouter (dot/dash normalization)
  7. models.dev registry lookup (provider-aware)
  8. Thin hardcoded defaults (broad family patterns)
  9. 128K fallback (was 2M)
- Provider-aware context: same model now correctly resolves to different context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic, 128K on GitHub Copilot). Provider name flows through ContextCompressor.
- DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns. models.dev replaces the per-model hardcoding.
- CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K] to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M.
- hermes model: prompts for context_length when configuring custom endpoints. Supports shorthand (32k, 128K). Saved to custom_providers per-model config.
- custom_providers schema extended with optional models dict for per-model context_length (backward compatible).
- Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash normalization. Handles all 15 current Nous models.
- Anthropic direct: queries /v1/models for max_input_tokens. Only works with regular API keys (sk-ant-api*), not OAuth tokens. Falls through to models.dev for OAuth users.
Tests: 5574 passed (18 new tests for models_dev + updated probe tiers)
Docs: Updated configuration.md context length section, AGENTS.md

Co-authored-by: Test <test@test.com>
2026-03-20 06:04:33 -07:00 · 2026-03-20 06:04:33 -07:00 · 88643a1ba9
commit 88643a1ba9
parent b7b585656b
13 changed files with 662 additions and 246 deletions
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -1137,10 +1137,21 @@ def _model_flow_custom(config):
        base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
        api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
        model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
+        context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
    except (KeyboardInterrupt, EOFError):
        print("\nCancelled.")
        return

+    context_length = None
+    if context_length_str:
+        try:
+            context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
+            if context_length <= 0:
+                context_length = None
+        except ValueError:
+            print(f"Invalid context length: {context_length_str} — will auto-detect.")
+            context_length = None
+
    if not base_url and not current_url:
        print("No URL provided. Cancelled.")
        return
@ -1203,14 +1214,14 @@ def _model_flow_custom(config):
        print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")

    # Auto-save to custom_providers so it appears in the menu next time
-    _save_custom_provider(effective_url, effective_key, model_name or "")
+    _save_custom_provider(effective_url, effective_key, model_name or "", context_length=context_length)


-def _save_custom_provider(base_url, api_key="", model=""):
+def _save_custom_provider(base_url, api_key="", model="", context_length=None):
    """Save a custom endpoint to custom_providers in config.yaml.

    Deduplicates by base_url — if the URL already exists, updates the
-    model name but doesn't add a duplicate entry.
+    model name and context_length but doesn't add a duplicate entry.
    Auto-generates a display name from the URL hostname.
    """
    from hermes_cli.config import load_config, save_config
@ -1220,14 +1231,24 @@ def _save_custom_provider(base_url, api_key="", model=""):
    if not isinstance(providers, list):
        providers = []

-    # Check if this URL is already saved — update model if so
+    # Check if this URL is already saved — update model/context_length if so
    for entry in providers:
        if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"):
+            changed = False
            if model and entry.get("model") != model:
                entry["model"] = model
+                changed = True
+            if model and context_length:
+                models_cfg = entry.get("models", {})
+                if not isinstance(models_cfg, dict):
+                    models_cfg = {}
+                models_cfg[model] = {"context_length": context_length}
+                entry["models"] = models_cfg
+                changed = True
+            if changed:
                cfg["custom_providers"] = providers
                save_config(cfg)
-            return  # already saved, updated model if needed
+            return  # already saved, updated if needed

    # Auto-generate a name from the URL
    import re
@ -1249,6 +1270,8 @@ def _save_custom_provider(base_url, api_key="", model=""):
        entry["api_key"] = api_key
    if model:
        entry["model"] = model
+    if model and context_length:
+        entry["models"] = {model: {"context_length": context_length}}

    providers.append(entry)
    cfg["custom_providers"] = providers
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -1045,93 +1045,17 @@ def setup_model_provider(config: dict):
        print()
        print_header("Custom OpenAI-Compatible Endpoint")
        print_info("Works with any API that follows OpenAI's chat completions spec")
+        print()

-        current_url = get_env_value("OPENAI_BASE_URL") or ""
-        current_key = get_env_value("OPENAI_API_KEY")
-        _raw_model = config.get("model", "")
-        current_model = (
-            _raw_model.get("default", "")
-            if isinstance(_raw_model, dict)
-            else (_raw_model or "")
-        )
-
-        if current_url:
-            print_info(f"  Current URL: {current_url}")
-        if current_key:
-            print_info(f"  Current key: {current_key[:8]}... (configured)")
-
-        base_url = prompt(
-            "  API base URL (e.g., https://api.example.com/v1)", current_url
-        ).strip()
-        api_key = prompt("  API key", password=True)
-        model_name = prompt("  Model name (e.g., gpt-4, claude-3-opus)", current_model)
-
-        if base_url:
-            from hermes_cli.models import probe_api_models
-
-            probe = probe_api_models(api_key, base_url)
-            if probe.get("used_fallback") and probe.get("resolved_base_url"):
-                print_warning(
-                    f"Endpoint verification worked at {probe['resolved_base_url']}/models, "
-                    f"not the exact URL you entered. Saving the working base URL instead."
-                )
-                base_url = probe["resolved_base_url"]
-            elif probe.get("models") is not None:
-                print_success(
-                    f"Verified endpoint via {probe.get('probed_url')} "
-                    f"({len(probe.get('models') or [])} model(s) visible)"
-                )
-            else:
-                print_warning(
-                    f"Could not verify this endpoint via {probe.get('probed_url')}. "
-                    f"Hermes will still save it."
-                )
-                if probe.get("suggested_base_url"):
-                    print_info(
-                        f"  If this server expects /v1, try base URL: {probe['suggested_base_url']}"
-                    )
-
-            save_env_value("OPENAI_BASE_URL", base_url)
-        if api_key:
-            save_env_value("OPENAI_API_KEY", api_key)
-        if model_name:
-            _set_default_model(config, model_name)
-
-        try:
-            from hermes_cli.auth import deactivate_provider
-
-            deactivate_provider()
-        except Exception:
-            pass
-
-        # Save provider and base_url to config.yaml so the gateway and CLI
-        # both resolve the correct provider without relying on env-var heuristics.
-        if base_url:
-            import yaml
-
-            config_path = (
-                Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
-                / "config.yaml"
-            )
-            try:
-                disk_cfg = {}
-                if config_path.exists():
-                    disk_cfg = yaml.safe_load(config_path.read_text()) or {}
-                model_section = disk_cfg.get("model", {})
-                if isinstance(model_section, str):
-                    model_section = {"default": model_section}
-                model_section["provider"] = "custom"
-                model_section["base_url"] = base_url.rstrip("/")
-                if model_name:
-                    model_section["default"] = model_name
-                disk_cfg["model"] = model_section
-                config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
-            except Exception as e:
-                logger.debug("Could not save provider to config.yaml: %s", e)
-
-            _set_model_provider(config, "custom", base_url)
-
-        print_success("Custom endpoint configured")
+        # Reuse the shared custom endpoint flow from `hermes model`.
+        # This handles: URL/key/model/context-length prompts, endpoint probing,
+        # env saving, config.yaml updates, and custom_providers persistence.
+        from hermes_cli.main import _model_flow_custom
+        _model_flow_custom(config)
+        # _model_flow_custom handles model selection, config, env vars,
+        # and custom_providers. Keep selected_provider = "custom" so
+        # the model selection step below is skipped (line 1631 check)
+        # but vision and TTS setup still run.

    elif provider_idx == 4:  # Z.AI / GLM
        selected_provider = "zai"