feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)
Replace the fragile hardcoded context length system with a multi-source resolution chain that correctly identifies context windows per provider. Key changes: - New agent/models_dev.py: Fetches and caches the models.dev registry (3800+ models across 100+ providers with per-provider context windows). In-memory cache (1hr TTL) + disk cache for cold starts. - Rewritten get_model_context_length() resolution chain: 0. Config override (model.context_length) 1. Custom providers per-model context_length 2. Persistent disk cache 3. Endpoint /models (local servers) 4. Anthropic /v1/models API (max_input_tokens, API-key only) 5. OpenRouter live API (existing, unchanged) 6. Nous suffix-match via OpenRouter (dot/dash normalization) 7. models.dev registry lookup (provider-aware) 8. Thin hardcoded defaults (broad family patterns) 9. 128K fallback (was 2M) - Provider-aware context: same model now correctly resolves to different context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic, 128K on GitHub Copilot). Provider name flows through ContextCompressor. - DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns. models.dev replaces the per-model hardcoding. - CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K] to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M. - hermes model: prompts for context_length when configuring custom endpoints. Supports shorthand (32k, 128K). Saved to custom_providers per-model config. - custom_providers schema extended with optional models dict for per-model context_length (backward compatible). - Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash normalization. Handles all 15 current Nous models. - Anthropic direct: queries /v1/models for max_input_tokens. Only works with regular API keys (sk-ant-api*), not OAuth tokens. Falls through to models.dev for OAuth users. Tests: 5574 passed (18 new tests for models_dev + updated probe tiers) Docs: Updated configuration.md context length section, AGENTS.md Co-authored-by: Test <test@test.com>
This commit is contained in:
parent
b7b585656b
commit
88643a1ba9
13 changed files with 662 additions and 246 deletions
|
|
@ -1137,10 +1137,21 @@ def _model_flow_custom(config):
|
|||
base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
|
||||
api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
|
||||
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
|
||||
context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
|
||||
context_length = None
|
||||
if context_length_str:
|
||||
try:
|
||||
context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
|
||||
if context_length <= 0:
|
||||
context_length = None
|
||||
except ValueError:
|
||||
print(f"Invalid context length: {context_length_str} — will auto-detect.")
|
||||
context_length = None
|
||||
|
||||
if not base_url and not current_url:
|
||||
print("No URL provided. Cancelled.")
|
||||
return
|
||||
|
|
@ -1203,14 +1214,14 @@ def _model_flow_custom(config):
|
|||
print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
|
||||
|
||||
# Auto-save to custom_providers so it appears in the menu next time
|
||||
_save_custom_provider(effective_url, effective_key, model_name or "")
|
||||
_save_custom_provider(effective_url, effective_key, model_name or "", context_length=context_length)
|
||||
|
||||
|
||||
def _save_custom_provider(base_url, api_key="", model=""):
|
||||
def _save_custom_provider(base_url, api_key="", model="", context_length=None):
|
||||
"""Save a custom endpoint to custom_providers in config.yaml.
|
||||
|
||||
Deduplicates by base_url — if the URL already exists, updates the
|
||||
model name but doesn't add a duplicate entry.
|
||||
model name and context_length but doesn't add a duplicate entry.
|
||||
Auto-generates a display name from the URL hostname.
|
||||
"""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
|
@ -1220,14 +1231,24 @@ def _save_custom_provider(base_url, api_key="", model=""):
|
|||
if not isinstance(providers, list):
|
||||
providers = []
|
||||
|
||||
# Check if this URL is already saved — update model if so
|
||||
# Check if this URL is already saved — update model/context_length if so
|
||||
for entry in providers:
|
||||
if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"):
|
||||
changed = False
|
||||
if model and entry.get("model") != model:
|
||||
entry["model"] = model
|
||||
changed = True
|
||||
if model and context_length:
|
||||
models_cfg = entry.get("models", {})
|
||||
if not isinstance(models_cfg, dict):
|
||||
models_cfg = {}
|
||||
models_cfg[model] = {"context_length": context_length}
|
||||
entry["models"] = models_cfg
|
||||
changed = True
|
||||
if changed:
|
||||
cfg["custom_providers"] = providers
|
||||
save_config(cfg)
|
||||
return # already saved, updated model if needed
|
||||
return # already saved, updated if needed
|
||||
|
||||
# Auto-generate a name from the URL
|
||||
import re
|
||||
|
|
@ -1249,6 +1270,8 @@ def _save_custom_provider(base_url, api_key="", model=""):
|
|||
entry["api_key"] = api_key
|
||||
if model:
|
||||
entry["model"] = model
|
||||
if model and context_length:
|
||||
entry["models"] = {model: {"context_length": context_length}}
|
||||
|
||||
providers.append(entry)
|
||||
cfg["custom_providers"] = providers
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue