feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)
Replace the fragile hardcoded context length system with a multi-source resolution chain that correctly identifies context windows per provider. Key changes: - New agent/models_dev.py: Fetches and caches the models.dev registry (3800+ models across 100+ providers with per-provider context windows). In-memory cache (1hr TTL) + disk cache for cold starts. - Rewritten get_model_context_length() resolution chain: 0. Config override (model.context_length) 1. Custom providers per-model context_length 2. Persistent disk cache 3. Endpoint /models (local servers) 4. Anthropic /v1/models API (max_input_tokens, API-key only) 5. OpenRouter live API (existing, unchanged) 6. Nous suffix-match via OpenRouter (dot/dash normalization) 7. models.dev registry lookup (provider-aware) 8. Thin hardcoded defaults (broad family patterns) 9. 128K fallback (was 2M) - Provider-aware context: same model now correctly resolves to different context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic, 128K on GitHub Copilot). Provider name flows through ContextCompressor. - DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns. models.dev replaces the per-model hardcoding. - CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K] to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M. - hermes model: prompts for context_length when configuring custom endpoints. Supports shorthand (32k, 128K). Saved to custom_providers per-model config. - custom_providers schema extended with optional models dict for per-model context_length (backward compatible). - Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash normalization. Handles all 15 current Nous models. - Anthropic direct: queries /v1/models for max_input_tokens. Only works with regular API keys (sk-ant-api*), not OAuth tokens. Falls through to models.dev for OAuth users. Tests: 5574 passed (18 new tests for models_dev + updated probe tiers) Docs: Updated configuration.md context length section, AGENTS.md Co-authored-by: Test <test@test.com>
This commit is contained in:
parent
b7b585656b
commit
88643a1ba9
13 changed files with 662 additions and 246 deletions
|
|
@ -1137,10 +1137,21 @@ def _model_flow_custom(config):
|
|||
base_url = input(f"API base URL [{current_url or 'e.g. https://api.example.com/v1'}]: ").strip()
|
||||
api_key = input(f"API key [{current_key[:8] + '...' if current_key else 'optional'}]: ").strip()
|
||||
model_name = input("Model name (e.g. gpt-4, llama-3-70b): ").strip()
|
||||
context_length_str = input("Context length in tokens [leave blank for auto-detect]: ").strip()
|
||||
except (KeyboardInterrupt, EOFError):
|
||||
print("\nCancelled.")
|
||||
return
|
||||
|
||||
context_length = None
|
||||
if context_length_str:
|
||||
try:
|
||||
context_length = int(context_length_str.replace(",", "").replace("k", "000").replace("K", "000"))
|
||||
if context_length <= 0:
|
||||
context_length = None
|
||||
except ValueError:
|
||||
print(f"Invalid context length: {context_length_str} — will auto-detect.")
|
||||
context_length = None
|
||||
|
||||
if not base_url and not current_url:
|
||||
print("No URL provided. Cancelled.")
|
||||
return
|
||||
|
|
@ -1203,14 +1214,14 @@ def _model_flow_custom(config):
|
|||
print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
|
||||
|
||||
# Auto-save to custom_providers so it appears in the menu next time
|
||||
_save_custom_provider(effective_url, effective_key, model_name or "")
|
||||
_save_custom_provider(effective_url, effective_key, model_name or "", context_length=context_length)
|
||||
|
||||
|
||||
def _save_custom_provider(base_url, api_key="", model=""):
|
||||
def _save_custom_provider(base_url, api_key="", model="", context_length=None):
|
||||
"""Save a custom endpoint to custom_providers in config.yaml.
|
||||
|
||||
Deduplicates by base_url — if the URL already exists, updates the
|
||||
model name but doesn't add a duplicate entry.
|
||||
model name and context_length but doesn't add a duplicate entry.
|
||||
Auto-generates a display name from the URL hostname.
|
||||
"""
|
||||
from hermes_cli.config import load_config, save_config
|
||||
|
|
@ -1220,14 +1231,24 @@ def _save_custom_provider(base_url, api_key="", model=""):
|
|||
if not isinstance(providers, list):
|
||||
providers = []
|
||||
|
||||
# Check if this URL is already saved — update model if so
|
||||
# Check if this URL is already saved — update model/context_length if so
|
||||
for entry in providers:
|
||||
if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"):
|
||||
changed = False
|
||||
if model and entry.get("model") != model:
|
||||
entry["model"] = model
|
||||
changed = True
|
||||
if model and context_length:
|
||||
models_cfg = entry.get("models", {})
|
||||
if not isinstance(models_cfg, dict):
|
||||
models_cfg = {}
|
||||
models_cfg[model] = {"context_length": context_length}
|
||||
entry["models"] = models_cfg
|
||||
changed = True
|
||||
if changed:
|
||||
cfg["custom_providers"] = providers
|
||||
save_config(cfg)
|
||||
return # already saved, updated model if needed
|
||||
return # already saved, updated if needed
|
||||
|
||||
# Auto-generate a name from the URL
|
||||
import re
|
||||
|
|
@ -1249,6 +1270,8 @@ def _save_custom_provider(base_url, api_key="", model=""):
|
|||
entry["api_key"] = api_key
|
||||
if model:
|
||||
entry["model"] = model
|
||||
if model and context_length:
|
||||
entry["models"] = {model: {"context_length": context_length}}
|
||||
|
||||
providers.append(entry)
|
||||
cfg["custom_providers"] = providers
|
||||
|
|
|
|||
|
|
@ -1045,93 +1045,17 @@ def setup_model_provider(config: dict):
|
|||
print()
|
||||
print_header("Custom OpenAI-Compatible Endpoint")
|
||||
print_info("Works with any API that follows OpenAI's chat completions spec")
|
||||
print()
|
||||
|
||||
current_url = get_env_value("OPENAI_BASE_URL") or ""
|
||||
current_key = get_env_value("OPENAI_API_KEY")
|
||||
_raw_model = config.get("model", "")
|
||||
current_model = (
|
||||
_raw_model.get("default", "")
|
||||
if isinstance(_raw_model, dict)
|
||||
else (_raw_model or "")
|
||||
)
|
||||
|
||||
if current_url:
|
||||
print_info(f" Current URL: {current_url}")
|
||||
if current_key:
|
||||
print_info(f" Current key: {current_key[:8]}... (configured)")
|
||||
|
||||
base_url = prompt(
|
||||
" API base URL (e.g., https://api.example.com/v1)", current_url
|
||||
).strip()
|
||||
api_key = prompt(" API key", password=True)
|
||||
model_name = prompt(" Model name (e.g., gpt-4, claude-3-opus)", current_model)
|
||||
|
||||
if base_url:
|
||||
from hermes_cli.models import probe_api_models
|
||||
|
||||
probe = probe_api_models(api_key, base_url)
|
||||
if probe.get("used_fallback") and probe.get("resolved_base_url"):
|
||||
print_warning(
|
||||
f"Endpoint verification worked at {probe['resolved_base_url']}/models, "
|
||||
f"not the exact URL you entered. Saving the working base URL instead."
|
||||
)
|
||||
base_url = probe["resolved_base_url"]
|
||||
elif probe.get("models") is not None:
|
||||
print_success(
|
||||
f"Verified endpoint via {probe.get('probed_url')} "
|
||||
f"({len(probe.get('models') or [])} model(s) visible)"
|
||||
)
|
||||
else:
|
||||
print_warning(
|
||||
f"Could not verify this endpoint via {probe.get('probed_url')}. "
|
||||
f"Hermes will still save it."
|
||||
)
|
||||
if probe.get("suggested_base_url"):
|
||||
print_info(
|
||||
f" If this server expects /v1, try base URL: {probe['suggested_base_url']}"
|
||||
)
|
||||
|
||||
save_env_value("OPENAI_BASE_URL", base_url)
|
||||
if api_key:
|
||||
save_env_value("OPENAI_API_KEY", api_key)
|
||||
if model_name:
|
||||
_set_default_model(config, model_name)
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import deactivate_provider
|
||||
|
||||
deactivate_provider()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Save provider and base_url to config.yaml so the gateway and CLI
|
||||
# both resolve the correct provider without relying on env-var heuristics.
|
||||
if base_url:
|
||||
import yaml
|
||||
|
||||
config_path = (
|
||||
Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
/ "config.yaml"
|
||||
)
|
||||
try:
|
||||
disk_cfg = {}
|
||||
if config_path.exists():
|
||||
disk_cfg = yaml.safe_load(config_path.read_text()) or {}
|
||||
model_section = disk_cfg.get("model", {})
|
||||
if isinstance(model_section, str):
|
||||
model_section = {"default": model_section}
|
||||
model_section["provider"] = "custom"
|
||||
model_section["base_url"] = base_url.rstrip("/")
|
||||
if model_name:
|
||||
model_section["default"] = model_name
|
||||
disk_cfg["model"] = model_section
|
||||
config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
|
||||
except Exception as e:
|
||||
logger.debug("Could not save provider to config.yaml: %s", e)
|
||||
|
||||
_set_model_provider(config, "custom", base_url)
|
||||
|
||||
print_success("Custom endpoint configured")
|
||||
# Reuse the shared custom endpoint flow from `hermes model`.
|
||||
# This handles: URL/key/model/context-length prompts, endpoint probing,
|
||||
# env saving, config.yaml updates, and custom_providers persistence.
|
||||
from hermes_cli.main import _model_flow_custom
|
||||
_model_flow_custom(config)
|
||||
# _model_flow_custom handles model selection, config, env vars,
|
||||
# and custom_providers. Keep selected_provider = "custom" so
|
||||
# the model selection step below is skipped (line 1631 check)
|
||||
# but vision and TTS setup still run.
|
||||
|
||||
elif provider_idx == 4: # Z.AI / GLM
|
||||
selected_provider = "zai"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue