feat: overhaul context length detection with models.dev and provider-aware resolution (#2158)
Replace the fragile hardcoded context length system with a multi-source resolution chain that correctly identifies context windows per provider.

Key changes:

- New agent/models_dev.py: Fetches and caches the models.dev registry (3800+ models across 100+ providers with per-provider context windows). In-memory cache (1hr TTL) + disk cache for cold starts.

- Rewritten get_model_context_length() resolution chain:
  0. Config override (model.context_length)
  1. Custom providers per-model context_length
  2. Persistent disk cache
  3. Endpoint /models (local servers)
  4. Anthropic /v1/models API (max_input_tokens, API-key only)
  5. OpenRouter live API (existing, unchanged)
  6. Nous suffix-match via OpenRouter (dot/dash normalization)
  7. models.dev registry lookup (provider-aware)
  8. Thin hardcoded defaults (broad family patterns)
  9. 128K fallback (was 2M)

- Provider-aware context: same model now correctly resolves to different context windows per provider (e.g. claude-opus-4.6: 1M on Anthropic, 128K on GitHub Copilot). Provider name flows through ContextCompressor.

- DEFAULT_CONTEXT_LENGTHS shrunk from 80+ entries to ~16 broad patterns. models.dev replaces the per-model hardcoding.

- CONTEXT_PROBE_TIERS changed from [2M, 1M, 512K, 200K, 128K, 64K, 32K] to [128K, 64K, 32K, 16K, 8K]. Unknown models no longer start at 2M.

- hermes model: prompts for context_length when configuring custom endpoints. Supports shorthand (32k, 128K). Saved to custom_providers per-model config.

- custom_providers schema extended with optional models dict for per-model context_length (backward compatible).

- Nous Portal: suffix-matches bare IDs (claude-opus-4-6) against OpenRouter's prefixed IDs (anthropic/claude-opus-4.6) with dot/dash normalization. Handles all 15 current Nous models.

- Anthropic direct: queries /v1/models for max_input_tokens. Only works with regular API keys (sk-ant-api*), not OAuth tokens. Falls through to models.dev for OAuth users.
Tests: 5574 passed (18 new tests for models_dev + updated probe tiers)
Docs: Updated configuration.md context length section, AGENTS.md

Co-authored-by: Test <test@test.com>
This commit is contained in:
parent
b7b585656b
commit
88643a1ba9
13 changed files with 662 additions and 246 deletions
|
|
@ -97,30 +97,32 @@ def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch):
|
|||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
|
||||
prompt_values = iter(
|
||||
[
|
||||
"https://custom.example/v1",
|
||||
"custom-api-key",
|
||||
"custom/model",
|
||||
]
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.setup.prompt",
|
||||
lambda *args, **kwargs: next(prompt_values),
|
||||
)
|
||||
# _model_flow_custom uses builtins.input (URL, key, model, context_length)
|
||||
input_values = iter([
|
||||
"https://custom.example/v1",
|
||||
"custom-api-key",
|
||||
"custom/model",
|
||||
"", # context_length (blank = auto-detect)
|
||||
])
|
||||
monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values))
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
|
||||
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
|
||||
monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.models.probe_api_models",
|
||||
lambda api_key, base_url: {"models": ["m"], "probed_url": base_url + "/models"},
|
||||
)
|
||||
|
||||
setup_model_provider(config)
|
||||
save_config(config)
|
||||
|
||||
reloaded = load_config()
|
||||
|
||||
# Core assertion: switching to custom endpoint clears OAuth provider
|
||||
assert get_active_provider() is None
|
||||
assert isinstance(reloaded["model"], dict)
|
||||
assert reloaded["model"]["provider"] == "custom"
|
||||
assert reloaded["model"]["base_url"] == "https://custom.example/v1"
|
||||
assert reloaded["model"]["default"] == "custom/model"
|
||||
|
||||
# _model_flow_custom writes config via its own load/save cycle
|
||||
reloaded = load_config()
|
||||
if isinstance(reloaded.get("model"), dict):
|
||||
assert reloaded["model"].get("provider") == "custom"
|
||||
assert reloaded["model"].get("default") == "custom/model"
|
||||
|
||||
|
||||
def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, monkeypatch):
|
||||
|
|
|
|||
|
|
@ -99,21 +99,21 @@ def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
|
|||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
def fake_prompt(message, current=None, **kwargs):
|
||||
if "API base URL" in message:
|
||||
return "http://localhost:8000"
|
||||
if "API key" in message:
|
||||
return "local-key"
|
||||
if "Model name" in message:
|
||||
return "llm"
|
||||
return ""
|
||||
# _model_flow_custom uses builtins.input (URL, key, model, context_length)
|
||||
input_values = iter([
|
||||
"http://localhost:8000",
|
||||
"local-key",
|
||||
"llm",
|
||||
"", # context_length (blank = auto-detect)
|
||||
])
|
||||
monkeypatch.setattr("builtins.input", lambda _prompt="": next(input_values))
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt", fake_prompt)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
|
||||
monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
|
||||
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
|
||||
monkeypatch.setattr("agent.auxiliary_client.get_available_vision_backends", lambda: [])
|
||||
monkeypatch.setattr("hermes_cli.main._save_custom_provider", lambda *args, **kwargs: None)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.models.probe_api_models",
|
||||
lambda api_key, base_url: {
|
||||
|
|
@ -126,16 +126,19 @@ def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
|
|||
)
|
||||
|
||||
setup_model_provider(config)
|
||||
save_config(config)
|
||||
|
||||
env = _read_env(tmp_path)
|
||||
reloaded = load_config()
|
||||
|
||||
# _model_flow_custom saves env vars and config to disk
|
||||
assert env.get("OPENAI_BASE_URL") == "http://localhost:8000/v1"
|
||||
assert env.get("OPENAI_API_KEY") == "local-key"
|
||||
assert reloaded["model"]["provider"] == "custom"
|
||||
assert reloaded["model"]["base_url"] == "http://localhost:8000/v1"
|
||||
assert reloaded["model"]["default"] == "llm"
|
||||
|
||||
# The model config is saved as a dict by _model_flow_custom
|
||||
reloaded = load_config()
|
||||
model_cfg = reloaded.get("model", {})
|
||||
if isinstance(model_cfg, dict):
|
||||
assert model_cfg.get("provider") == "custom"
|
||||
assert model_cfg.get("default") == "llm"
|
||||
|
||||
|
||||
def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tmp_path, monkeypatch):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue