feat: use endpoint metadata for custom model context and pricing (#1906)
* perf: cache base_url.lower() via property, consolidate triple load_config(), hoist set constant run_agent.py: - Add base_url property that auto-caches _base_url_lower on every assignment, eliminating 12+ redundant .lower() calls per API cycle across __init__, _build_api_kwargs, _supports_reasoning_extra_body, and the main conversation loop - Consolidate three separate load_config() disk reads in __init__ (memory, skills, compression) into a single call, reusing the result dict for all three config sections model_tools.py: - Hoist _READ_SEARCH_TOOLS set to module level (was rebuilt inside handle_function_call on every tool invocation) * Use endpoint metadata for custom model context and pricing --------- Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
This commit is contained in:
parent
11f029c311
commit
a2440f72f6
7 changed files with 375 additions and 49 deletions
|
|
@ -188,6 +188,36 @@ class TestGetModelContextLength:
|
|||
result = get_model_context_length("custom/model")
|
||||
assert result == CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
def test_custom_endpoint_metadata_beats_fuzzy_default(self, mock_endpoint_fetch, mock_fetch):
    """Context length reported by the custom endpoint wins over name-based defaults."""
    # Global catalog knows nothing about this model; only the endpoint does.
    mock_fetch.return_value = {}
    endpoint_models = {"zai-org/GLM-5-TEE": {"context_length": 65536}}
    mock_endpoint_fetch.return_value = endpoint_models

    context = get_model_context_length(
        "zai-org/GLM-5-TEE",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    assert context == 65536
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
def test_custom_endpoint_without_metadata_skips_name_based_default(self, mock_endpoint_fetch, mock_fetch):
    """An empty endpoint listing falls through to the lowest probe tier."""
    # Neither the global catalog nor the endpoint reports this model.
    mock_fetch.return_value = {}
    mock_endpoint_fetch.return_value = {}

    fallback = get_model_context_length(
        "zai-org/GLM-5-TEE",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    assert fallback == CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# fetch_model_metadata — caching, TTL, slugs, failures
|
||||
|
|
@ -258,6 +288,25 @@ class TestFetchModelMetadata:
|
|||
assert "anthropic/claude-3.5-sonnet" in result
|
||||
assert result["anthropic/claude-3.5-sonnet"]["context_length"] == 200000
|
||||
|
||||
@patch("agent.model_metadata.requests.get")
def test_provider_prefixed_models_get_bare_aliases(self, mock_get):
    """A 'provider/model' id is also indexed under its bare 'model' alias."""
    self._reset_cache()

    payload = {
        "data": [{
            "id": "provider/test-model",
            "context_length": 123456,
            "name": "Provider: Test Model",
        }]
    }
    mock_response = MagicMock()
    mock_response.json.return_value = payload
    mock_response.raise_for_status = MagicMock()
    mock_get.return_value = mock_response

    metadata = fetch_model_metadata(force_refresh=True)

    # Both the prefixed id and the bare alias resolve to the same entry.
    for key in ("provider/test-model", "test-model"):
        assert metadata[key]["context_length"] == 123456
|
||||
|
||||
@patch("agent.model_metadata.requests.get")
|
||||
def test_ttl_expiry_triggers_refetch(self, mock_get):
|
||||
"""Cache expires after _MODEL_CACHE_TTL seconds."""
|
||||
|
|
|
|||
|
|
@ -99,3 +99,27 @@ def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(m
|
|||
)
|
||||
|
||||
assert result.status == "unknown"
|
||||
|
||||
|
||||
def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
    """Per-token pricing strings from a custom endpoint's models API are honored."""
    endpoint_metadata = {
        "zai-org/GLM-5-TEE": {
            "pricing": {
                "prompt": "0.0000005",
                "completion": "0.000002",
            }
        }
    }
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_endpoint_model_metadata",
        lambda base_url, api_key=None: endpoint_metadata,
    )

    entry = get_pricing_entry(
        "zai-org/GLM-5-TEE",
        provider="custom",
        base_url="https://llm.chutes.ai/v1",
        api_key="test-key",
    )

    # Per-token rates scale to per-million-token rates (0.0000005 -> 0.5, etc.).
    assert float(entry.input_cost_per_million) == 0.5
    assert float(entry.output_cost_per_million) == 2.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue