feat: use endpoint metadata for custom model context and pricing (#1906)
* perf: cache base_url.lower() via property, consolidate triple load_config(), hoist set constant run_agent.py: - Add base_url property that auto-caches _base_url_lower on every assignment, eliminating 12+ redundant .lower() calls per API cycle across __init__, _build_api_kwargs, _supports_reasoning_extra_body, and the main conversation loop - Consolidate three separate load_config() disk reads in __init__ (memory, skills, compression) into a single call, reusing the result dict for all three config sections model_tools.py: - Hoist _READ_SEARCH_TOOLS set to module level (was rebuilt inside handle_function_call on every tool invocation) * Use endpoint metadata for custom model context and pricing --------- Co-authored-by: kshitij <82637225+kshitijk4poor@users.noreply.github.com>
This commit is contained in:
parent
11f029c311
commit
a2440f72f6
7 changed files with 375 additions and 49 deletions
|
|
@ -99,3 +99,27 @@ def test_estimate_usage_cost_refuses_cache_pricing_without_official_cache_rate(m
|
|||
)
|
||||
|
||||
assert result.status == "unknown"
|
||||
|
||||
|
||||
def test_custom_endpoint_models_api_pricing_is_supported(monkeypatch):
|
||||
monkeypatch.setattr(
|
||||
"agent.usage_pricing.fetch_endpoint_model_metadata",
|
||||
lambda base_url, api_key=None: {
|
||||
"zai-org/GLM-5-TEE": {
|
||||
"pricing": {
|
||||
"prompt": "0.0000005",
|
||||
"completion": "0.000002",
|
||||
}
|
||||
}
|
||||
},
|
||||
)
|
||||
|
||||
entry = get_pricing_entry(
|
||||
"zai-org/GLM-5-TEE",
|
||||
provider="custom",
|
||||
base_url="https://llm.chutes.ai/v1",
|
||||
api_key="test-key",
|
||||
)
|
||||
|
||||
assert float(entry.input_cost_per_million) == 0.5
|
||||
assert float(entry.output_cost_per_million) == 2.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue