From 51b95236f97647fe3680d59bd69f83c91e302cf6 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Sat, 21 Feb 2026 22:34:18 -0800 Subject: [PATCH] refactor: move model metadata functions to agent/model_metadata.py - Relocated functions related to model metadata, including fetch_model_metadata, get_model_context_length, estimate_tokens_rough, and estimate_messages_tokens_rough, to agent/model_metadata.py for better organization and maintainability. - Updated imports in run_agent.py to reflect the new location of these functions. --- run_agent.py | 133 ++------------------------------------------------- 1 file changed, 3 insertions(+), 130 deletions(-) diff --git a/run_agent.py b/run_agent.py index 5b99f3b6..882d10b2 100644 --- a/run_agent.py +++ b/run_agent.py @@ -77,136 +77,9 @@ from agent.trajectory import ( save_trajectory as _save_trajectory_to_file, ) -# ============================================================================= -# Model Context Management (extracted to agent/model_metadata.py) -# The functions below are re-imported above; these stubs maintain the -# module-level names for any internal references that use the unqualified name. -# ============================================================================= - -DEFAULT_CONTEXT_LENGTHS = { - "anthropic/claude-opus-4": 200000, - "anthropic/claude-opus-4.5": 200000, - "anthropic/claude-opus-4.6": 200000, - "anthropic/claude-sonnet-4": 200000, - "anthropic/claude-sonnet-4-20250514": 200000, - "anthropic/claude-haiku-4.5": 200000, - "openai/gpt-4o": 128000, - "openai/gpt-4-turbo": 128000, - "openai/gpt-4o-mini": 128000, - "google/gemini-2.0-flash": 1048576, - "google/gemini-2.5-pro": 1048576, - "meta-llama/llama-3.3-70b-instruct": 131072, - "deepseek/deepseek-chat-v3": 65536, - "qwen/qwen-2.5-72b-instruct": 32768, -} - - -def fetch_model_metadata(force_refresh: bool = False) -> Dict[str, Dict[str, Any]]: - """ - Fetch model metadata from OpenRouter's /api/v1/models endpoint. - Results are cached for 1 hour to minimize API calls. - - Returns: - Dict mapping model_id to metadata (context_length, max_completion_tokens, etc.) - """ - global _model_metadata_cache, _model_metadata_cache_time - - # Return cached data if fresh - if not force_refresh and _model_metadata_cache and (time.time() - _model_metadata_cache_time) < _MODEL_CACHE_TTL: - return _model_metadata_cache - - try: - response = requests.get( - OPENROUTER_MODELS_URL, - timeout=10 - ) - response.raise_for_status() - data = response.json() - - # Build cache mapping model_id to relevant metadata - cache = {} - for model in data.get("data", []): - model_id = model.get("id", "") - cache[model_id] = { - "context_length": model.get("context_length", 128000), - "max_completion_tokens": model.get("top_provider", {}).get("max_completion_tokens", 4096), - "name": model.get("name", model_id), - "pricing": model.get("pricing", {}), - } - # Also cache by canonical slug if different - canonical = model.get("canonical_slug", "") - if canonical and canonical != model_id: - cache[canonical] = cache[model_id] - - _model_metadata_cache = cache - _model_metadata_cache_time = time.time() - - logger.debug("Fetched metadata for %s models from OpenRouter", len(cache)) - - return cache - - except Exception as e: - logging.warning(f"Failed to fetch model metadata from OpenRouter: {e}") - # Return cached data even if stale, or empty dict - return _model_metadata_cache or {} - - -def get_model_context_length(model: str) -> int: - """ - Get the context length for a specific model. - - Args: - model: Model identifier (e.g., "anthropic/claude-sonnet-4") - - Returns: - Context length in tokens (defaults to 128000 if unknown) - """ - # Try to get from OpenRouter API - metadata = fetch_model_metadata() - if model in metadata: - return metadata[model].get("context_length", 128000) - - # Check default fallbacks (handles partial matches) - for default_model, length in DEFAULT_CONTEXT_LENGTHS.items(): - if default_model in model or model in default_model: - return length - - # Conservative default - return 128000 - - -def estimate_tokens_rough(text: str) -> int: - """ - Rough token estimate for pre-flight checks (before API call). - Uses ~4 chars per token heuristic. - - For accurate counts, use the `usage.prompt_tokens` from API responses. - - Args: - text: Text to estimate tokens for - - Returns: - Rough estimated token count - """ - if not text: - return 0 - return len(text) // 4 - - -def estimate_messages_tokens_rough(messages: List[Dict[str, Any]]) -> int: - """ - Rough token estimate for messages (pre-flight check only). - - For accurate counts, use the `usage.prompt_tokens` from API responses. - - Args: - messages: List of message dicts - - Returns: - Rough estimated token count - """ - total_chars = sum(len(str(msg)) for msg in messages) - return total_chars // 4 +# Model metadata functions (fetch_model_metadata, get_model_context_length, +# estimate_tokens_rough, estimate_messages_tokens_rough) are now in +# agent/model_metadata.py -- imported above. class ContextCompressor: