fix: preserve Ollama model:tag colons in context length detection (#2149)

The colon-split logic in get_model_context_length() and _query_local_context_length() assumed that any colon meant provider:model format (e.g. "local:my-model"). However, Ollama uses model:tag format (e.g. "qwen3.5:27b"), so the split reduced "qwen3.5:27b" to just "27b", which matches nothing and caused a fallback to the 2M-token probe tier.

Now only recognised provider prefixes (local, openrouter, anthropic, etc.) are stripped; Ollama model:tag names pass through intact.

Co-authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
2026-03-20 03:19:31 -07:00 · 2026-03-20 03:19:31 -07:00 · 471ea81a7d
commit 471ea81a7d
parent 3a9a1bbb84
2 changed files with 77 additions and 7 deletions
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -22,6 +22,7 @@ from unittest.mock import patch, MagicMock
 from agent.model_metadata import (
    CONTEXT_PROBE_TIERS,
    DEFAULT_CONTEXT_LENGTHS,
+    _strip_provider_prefix,
    estimate_tokens_rough,
    estimate_messages_tokens_rough,
    get_model_context_length,
@@ -292,6 +293,49 @@ class TestGetModelContextLength:
        assert result == 200000


+# =========================================================================
+# _strip_provider_prefix — Ollama model:tag vs provider:model
+# =========================================================================
+
+class TestStripProviderPrefix:
+    def test_known_provider_prefix_is_stripped(self):
+        assert _strip_provider_prefix("local:my-model") == "my-model"
+        assert _strip_provider_prefix("openrouter:anthropic/claude-sonnet-4") == "anthropic/claude-sonnet-4"
+        assert _strip_provider_prefix("anthropic:claude-sonnet-4") == "claude-sonnet-4"
+
+    def test_ollama_model_tag_preserved(self):
+        """Ollama model:tag format must NOT be stripped."""
+        assert _strip_provider_prefix("qwen3.5:27b") == "qwen3.5:27b"
+        assert _strip_provider_prefix("llama3.3:70b") == "llama3.3:70b"
+        assert _strip_provider_prefix("gemma2:9b") == "gemma2:9b"
+        assert _strip_provider_prefix("codellama:13b-instruct-q4_0") == "codellama:13b-instruct-q4_0"
+
+    def test_http_urls_preserved(self):
+        assert _strip_provider_prefix("http://example.com") == "http://example.com"
+        assert _strip_provider_prefix("https://example.com") == "https://example.com"
+
+    def test_no_colon_returns_unchanged(self):
+        assert _strip_provider_prefix("gpt-4o") == "gpt-4o"
+        assert _strip_provider_prefix("anthropic/claude-sonnet-4") == "anthropic/claude-sonnet-4"
+
+    @patch("agent.model_metadata.fetch_model_metadata")
+    def test_ollama_model_tag_not_mangled_in_context_lookup(self, mock_fetch):
+        """Ensure 'qwen3.5:27b' is NOT reduced to '27b' during context length lookup.
+
+        We mock a custom endpoint that knows 'qwen3.5:27b' — the full name
+        must reach the endpoint metadata lookup intact.
+        """
+        mock_fetch.return_value = {}
+        with patch("agent.model_metadata.fetch_endpoint_model_metadata") as mock_ep, \
+             patch("agent.model_metadata._is_custom_endpoint", return_value=True):
+            mock_ep.return_value = {"qwen3.5:27b": {"context_length": 32768}}
+            result = get_model_context_length(
+                "qwen3.5:27b",
+                base_url="http://localhost:11434/v1",
+            )
+        assert result == 32768
+
+
 # =========================================================================
 # fetch_model_metadata — caching, TTL, slugs, failures
 # =========================================================================