fix: correct Copilot API mode selection to match opencode
The previous copilot_model_api_mode() checked the catalog's supported_endpoints first and picked /chat/completions when a model supported both endpoints. This is wrong — GPT-5+ models should use the Responses API even when the catalog lists both. Replicate opencode's shouldUseCopilotResponsesApi() logic: - GPT-5+ models (gpt-5.4, gpt-5.3-codex, etc.) → Responses API - gpt-5-mini → Chat Completions (explicit exception) - Everything else (gpt-4o, claude, gemini, etc.) → Chat Completions - Model ID pattern is the primary signal, catalog is secondary The catalog fallback now only matters for non-GPT-5 models that might exclusively support /v1/messages (e.g. Claude via Copilot). Models are auto-detected from the live catalog at api.githubcopilot.com/models — no hardcoded list required for supported models, only a static fallback for when the API is unreachable.
This commit is contained in:
parent
21c45ba0ac
commit
36921a3e98
3 changed files with 98 additions and 17 deletions
|
|
@ -802,38 +802,59 @@ def _github_reasoning_efforts_for_model_id(model_id: str) -> list[str]:
|
|||
return []
|
||||
|
||||
|
||||
def _should_use_copilot_responses_api(model_id: str) -> bool:
|
||||
"""Decide whether a Copilot model should use the Responses API.
|
||||
|
||||
Replicates opencode's ``shouldUseCopilotResponsesApi`` logic:
|
||||
GPT-5+ models use Responses API, except ``gpt-5-mini`` which uses
|
||||
Chat Completions. All non-GPT models (Claude, Gemini, etc.) use
|
||||
Chat Completions.
|
||||
"""
|
||||
import re
|
||||
|
||||
match = re.match(r"^gpt-(\d+)", model_id)
|
||||
if not match:
|
||||
return False
|
||||
major = int(match.group(1))
|
||||
return major >= 5 and not model_id.startswith("gpt-5-mini")
|
||||
|
||||
|
||||
def copilot_model_api_mode(
    model_id: Optional[str],
    *,
    catalog: Optional[list[dict[str, Any]]] = None,
    api_key: Optional[str] = None,
) -> str:
    """Determine the API mode for a Copilot model.

    Uses the model ID pattern (matching opencode's approach) as the
    primary signal. Falls back to the catalog's ``supported_endpoints``
    only for models not covered by the pattern check.

    Args:
        model_id: Raw model identifier; normalized before inspection.
        catalog: Optional pre-fetched model catalog; fetched lazily from
            the live API when absent and ``api_key`` is provided.
        api_key: Credential used only to fetch the catalog on demand.

    Returns:
        One of ``"codex_responses"``, ``"chat_completions"`` or
        ``"anthropic_messages"``.
    """
    normalized = normalize_copilot_model_id(model_id, catalog=catalog, api_key=api_key)
    if not normalized:
        return "chat_completions"

    # Primary: model ID pattern (matches opencode's shouldUseCopilotResponsesApi).
    if _should_use_copilot_responses_api(normalized):
        return "codex_responses"

    # Secondary: catalog endpoints for non-GPT-5 models (e.g. Claude
    # exposed only via /v1/messages).
    if catalog is None and api_key:
        catalog = fetch_github_model_catalog(api_key=api_key)

    entry = None
    if catalog:
        entry = next((item for item in catalog if item.get("id") == normalized), None)
    if isinstance(entry, dict):
        # Single endpoint-set construction (previously duplicated verbatim).
        endpoints = {
            str(endpoint).strip()
            for endpoint in (entry.get("supported_endpoints") or [])
            if str(endpoint).strip()
        }
        # Priority preserves the original two-pass behavior:
        # chat wins when listed; a /v1/messages-only (no chat) model is
        # Anthropic-style even if it also lists /responses; otherwise
        # /responses alone selects the Responses API.
        if "/chat/completions" in endpoints:
            return "chat_completions"
        if "/v1/messages" in endpoints:
            return "anthropic_messages"
        if "/responses" in endpoints:
            return "codex_responses"

    # Static fallback for when the catalog is unavailable. These prefixes
    # are already caught by the pattern check above; kept as defense in
    # depth without changing behavior.
    if normalized.startswith(("gpt-5.4", "gpt-5.3-codex", "gpt-5.2-codex", "gpt-5.1-codex")):
        return "codex_responses"
    return "chat_completions"
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue