feat: centralized provider router + fix Codex vision bypass + vision error handling

Three interconnected fixes for auxiliary client infrastructure:

1. CENTRALIZED PROVIDER ROUTER (auxiliary_client.py)
   Add resolve_provider_client(provider, model, async_mode) — a single
   entry point for creating properly configured clients. Given a provider
   name and optional model, it handles auth lookup (env vars, OAuth
   tokens, auth.json), base URL resolution, provider-specific headers,
   and API format differences (Chat Completions vs Responses API for
   Codex). All auxiliary consumers should route through this instead of
   ad-hoc env var lookups.

   Refactored get_text_auxiliary_client, get_async_text_auxiliary_client,
   and get_vision_auxiliary_client to use the router internally.

2. FIX CODEX VISION BYPASS (vision_tools.py)
   vision_tools.py was constructing a raw AsyncOpenAI client from the
   sync vision client's api_key/base_url, completely bypassing the Codex
   Responses API adapter. When the vision provider resolved to Codex,
   the raw client would hit chatgpt.com/backend-api/codex with
   chat.completions.create() which only supports the Responses API.

   Fix: Added get_async_vision_auxiliary_client() which properly wraps
   Codex into AsyncCodexAuxiliaryClient. vision_tools.py now uses this
   instead of manual client construction.

3. FIX COMPRESSION FALLBACK + VISION ERROR HANDLING
   - context_compressor.py: Removed _get_fallback_client() which blindly
     looked for OPENAI_API_KEY + OPENAI_BASE_URL (fails for Codex OAuth,
     API-key providers, users without OPENAI_BASE_URL set). Replaced
     with fallback loop through resolve_provider_client() for each
     known provider, with same-provider dedup.

   - vision_tools.py: Added error detection for vision capability
     failures. Returns clear message to the model when the configured
     model doesn't support vision, instead of a generic error.

Addresses #886
This commit is contained in:
teknium1 2026-03-11 19:46:47 -07:00
parent a8409a161f
commit 8805e705a7
3 changed files with 256 additions and 78 deletions

View file

@ -499,6 +499,188 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
return None, None
# ── Centralized Provider Router ─────────────────────────────────────────────
#
# resolve_provider_client() is the single entry point for creating a properly
# configured client given a (provider, model) pair. It handles auth lookup,
# base URL resolution, provider-specific headers, and API format differences
# (Chat Completions vs Responses API for Codex).
#
# All auxiliary consumer code should go through this or the public helpers
# below — never look up auth env vars ad-hoc.
def _to_async_client(sync_client, model: str):
"""Convert a sync client to its async counterpart, preserving Codex routing."""
from openai import AsyncOpenAI
if isinstance(sync_client, CodexAuxiliaryClient):
return AsyncCodexAuxiliaryClient(sync_client), model
async_kwargs = {
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
base_lower = str(sync_client.base_url).lower()
if "openrouter" in base_lower:
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif "api.kimi.com" in base_lower:
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
return AsyncOpenAI(**async_kwargs), model
def resolve_provider_client(
provider: str,
model: str = None,
async_mode: bool = False,
) -> Tuple[Optional[Any], Optional[str]]:
"""Central router: given a provider name and optional model, return a
configured client with the correct auth, base URL, and API format.
The returned client always exposes ``.chat.completions.create()`` for
Codex/Responses API providers, an adapter handles the translation
transparently.
Args:
provider: Provider identifier. One of:
"openrouter", "nous", "openai-codex" (or "codex"),
"zai", "kimi-coding", "minimax", "minimax-cn", "nous-api",
"custom" (OPENAI_BASE_URL + OPENAI_API_KEY),
"auto" (full auto-detection chain).
model: Model slug override. If None, uses the provider's default
auxiliary model.
async_mode: If True, return an async-compatible client.
Returns:
(client, resolved_model) or (None, None) if auth is unavailable.
"""
# Normalise aliases
provider = (provider or "auto").strip().lower()
if provider == "codex":
provider = "openai-codex"
if provider == "main":
provider = "custom"
# ── Auto: try all providers in priority order ────────────────────
if provider == "auto":
client, resolved = _resolve_auto()
if client is None:
return None, None
final_model = model or resolved
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── OpenRouter ───────────────────────────────────────────────────
if provider == "openrouter":
client, default = _try_openrouter()
if client is None:
logger.warning("resolve_provider_client: openrouter requested "
"but OPENROUTER_API_KEY not set")
return None, None
final_model = model or default
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── Nous Portal (OAuth) ──────────────────────────────────────────
if provider == "nous":
client, default = _try_nous()
if client is None:
logger.warning("resolve_provider_client: nous requested "
"but Nous Portal not configured (run: hermes login)")
return None, None
final_model = model or default
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
if provider == "openai-codex":
client, default = _try_codex()
if client is None:
logger.warning("resolve_provider_client: openai-codex requested "
"but no Codex OAuth token found (run: hermes model)")
return None, None
final_model = model or default
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom":
# Try custom first, then codex, then API-key providers
for try_fn in (_try_custom_endpoint, _try_codex,
_resolve_api_key_provider):
client, default = try_fn()
if client is not None:
final_model = model or default
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
logger.warning("resolve_provider_client: custom/main requested "
"but no endpoint credentials found")
return None, None
# ── API-key providers from PROVIDER_REGISTRY ─────────────────────
try:
from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
except ImportError:
logger.debug("hermes_cli.auth not available for provider %s", provider)
return None, None
pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig is None:
logger.warning("resolve_provider_client: unknown provider %r", provider)
return None, None
if pconfig.auth_type == "api_key":
# Find the first configured API key
api_key = ""
for env_var in pconfig.api_key_env_vars:
api_key = os.getenv(env_var, "").strip()
if api_key:
break
if not api_key:
logger.warning("resolve_provider_client: provider %s has no API "
"key configured (tried: %s)",
provider, ", ".join(pconfig.api_key_env_vars))
return None, None
# Resolve base URL (env override → provider-specific logic → default)
base_url_override = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
if provider == "kimi-coding":
base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, base_url_override)
elif base_url_override:
base_url = base_url_override
else:
base_url = pconfig.inference_base_url
default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
final_model = model or default_model
# Provider-specific headers
headers = {}
if "api.kimi.com" in base_url.lower():
headers["User-Agent"] = "KimiCLI/1.0"
client = OpenAI(api_key=api_key, base_url=base_url,
**({"default_headers": headers} if headers else {}))
logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
return (_to_async_client(client, final_model) if async_mode
else (client, final_model))
elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
# OAuth providers — route through their specific try functions
if provider == "nous":
return resolve_provider_client("nous", model, async_mode)
if provider == "openai-codex":
return resolve_provider_client("openai-codex", model, async_mode)
# nous-api is api_key type so it's handled above
logger.warning("resolve_provider_client: OAuth provider %s not "
"directly supported, try 'auto'", provider)
return None, None
logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
pconfig.auth_type, provider)
return None, None
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
@ -513,8 +695,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
"""
forced = _get_auxiliary_provider(task)
if forced != "auto":
return _resolve_forced_provider(forced)
return _resolve_auto()
return resolve_provider_client(forced)
return resolve_provider_client("auto")
def get_async_text_auxiliary_client(task: str = ""):
@ -524,24 +706,10 @@ def get_async_text_auxiliary_client(task: str = ""):
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
Returns (None, None) when no provider is available.
"""
from openai import AsyncOpenAI
sync_client, model = get_text_auxiliary_client(task)
if sync_client is None:
return None, None
if isinstance(sync_client, CodexAuxiliaryClient):
return AsyncCodexAuxiliaryClient(sync_client), model
async_kwargs = {
"api_key": sync_client.api_key,
"base_url": str(sync_client.base_url),
}
if "openrouter" in str(sync_client.base_url).lower():
async_kwargs["default_headers"] = dict(_OR_HEADERS)
elif "api.kimi.com" in str(sync_client.base_url).lower():
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
return AsyncOpenAI(**async_kwargs), model
forced = _get_auxiliary_provider(task)
if forced != "auto":
return resolve_provider_client(forced, async_mode=True)
return resolve_provider_client("auto", async_mode=True)
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
@ -559,7 +727,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""
forced = _get_auxiliary_provider("vision")
if forced != "auto":
return _resolve_forced_provider(forced)
return resolve_provider_client(forced)
# Auto: try providers known to support multimodal first, then fall
# back to the user's custom endpoint. Many local models (Qwen-VL,
# LLaVA, Pixtral, etc.) support vision — skipping them entirely
@ -573,6 +741,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
return None, None
def get_async_vision_auxiliary_client():
"""Return (async_client, model_slug) for async vision consumers.
Properly handles Codex routing unlike manually constructing
AsyncOpenAI from a sync client, this preserves the Responses API
adapter for Codex providers.
Returns (None, None) when no provider is available.
"""
sync_client, model = get_vision_auxiliary_client()
if sync_client is None:
return None, None
return _to_async_client(sync_client, model)
def get_auxiliary_extra_body() -> dict:
"""Return extra_body kwargs for auxiliary API calls.