refactor(cli): Finalize OpenAI Codex Integration with OAuth
- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults. - Updated the context compressor's summary target tokens to 2500 for improved performance. - Added external credential detection for Codex CLI to streamline authentication. - Refactored various components to ensure consistent handling of authentication and model selection across the application.
This commit is contained in:
parent
86b1db0598
commit
500f0eab4a
22 changed files with 1784 additions and 207 deletions
|
|
@ -8,7 +8,9 @@ Resolution order for text tasks:
|
|||
1. OpenRouter (OPENROUTER_API_KEY)
|
||||
2. Nous Portal (~/.hermes/auth.json active provider)
|
||||
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
|
||||
4. None
|
||||
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
|
||||
wrapped to look like a chat.completions client)
|
||||
5. None
|
||||
|
||||
Resolution order for vision/multimodal tasks:
|
||||
1. OpenRouter
|
||||
|
|
@ -20,7 +22,8 @@ import json
|
|||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
from openai import OpenAI
|
||||
|
||||
|
|
@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash"
|
|||
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
|
||||
|
||||
# Codex fallback: uses the Responses API (the only endpoint the Codex
|
||||
# OAuth token can access) with a fast model for auxiliary tasks.
|
||||
_CODEX_AUX_MODEL = "gpt-5.3-codex"
|
||||
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
|
||||
|
||||
# ── Codex Responses → chat.completions adapter ─────────────────────────────
|
||||
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
|
||||
# read response.choices[0].message.content. This adapter translates those
|
||||
# calls to the Codex Responses API so callers don't need any changes.
|
||||
|
||||
class _CodexCompletionsAdapter:
|
||||
"""Drop-in shim that accepts chat.completions.create() kwargs and
|
||||
routes them through the Codex Responses streaming API."""
|
||||
|
||||
def __init__(self, real_client: OpenAI, model: str):
|
||||
self._client = real_client
|
||||
self._model = model
|
||||
|
||||
def create(self, **kwargs) -> Any:
|
||||
messages = kwargs.get("messages", [])
|
||||
model = kwargs.get("model", self._model)
|
||||
temperature = kwargs.get("temperature")
|
||||
|
||||
# Separate system/instructions from conversation messages
|
||||
instructions = "You are a helpful assistant."
|
||||
input_msgs: List[Dict[str, Any]] = []
|
||||
for msg in messages:
|
||||
role = msg.get("role", "user")
|
||||
content = msg.get("content", "")
|
||||
if role == "system":
|
||||
instructions = content
|
||||
else:
|
||||
input_msgs.append({"role": role, "content": content})
|
||||
|
||||
resp_kwargs: Dict[str, Any] = {
|
||||
"model": model,
|
||||
"instructions": instructions,
|
||||
"input": input_msgs or [{"role": "user", "content": ""}],
|
||||
"stream": True,
|
||||
"store": False,
|
||||
}
|
||||
|
||||
max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
|
||||
if max_tokens is not None:
|
||||
resp_kwargs["max_output_tokens"] = int(max_tokens)
|
||||
if temperature is not None:
|
||||
resp_kwargs["temperature"] = temperature
|
||||
|
||||
# Tools support for flush_memories and similar callers
|
||||
tools = kwargs.get("tools")
|
||||
if tools:
|
||||
converted = []
|
||||
for t in tools:
|
||||
fn = t.get("function", {}) if isinstance(t, dict) else {}
|
||||
name = fn.get("name")
|
||||
if not name:
|
||||
continue
|
||||
converted.append({
|
||||
"type": "function",
|
||||
"name": name,
|
||||
"description": fn.get("description", ""),
|
||||
"parameters": fn.get("parameters", {}),
|
||||
})
|
||||
if converted:
|
||||
resp_kwargs["tools"] = converted
|
||||
|
||||
# Stream and collect the response
|
||||
text_parts: List[str] = []
|
||||
tool_calls_raw: List[Any] = []
|
||||
usage = None
|
||||
|
||||
try:
|
||||
with self._client.responses.stream(**resp_kwargs) as stream:
|
||||
for _event in stream:
|
||||
pass
|
||||
final = stream.get_final_response()
|
||||
|
||||
# Extract text and tool calls from the Responses output
|
||||
for item in getattr(final, "output", []):
|
||||
item_type = getattr(item, "type", None)
|
||||
if item_type == "message":
|
||||
for part in getattr(item, "content", []):
|
||||
ptype = getattr(part, "type", None)
|
||||
if ptype in ("output_text", "text"):
|
||||
text_parts.append(getattr(part, "text", ""))
|
||||
elif item_type == "function_call":
|
||||
tool_calls_raw.append(SimpleNamespace(
|
||||
id=getattr(item, "call_id", ""),
|
||||
type="function",
|
||||
function=SimpleNamespace(
|
||||
name=getattr(item, "name", ""),
|
||||
arguments=getattr(item, "arguments", "{}"),
|
||||
),
|
||||
))
|
||||
|
||||
resp_usage = getattr(final, "usage", None)
|
||||
if resp_usage:
|
||||
usage = SimpleNamespace(
|
||||
prompt_tokens=getattr(resp_usage, "input_tokens", 0),
|
||||
completion_tokens=getattr(resp_usage, "output_tokens", 0),
|
||||
total_tokens=getattr(resp_usage, "total_tokens", 0),
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Codex auxiliary Responses API call failed: %s", exc)
|
||||
raise
|
||||
|
||||
content = "".join(text_parts).strip() or None
|
||||
|
||||
# Build a response that looks like chat.completions
|
||||
message = SimpleNamespace(
|
||||
role="assistant",
|
||||
content=content,
|
||||
tool_calls=tool_calls_raw or None,
|
||||
)
|
||||
choice = SimpleNamespace(
|
||||
index=0,
|
||||
message=message,
|
||||
finish_reason="stop" if not tool_calls_raw else "tool_calls",
|
||||
)
|
||||
return SimpleNamespace(
|
||||
choices=[choice],
|
||||
model=model,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
|
||||
class _CodexChatShim:
|
||||
"""Wraps the adapter to provide client.chat.completions.create()."""
|
||||
|
||||
def __init__(self, adapter: _CodexCompletionsAdapter):
|
||||
self.completions = adapter
|
||||
|
||||
|
||||
class CodexAuxiliaryClient:
|
||||
"""OpenAI-client-compatible wrapper that routes through Codex Responses API.
|
||||
|
||||
Consumers can call client.chat.completions.create(**kwargs) as normal.
|
||||
Also exposes .api_key and .base_url for introspection by async wrappers.
|
||||
"""
|
||||
|
||||
def __init__(self, real_client: OpenAI, model: str):
|
||||
self._real_client = real_client
|
||||
adapter = _CodexCompletionsAdapter(real_client, model)
|
||||
self.chat = _CodexChatShim(adapter)
|
||||
self.api_key = real_client.api_key
|
||||
self.base_url = real_client.base_url
|
||||
|
||||
def close(self):
|
||||
self._real_client.close()
|
||||
|
||||
|
||||
class _AsyncCodexCompletionsAdapter:
|
||||
"""Async version of the Codex Responses adapter.
|
||||
|
||||
Wraps the sync adapter via asyncio.to_thread() so async consumers
|
||||
(web_tools, session_search) can await it as normal.
|
||||
"""
|
||||
|
||||
def __init__(self, sync_adapter: _CodexCompletionsAdapter):
|
||||
self._sync = sync_adapter
|
||||
|
||||
async def create(self, **kwargs) -> Any:
|
||||
import asyncio
|
||||
return await asyncio.to_thread(self._sync.create, **kwargs)
|
||||
|
||||
|
||||
class _AsyncCodexChatShim:
|
||||
def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
|
||||
self.completions = adapter
|
||||
|
||||
|
||||
class AsyncCodexAuxiliaryClient:
|
||||
"""Async-compatible wrapper matching AsyncOpenAI.chat.completions.create()."""
|
||||
|
||||
def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
|
||||
sync_adapter = sync_wrapper.chat.completions
|
||||
async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter)
|
||||
self.chat = _AsyncCodexChatShim(async_adapter)
|
||||
self.api_key = sync_wrapper.api_key
|
||||
self.base_url = sync_wrapper.base_url
|
||||
|
||||
|
||||
def _read_nous_auth() -> Optional[dict]:
|
||||
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
|
||||
|
|
@ -82,12 +267,31 @@ def _nous_base_url() -> str:
|
|||
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid Codex OAuth access token from ~/.codex/auth.json."""
|
||||
try:
|
||||
codex_auth = Path.home() / ".codex" / "auth.json"
|
||||
if not codex_auth.is_file():
|
||||
return None
|
||||
data = json.loads(codex_auth.read_text())
|
||||
tokens = data.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return None
|
||||
access_token = tokens.get("access_token")
|
||||
if isinstance(access_token, str) and access_token.strip():
|
||||
return access_token.strip()
|
||||
return None
|
||||
except Exception as exc:
|
||||
logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
|
||||
return None
|
||||
|
||||
|
||||
# ── Public API ──────────────────────────────────────────────────────────────
|
||||
|
||||
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, model_slug) for text-only auxiliary tasks.
|
||||
|
||||
Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
|
||||
Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
|
||||
"""
|
||||
# 1. OpenRouter
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
|
|
@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
logger.debug("Auxiliary text client: custom endpoint (%s)", model)
|
||||
return OpenAI(api_key=custom_key, base_url=custom_base), model
|
||||
|
||||
# 4. Nothing available
|
||||
# 4. Codex OAuth -- uses the Responses API (only endpoint the token
|
||||
# can access), wrapped to look like a chat.completions client.
|
||||
codex_token = _read_codex_access_token()
|
||||
if codex_token:
|
||||
logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
|
||||
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
|
||||
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
|
||||
|
||||
# 5. Nothing available
|
||||
logger.debug("Auxiliary text client: none available")
|
||||
return None, None
|
||||
|
||||
|
||||
def get_async_text_auxiliary_client():
|
||||
"""Return (async_client, model_slug) for async consumers.
|
||||
|
||||
For standard providers returns (AsyncOpenAI, model). For Codex returns
|
||||
(AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
|
||||
Returns (None, None) when no provider is available.
|
||||
"""
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
sync_client, model = get_text_auxiliary_client()
|
||||
if sync_client is None:
|
||||
return None, None
|
||||
|
||||
if isinstance(sync_client, CodexAuxiliaryClient):
|
||||
return AsyncCodexAuxiliaryClient(sync_client), model
|
||||
|
||||
async_kwargs = {
|
||||
"api_key": sync_client.api_key,
|
||||
"base_url": str(sync_client.base_url),
|
||||
}
|
||||
if "openrouter" in str(sync_client.base_url).lower():
|
||||
async_kwargs["default_headers"] = dict(_OR_HEADERS)
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
"""Return (client, model_slug) for vision/multimodal auxiliary tasks.
|
||||
|
||||
|
|
@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict:
|
|||
|
||||
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
|
||||
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
|
||||
The Codex adapter translates max_tokens internally, so we use max_tokens
|
||||
for it as well.
|
||||
"""
|
||||
custom_base = os.getenv("OPENAI_BASE_URL", "")
|
||||
or_key = os.getenv("OPENROUTER_API_KEY")
|
||||
# Only use max_completion_tokens when the auxiliary client resolved to
|
||||
# direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com)
|
||||
# Only use max_completion_tokens for direct OpenAI custom endpoints
|
||||
if (not or_key
|
||||
and _read_nous_auth() is None
|
||||
and "api.openai.com" in custom_base.lower()):
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ class ContextCompressor:
|
|||
threshold_percent: float = 0.85,
|
||||
protect_first_n: int = 3,
|
||||
protect_last_n: int = 4,
|
||||
summary_target_tokens: int = 500,
|
||||
summary_target_tokens: int = 2500,
|
||||
quiet_mode: bool = False,
|
||||
summary_model_override: str = None,
|
||||
):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue