refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.
2026-02-28 21:47:51 -08:00 · 2026-02-28 21:47:51 -08:00 · 500f0eab4a
commit 500f0eab4a
parent 86b1db0598
22 changed files with 1784 additions and 207 deletions
--- a/README.md
+++ b/README.md
@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
 | Provider | Setup |
 |----------|-------|
-| **Nous Portal** | `hermes login` (OAuth, subscription-based) |
+| **Nous Portal** | `hermes model` (OAuth, subscription-based) |
 | **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
 | **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
 | **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` |
-**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
+**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required.
 **Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
 ---
@ -368,7 +371,7 @@ hermes --resume <id>      # Resume a specific session (-r)
 # Provider & model management
 hermes model              # Switch provider and model interactively
-hermes login              # Authenticate with Nous Portal (OAuth)
+hermes model              # Select provider and model
 hermes logout             # Clear stored OAuth credentials
 # Configuration
@ -1638,7 +1641,7 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t
 |------|-------------|
 | `~/.hermes/config.yaml` | Your settings |
 | `~/.hermes/.env` | API keys and secrets |
-| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) |
+| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) |
 | `~/.hermes/cron/` | Scheduled jobs data |
 | `~/.hermes/sessions/` | Gateway session data |
 | `~/.hermes/hermes-agent/` | Installation directory |
@ -1666,7 +1669,7 @@ hermes config    # View current settings
 Common issues:
 - **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
 - **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh.
+- **"Run `hermes model` to re-authenticate"**: Your Nous Portal session expired. Run `hermes model` (or `hermes setup`) to sign in again.
 - **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference.
 - **Gateway won't start**: Check `hermes gateway status` and logs
 - **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@ -8,7 +8,9 @@ Resolution order for text tasks:
  1. OpenRouter  (OPENROUTER_API_KEY)
  2. Nous Portal (~/.hermes/auth.json active provider)
  3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
-  4. None
+  4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
     wrapped to look like a chat.completions client)
  5. None
 Resolution order for vision/multimodal tasks:
  1. OpenRouter
@ -20,7 +22,8 @@ import json
 import logging
 import os
 from pathlib import Path
-from typing import Optional, Tuple
+from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
 from openai import OpenAI
@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash"
 _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
 _AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
 # Codex fallback: uses the Responses API (the only endpoint the Codex
 # OAuth token can access) with a fast model for auxiliary tasks.
 _CODEX_AUX_MODEL = "gpt-5.3-codex"
 _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
 # ── Codex Responses → chat.completions adapter ─────────────────────────────
 # All auxiliary consumers call client.chat.completions.create(**kwargs) and
 # read response.choices[0].message.content. This adapter translates those
 # calls to the Codex Responses API so callers don't need any changes.
 class _CodexCompletionsAdapter:
    """Drop-in shim that accepts chat.completions.create() kwargs and
    routes them through the Codex Responses streaming API.

    The object returned by ``create()`` mimics the parts of a
    chat.completions response that auxiliary callers read:
    ``choices[0].message.content``, ``choices[0].message.tool_calls``,
    ``choices[0].finish_reason``, ``model`` and ``usage``.
    """

    # Used when the caller supplies no non-empty system message.
    _DEFAULT_INSTRUCTIONS = "You are a helpful assistant."

    def __init__(self, real_client: "OpenAI", model: str):
        self._client = real_client
        self._model = model

    @staticmethod
    def _instruction_text(content: Any) -> str:
        """Flatten a system message's content into plain text.

        Strings are returned unchanged. For a list of content parts the
        ``text`` fields of dict parts are joined with newlines. Anything
        else yields an empty string.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            pieces = [
                part["text"]
                for part in content
                if isinstance(part, dict) and isinstance(part.get("text"), str)
            ]
            return "\n".join(pieces)
        return ""

    def create(self, **kwargs) -> Any:
        """Translate a chat.completions-style call into a Responses call.

        Recognised kwargs: ``messages``, ``model``, ``temperature``,
        ``tools`` and one of ``max_output_tokens`` /
        ``max_completion_tokens`` / ``max_tokens``.

        Returns:
            A ``SimpleNamespace`` shaped like a chat.completions response.

        Raises:
            Exception: whatever the underlying client raises; it is logged
                at debug level and re-raised unchanged.
        """
        messages = kwargs.get("messages") or []
        # An explicit model=None falls back to the adapter's default model.
        model = kwargs.get("model") or self._model
        temperature = kwargs.get("temperature")

        # Separate system/instructions from conversation messages. Every
        # system message contributes, so an earlier one is not silently
        # replaced by a later one.
        system_parts: List[str] = []
        input_msgs: List[Dict[str, Any]] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                text = self._instruction_text(content)
                if text.strip():
                    system_parts.append(text)
            else:
                input_msgs.append({"role": role, "content": content})
        instructions = "\n\n".join(system_parts) or self._DEFAULT_INSTRUCTIONS

        resp_kwargs: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            "input": input_msgs or [{"role": "user", "content": ""}],
            "stream": True,
            "store": False,
        }

        max_tokens = (
            kwargs.get("max_output_tokens")
            or kwargs.get("max_completion_tokens")
            or kwargs.get("max_tokens")
        )
        if max_tokens is not None:
            resp_kwargs["max_output_tokens"] = int(max_tokens)
        if temperature is not None:
            resp_kwargs["temperature"] = temperature

        # Tools support for flush_memories and similar callers: flatten the
        # chat.completions {"function": {...}} wrapper into top-level fields.
        tools = kwargs.get("tools")
        if tools:
            converted = []
            for t in tools:
                fn = t.get("function", {}) if isinstance(t, dict) else {}
                name = fn.get("name")
                if not name:
                    continue
                converted.append({
                    "type": "function",
                    "name": name,
                    "description": fn.get("description", ""),
                    "parameters": fn.get("parameters", {}),
                })
            if converted:
                resp_kwargs["tools"] = converted

        # Stream and collect the response
        text_parts: List[str] = []
        tool_calls_raw: List[Any] = []
        usage = None
        try:
            with self._client.responses.stream(**resp_kwargs) as stream:
                # Drain the stream; only the assembled final response is used.
                for _event in stream:
                    pass
                final = stream.get_final_response()

            # Extract text and tool calls from the Responses output. The
            # `or []` / `or ""` guards tolerate attributes that are present
            # but None.
            for item in getattr(final, "output", None) or []:
                item_type = getattr(item, "type", None)
                if item_type == "message":
                    for part in getattr(item, "content", None) or []:
                        ptype = getattr(part, "type", None)
                        if ptype in ("output_text", "text"):
                            text_parts.append(getattr(part, "text", "") or "")
                elif item_type == "function_call":
                    tool_calls_raw.append(SimpleNamespace(
                        id=getattr(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))

            resp_usage = getattr(final, "usage", None)
            if resp_usage:
                usage = SimpleNamespace(
                    prompt_tokens=getattr(resp_usage, "input_tokens", 0),
                    completion_tokens=getattr(resp_usage, "output_tokens", 0),
                    total_tokens=getattr(resp_usage, "total_tokens", 0),
                )
        except Exception as exc:
            logger.debug("Codex auxiliary Responses API call failed: %s", exc)
            raise

        content = "".join(text_parts).strip() or None

        # Build a response that looks like chat.completions
        message = SimpleNamespace(
            role="assistant",
            content=content,
            tool_calls=tool_calls_raw or None,
        )
        choice = SimpleNamespace(
            index=0,
            message=message,
            finish_reason="stop" if not tool_calls_raw else "tool_calls",
        )
        return SimpleNamespace(
            choices=[choice],
            model=model,
            usage=usage,
        )
 class _CodexChatShim:
    """Namespace object standing in for ``client.chat``.

    Its only attribute, ``completions``, is the adapter, so callers can
    write ``client.chat.completions.create(...)``.
    """

    def __init__(self, adapter: _CodexCompletionsAdapter) -> None:
        # Exposed under the attribute name the OpenAI client uses.
        self.completions = adapter
 class CodexAuxiliaryClient:
    """Wrapper exposing an OpenAI-client-like surface over the Codex Responses API.

    ``client.chat.completions.create(**kwargs)`` works as on a regular
    client. ``api_key`` and ``base_url`` are copied from the wrapped client
    so async wrappers can introspect them.
    """

    def __init__(self, real_client: OpenAI, model: str):
        # Mirror the connection attributes of the wrapped client.
        self.api_key = real_client.api_key
        self.base_url = real_client.base_url
        self._real_client = real_client
        # client.chat -> shim, shim.completions -> Responses adapter.
        self.chat = _CodexChatShim(_CodexCompletionsAdapter(real_client, model))

    def close(self):
        """Close the wrapped OpenAI client."""
        self._real_client.close()
 class _AsyncCodexCompletionsAdapter:
    """Awaitable facade over the synchronous Codex Responses adapter.

    The blocking ``create()`` call is handed to ``asyncio.to_thread()`` so
    async consumers (web_tools, session_search) can simply await it.
    """

    def __init__(self, sync_adapter: _CodexCompletionsAdapter):
        self._sync = sync_adapter

    async def create(self, **kwargs) -> Any:
        """Run the sync adapter's ``create(**kwargs)`` in a worker thread."""
        from asyncio import to_thread

        blocking_create = self._sync.create
        result = await to_thread(blocking_create, **kwargs)
        return result
 class _AsyncCodexChatShim:
    """Namespace object standing in for ``async_client.chat``.

    Holds the async adapter under ``completions`` so callers can write
    ``await client.chat.completions.create(...)``.
    """

    def __init__(self, adapter: _AsyncCodexCompletionsAdapter) -> None:
        # Exposed under the attribute name the OpenAI client uses.
        self.completions = adapter
 class AsyncCodexAuxiliaryClient:
    """Async-compatible wrapper matching AsyncOpenAI.chat.completions.create().

    Built on top of a ``CodexAuxiliaryClient``: the sync adapter is reused
    and its ``create()`` is run in a worker thread. ``api_key`` and
    ``base_url`` are copied from the sync wrapper for introspection.
    """

    def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
        # Keep the sync wrapper so close() can release the underlying client.
        self._sync_wrapper = sync_wrapper
        sync_adapter = sync_wrapper.chat.completions
        async_adapter = _AsyncCodexCompletionsAdapter(sync_adapter)
        self.chat = _AsyncCodexChatShim(async_adapter)
        self.api_key = sync_wrapper.api_key
        self.base_url = sync_wrapper.base_url

    async def close(self) -> None:
        """Close the wrapped sync client without blocking the event loop."""
        import asyncio

        await asyncio.to_thread(self._sync_wrapper.close)
 def _read_nous_auth() -> Optional[dict]:
    """Read and validate ~/.hermes/auth.json for an active Nous provider.
@ -82,12 +267,31 @@ def _nous_base_url() -> str:
    return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
 def _read_codex_access_token() -> Optional[str]:
    """Read the stored Codex OAuth access token, if any.

    The token is looked up in ``$CODEX_HOME/auth.json`` when ``CODEX_HOME``
    is set and non-empty, otherwise in ``~/.codex/auth.json``.

    Only presence and type are checked; the token is NOT verified for
    expiry, so callers must be prepared for it to be rejected.

    Returns:
        The stripped access token, or ``None`` when the file is missing,
        unreadable, malformed, or holds no non-empty token.
    """
    try:
        codex_home = os.getenv("CODEX_HOME", "").strip()
        if codex_home:
            base_dir = Path(codex_home).expanduser()
        else:
            base_dir = Path.home() / ".codex"
        codex_auth = base_dir / "auth.json"
        if not codex_auth.is_file():
            return None
        # JSON is UTF-8 by specification; do not rely on the locale default.
        data = json.loads(codex_auth.read_text(encoding="utf-8"))
        tokens = data.get("tokens") if isinstance(data, dict) else None
        if not isinstance(tokens, dict):
            return None
        access_token = tokens.get("access_token")
        if isinstance(access_token, str) and access_token.strip():
            return access_token.strip()
        return None
    except Exception as exc:
        # Best effort: auxiliary clients simply fall through to "none".
        logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
        return None
 # ── Public API ──────────────────────────────────────────────────────────────
 def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for text-only auxiliary tasks.
-    Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
+    Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
    """
    # 1. OpenRouter
    or_key = os.getenv("OPENROUTER_API_KEY")
@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
        logger.debug("Auxiliary text client: custom endpoint (%s)", model)
        return OpenAI(api_key=custom_key, base_url=custom_base), model
-    # 4. Nothing available
+    # 4. Codex OAuth -- uses the Responses API (only endpoint the token
    # can access), wrapped to look like a chat.completions client.
    codex_token = _read_codex_access_token()
    if codex_token:
        logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
        real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
        return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
    # 5. Nothing available
    logger.debug("Auxiliary text client: none available")
    return None, None
 def get_async_text_auxiliary_client():
    """Resolve the text auxiliary provider and return an async client for it.

    Returns:
        ``(AsyncOpenAI, model)`` for standard providers,
        ``(AsyncCodexAuxiliaryClient, model)`` when the resolved client is
        the Codex wrapper, or ``(None, None)`` when no provider is
        available.
    """
    from openai import AsyncOpenAI

    resolved_client, model = get_text_auxiliary_client()
    if resolved_client is None:
        return None, None

    # The Codex wrapper is not a real OpenAI client; wrap it instead of
    # rebuilding an AsyncOpenAI from its credentials.
    if isinstance(resolved_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(resolved_client), model

    endpoint = str(resolved_client.base_url)
    extra_kwargs = {}
    if "openrouter" in endpoint.lower():
        # Copy so the shared header constant is never mutated by the client.
        extra_kwargs["default_headers"] = dict(_OR_HEADERS)
    async_client = AsyncOpenAI(
        api_key=resolved_client.api_key,
        base_url=endpoint,
        **extra_kwargs,
    )
    return async_client, model
 def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
    """Return (client, model_slug) for vision/multimodal auxiliary tasks.
@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict:
    OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
    models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
    The Codex adapter translates max_tokens internally, so we use max_tokens
    for it as well.
    """
    custom_base = os.getenv("OPENAI_BASE_URL", "")
    or_key = os.getenv("OPENROUTER_API_KEY")
-    # Only use max_completion_tokens when the auxiliary client resolved to
+    # Only use max_completion_tokens for direct OpenAI custom endpoints
    # (no OpenRouter key, no Nous auth, and OPENAI_BASE_URL points at api.openai.com)
    if (not or_key
            and _read_nous_auth() is None
            and "api.openai.com" in custom_base.lower()):
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@ -31,7 +31,7 @@ class ContextCompressor:
        threshold_percent: float = 0.85,
        protect_first_n: int = 3,
        protect_last_n: int = 4,
-        summary_target_tokens: int = 500,
+        summary_target_tokens: int = 2500,
        quiet_mode: bool = False,
        summary_model_override: str = None,
    ):
--- a/cli.py
+++ b/cli.py
@ -841,12 +841,10 @@ class HermesCLI:
            or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
        )
        self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
        # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
        if max_turns != 60:  # CLI arg was explicitly set
        self._nous_key_expires_at: Optional[str] = None
        self._nous_key_source: Optional[str] = None
-        # Max turns priority: CLI arg > config file > env var > default
+        # Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
-        if max_turns is not None:
+        if max_turns is not None:  # CLI arg was explicitly set
            self.max_turns = max_turns
        elif CLI_CONFIG["agent"].get("max_turns"):
            self.max_turns = CLI_CONFIG["agent"]["max_turns"]
--- a/docs/cli.md
+++ b/docs/cli.md
@ -12,7 +12,7 @@ hermes
 hermes --model "anthropic/claude-sonnet-4"
 # With specific provider
-hermes --provider nous        # Use Nous Portal (requires: hermes login)
+hermes --provider nous        # Use Nous Portal (requires: hermes model)
 hermes --provider openrouter  # Force OpenRouter
 # With specific toolsets
@ -93,7 +93,7 @@ model:
 ```
 **Provider selection** (`provider` field):
- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars.
+- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars.
 - `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`.
 - `nous`: Always uses Nous Portal OAuth credentials from `auth.json`.
--- a/gateway/run.py
+++ b/gateway/run.py
@ -214,17 +214,12 @@ class GatewayRunner:
                return
            from run_agent import AIAgent
-            _flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
+            runtime_kwargs = _resolve_runtime_agent_kwargs()
-            _flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
+            if not runtime_kwargs.get("api_key"):
            _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6")
            if not _flush_api_key:
                return
            tmp_agent = AIAgent(
-                model=_flush_model,
+                **runtime_kwargs,
                api_key=_flush_api_key,
                base_url=_flush_base_url,
                max_iterations=8,
                quiet_mode=True,
                enabled_toolsets=["memory", "skills"],
@ -979,12 +974,10 @@ class GatewayRunner:
                if old_history:
                    from run_agent import AIAgent
                    loop = asyncio.get_event_loop()
-                    # Resolve credentials so the flush agent can reach the LLM
+                    _flush_kwargs = _resolve_runtime_agent_kwargs()
                    _flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
                    def _do_flush():
                        tmp_agent = AIAgent(
-                            model=_flush_model,
+                            **_flush_kwargs,
                            **_resolve_runtime_agent_kwargs(),
                            max_iterations=5,
                            quiet_mode=True,
                            enabled_toolsets=["memory"],
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@ -10,7 +10,7 @@ Architecture:
 - Auth store (auth.json) holds per-provider credential state
 - resolve_provider() picks the active provider via priority chain
 - resolve_*_runtime_credentials() handles token refresh and key minting
- login_command() / logout_command() are the CLI entry points
+- logout_command() is the CLI entry point for clearing auth
 """
 from __future__ import annotations
@ -127,7 +127,7 @@ def format_auth_error(error: Exception) -> str:
        return str(error)
    if error.relogin_required:
-        return f"{error} Run `hermes login` to re-authenticate."
+        return f"{error} Run `hermes model` to re-authenticate."
    if error.code == "subscription_required":
        return (
@ -1172,6 +1172,39 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
    return {"logged_in": False}
 # =============================================================================
 # External credential detection
 # =============================================================================
 def detect_external_credentials() -> List[Dict[str, Any]]:
    """Look for credentials left by other CLI tools that Hermes can reuse.

    Each entry of the returned list is a dict with:
      - provider: str   -- Hermes provider id (e.g. "openai-codex")
      - path: str       -- filesystem path where creds were found
      - label: str      -- human-friendly description for the setup UI

    Detection is best effort: any error while probing yields no entry.
    """
    discovered: List[Dict[str, Any]] = []

    # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json)
    try:
        auth_file = resolve_codex_home_path() / "auth.json"
        if auth_file.is_file():
            parsed = json.loads(auth_file.read_text())
            token_section = parsed.get("tokens", {})
            has_access_token = (
                isinstance(token_section, dict)
                and token_section.get("access_token")
            )
            if has_access_token:
                entry = {
                    "provider": "openai-codex",
                    "path": str(auth_file),
                    "label": f"Codex CLI credentials found ({auth_file})",
                }
                discovered.append(entry)
    except Exception:
        # Deliberately silent: a broken foreign auth file must not break setup.
        pass

    return discovered
 # =============================================================================
 # CLI Commands — login / logout
 # =============================================================================
@ -1328,56 +1361,43 @@ def _save_model_choice(model_id: str) -> None:
 def login_command(args) -> None:
-    """Run OAuth device code login for the selected provider."""
+    """Deprecated: use 'hermes model' or 'hermes setup' instead."""
-    provider_id = getattr(args, "provider", None) or "nous"
+    print("The 'hermes login' command has been removed.")
-
+    print("Use 'hermes model' to select a provider and model,")
-    if provider_id not in PROVIDER_REGISTRY:
+    print("or 'hermes setup' for full interactive setup.")
-        print(f"Unknown provider: {provider_id}")
+    raise SystemExit(0)
        print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}")
        raise SystemExit(1)
    pconfig = PROVIDER_REGISTRY[provider_id]
    if provider_id == "nous":
        _login_nous(args, pconfig)
    elif provider_id == "openai-codex":
        _login_openai_codex(args, pconfig)
    else:
        print(f"Login for provider '{provider_id}' is not yet implemented.")
        raise SystemExit(1)
 def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
-    """OpenAI Codex login flow using Codex CLI auth state."""
+    """OpenAI Codex login via device code flow (no Codex CLI required)."""
-    codex_path = shutil.which("codex")
+    codex_home = resolve_codex_home_path()
    if not codex_path:
        print("Codex CLI was not found in PATH.")
        print("Install Codex CLI, then retry `hermes login --provider openai-codex`.")
        raise SystemExit(1)
-    print(f"Starting Hermes login via {pconfig.name}...")
+    # Check for existing valid credentials first
    print(f"Using Codex CLI: {codex_path}")
    print(f"Codex home: {resolve_codex_home_path()}")
    creds: Dict[str, Any]
    try:
-        creds = resolve_codex_runtime_credentials()
+        existing = resolve_codex_runtime_credentials()
        print(f"Existing Codex credentials found at {codex_home / 'auth.json'}")
        try:
            reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
        except (EOFError, KeyboardInterrupt):
            reuse = "y"
        if reuse in ("", "y", "yes"):
            creds = existing
            _save_codex_provider_state(creds)
            return
    except AuthError:
-        print("No usable Codex auth found. Running `codex login`...")
+        pass
        try:
            subprocess.run(["codex", "login"], check=True)
        except subprocess.CalledProcessError as exc:
            print(f"Codex login failed with exit code {exc.returncode}.")
            raise SystemExit(1)
        except KeyboardInterrupt:
            print("\nLogin cancelled.")
            raise SystemExit(130)
        try:
            creds = resolve_codex_runtime_credentials()
        except AuthError as exc:
            print(format_auth_error(exc))
            raise SystemExit(1)
    # No existing creds (or user declined) -- run device code flow
    print()
    print("Signing in to OpenAI Codex...")
    print()
    creds = _codex_device_code_login()
    _save_codex_provider_state(creds)
 def _save_codex_provider_state(creds: Dict[str, Any]) -> None:
    """Persist Codex provider state to auth store and config."""
    auth_state = {
        "auth_file": creds.get("auth_file"),
        "codex_home": creds.get("codex_home"),
@ -1391,13 +1411,170 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
        _save_provider_state(auth_store, "openai-codex", auth_state)
        saved_to = _save_auth_store(auth_store)
-    config_path = _update_config_for_provider("openai-codex", creds["base_url"])
+    config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL))
    print()
    print("Login successful!")
    print(f"  Auth state: {saved_to}")
    print(f"  Config updated: {config_path} (model.provider=openai-codex)")
 def _codex_device_code_login() -> Dict[str, Any]:
    """Run the OpenAI device code login flow and return credentials dict.

    Steps: request a device/user code, show it to the user, poll until the
    user has signed in (at most 15 minutes), exchange the resulting
    authorization code for tokens, and persist them to ``auth.json`` under
    the Codex home directory.

    Returns:
        Dict with ``api_key``, ``base_url``, ``auth_file``, ``codex_home``,
        ``last_refresh``, ``auth_mode`` and ``source``.

    Raises:
        AuthError: on any network failure, unexpected HTTP status,
            malformed response body, or timeout.
        SystemExit: with code 130 when the user presses Ctrl+C while
            waiting for sign-in.
    """
    import time as _time

    issuer = "https://auth.openai.com"
    client_id = CODEX_OAUTH_CLIENT_ID

    def _json_object(response, *, what: str, code: str) -> Dict[str, Any]:
        """Decode *response* as a JSON object, or raise AuthError."""
        try:
            decoded = response.json()
        except ValueError as exc:
            raise AuthError(
                f"{what} response was not valid JSON.",
                provider="openai-codex", code=code,
            ) from exc
        if not isinstance(decoded, dict):
            raise AuthError(
                f"{what} response was not a JSON object.",
                provider="openai-codex", code=code,
            )
        return decoded

    # Step 1: Request device code
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            resp = client.post(
                f"{issuer}/api/accounts/deviceauth/usercode",
                json={"client_id": client_id},
                headers={"Content-Type": "application/json"},
            )
    except Exception as exc:
        raise AuthError(
            f"Failed to request device code: {exc}",
            provider="openai-codex", code="device_code_request_failed",
        ) from exc

    if resp.status_code != 200:
        raise AuthError(
            f"Device code request returned status {resp.status_code}.",
            provider="openai-codex", code="device_code_request_error",
        )

    device_data = _json_object(
        resp, what="Device code", code="device_code_invalid_response",
    )
    user_code = device_data.get("user_code", "")
    device_auth_id = device_data.get("device_auth_id", "")
    # Never poll faster than every 3 seconds; fall back to 5 seconds when the
    # server sends something that is not a number.
    try:
        poll_interval = max(3, int(device_data.get("interval", "5")))
    except (TypeError, ValueError):
        poll_interval = 5

    if not user_code or not device_auth_id:
        raise AuthError(
            "Device code response missing required fields.",
            provider="openai-codex", code="device_code_incomplete",
        )

    # Step 2: Show user the code
    print("To continue, follow these steps:\n")
    print("  1. Open this URL in your browser:")
    print(f"     \033[94m{issuer}/codex/device\033[0m\n")
    print("  2. Enter this code:")
    print(f"     \033[94m{user_code}\033[0m\n")
    print("Waiting for sign-in... (press Ctrl+C to cancel)")

    # Step 3: Poll for authorization code
    max_wait = 15 * 60  # 15 minutes
    start = _time.monotonic()
    code_resp = None

    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            while _time.monotonic() - start < max_wait:
                _time.sleep(poll_interval)
                try:
                    poll_resp = client.post(
                        f"{issuer}/api/accounts/deviceauth/token",
                        json={"device_auth_id": device_auth_id, "user_code": user_code},
                        headers={"Content-Type": "application/json"},
                    )
                except Exception as exc:
                    # Report transport failures like steps 1 and 4 do,
                    # instead of leaking a raw httpx exception.
                    raise AuthError(
                        f"Device auth polling failed: {exc}",
                        provider="openai-codex", code="device_code_poll_failed",
                    ) from exc

                if poll_resp.status_code == 200:
                    code_resp = _json_object(
                        poll_resp,
                        what="Device auth",
                        code="device_code_poll_invalid_response",
                    )
                    break
                if poll_resp.status_code in (403, 404):
                    continue  # User hasn't completed login yet
                raise AuthError(
                    f"Device auth polling returned status {poll_resp.status_code}.",
                    provider="openai-codex", code="device_code_poll_error",
                )
    except KeyboardInterrupt:
        print("\nLogin cancelled.")
        raise SystemExit(130)

    if code_resp is None:
        raise AuthError(
            "Login timed out after 15 minutes.",
            provider="openai-codex", code="device_code_timeout",
        )

    # Step 4: Exchange authorization code for tokens
    authorization_code = code_resp.get("authorization_code", "")
    code_verifier = code_resp.get("code_verifier", "")
    redirect_uri = f"{issuer}/deviceauth/callback"

    if not authorization_code or not code_verifier:
        raise AuthError(
            "Device auth response missing authorization_code or code_verifier.",
            provider="openai-codex", code="device_code_incomplete_exchange",
        )

    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            token_resp = client.post(
                CODEX_OAUTH_TOKEN_URL,
                data={
                    "grant_type": "authorization_code",
                    "code": authorization_code,
                    "redirect_uri": redirect_uri,
                    "client_id": client_id,
                    "code_verifier": code_verifier,
                },
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )
    except Exception as exc:
        raise AuthError(
            f"Token exchange failed: {exc}",
            provider="openai-codex", code="token_exchange_failed",
        ) from exc

    if token_resp.status_code != 200:
        raise AuthError(
            f"Token exchange returned status {token_resp.status_code}.",
            provider="openai-codex", code="token_exchange_error",
        )

    tokens = _json_object(
        token_resp, what="Token exchange", code="token_exchange_invalid_response",
    )
    access_token = tokens.get("access_token", "")
    refresh_token = tokens.get("refresh_token", "")

    if not access_token:
        raise AuthError(
            "Token exchange did not return an access_token.",
            provider="openai-codex", code="token_exchange_no_access_token",
        )

    # Step 5: Persist tokens to ~/.codex/auth.json
    codex_home = resolve_codex_home_path()
    codex_home.mkdir(parents=True, exist_ok=True)
    auth_path = codex_home / "auth.json"

    payload = {
        "tokens": {
            "access_token": access_token,
            "refresh_token": refresh_token,
        },
        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }
    _persist_codex_auth_payload(auth_path, payload, lock_held=False)

    # HERMES_CODEX_BASE_URL overrides the default endpoint when non-empty.
    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
    )

    return {
        "api_key": access_token,
        "base_url": base_url,
        "auth_file": str(auth_path),
        "codex_home": str(codex_home),
        "last_refresh": payload["last_refresh"],
        "auth_mode": "chatgpt",
        "source": "device-code",
    }
 def _login_nous(args, pconfig: ProviderConfig) -> None:
    """Nous Portal device authorization flow."""
    portal_base_url = (
@ -1579,6 +1756,6 @@ def logout_command(args) -> None:
        if os.getenv("OPENROUTER_API_KEY"):
            print("Hermes will use OpenRouter for inference.")
        else:
-            print("Run `hermes login` or configure an API key to use Hermes.")
+            print("Run `hermes model` or configure an API key to use Hermes.")
    else:
        print(f"No auth state found for {provider_name}.")
--- a/hermes_cli/codex_models.py
+++ b/hermes_cli/codex_models.py
@ -1,21 +1,62 @@
-"""Codex model discovery from local Codex CLI cache/config."""
+"""Codex model discovery from API, local cache, and config."""
 from __future__ import annotations
 import json
 import logging
 from pathlib import Path
 from typing import List, Optional
 from hermes_cli.auth import resolve_codex_home_path
 logger = logging.getLogger(__name__)
 DEFAULT_CODEX_MODELS: List[str] = [
    "gpt-5-codex",
    "gpt-5.3-codex",
    "gpt-5.2-codex",
-    "gpt-5.1-codex",
+    "gpt-5.1-codex-max",
    "gpt-5.1-codex-mini",
 ]
 def _fetch_models_from_api(access_token: str) -> List[str]:
    """Fetch available models from the Codex API. Returns visible models sorted by priority."""
    try:
        import httpx
        resp = httpx.get(
            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        )
        if resp.status_code != 200:
            return []
        data = resp.json()
        entries = data.get("models", []) if isinstance(data, dict) else []
    except Exception as exc:
        logger.debug("Failed to fetch Codex models from API: %s", exc)
        return []
    sortable = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        slug = item.get("slug")
        if not isinstance(slug, str) or not slug.strip():
            continue
        slug = slug.strip()
        if item.get("supported_in_api") is False:
            continue
        visibility = item.get("visibility", "")
        if isinstance(visibility, str) and visibility.strip().lower() == "hide":
            continue
        priority = item.get("priority")
        rank = int(priority) if isinstance(priority, (int, float)) else 10_000
        sortable.append((rank, slug))
    sortable.sort(key=lambda x: (x[0], x[1]))
    return [slug for _, slug in sortable]
 def _read_default_model(codex_home: Path) -> Optional[str]:
    config_path = codex_home / "config.toml"
    if not config_path.exists():
@ -72,10 +113,22 @@ def _read_cache_models(codex_home: Path) -> List[str]:
    return deduped
-def get_codex_model_ids() -> List[str]:
+def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]:
    """Return available Codex model IDs, trying API first, then local sources.
    Resolution order: API (live, if token provided) > config.toml default >
    local cache > hardcoded defaults.
    """
    codex_home = resolve_codex_home_path()
    ordered: List[str] = []
    # Try live API if we have a token
    if access_token:
        api_models = _fetch_models_from_api(access_token)
        if api_models:
            return api_models
    # Fall back to local sources
    default_model = _read_default_model(codex_home)
    if default_model:
        ordered.append(default_model)
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -12,7 +12,6 @@ Usage:
    hermes gateway install     # Install gateway service
    hermes gateway uninstall   # Uninstall gateway service
    hermes setup               # Interactive setup wizard
    hermes login               # Authenticate with Nous Portal (or other providers)
    hermes logout              # Clear stored authentication
    hermes status              # Show status of all components
    hermes cron                # Manage cron jobs
@ -547,7 +546,14 @@ def _model_flow_openai_codex(config, current_model=""):
            print(f"Login failed: {exc}")
            return
-    codex_models = get_codex_model_ids()
+    _codex_token = None
    try:
        from hermes_cli.auth import resolve_codex_runtime_credentials
        _codex_creds = resolve_codex_runtime_credentials()
        _codex_token = _codex_creds.get("api_key")
    except Exception:
        pass
    codex_models = get_codex_model_ids(access_token=_codex_token)
    selected = _prompt_model_selection(codex_models, current_model=current_model)
    if selected:
@ -827,8 +833,8 @@ def cmd_update(args):
            pass  # No systemd (macOS, WSL1, etc.) — skip silently
        print()
-        print("Tip: You can now log in with Nous Portal for inference:")
+        print("Tip: You can now select a provider and model:")
-        print("  hermes login              # Authenticate with Nous Portal")
+        print("  hermes model              # Select provider and model")
    except subprocess.CalledProcessError as e:
        print(f"✗ Update failed: {e}")
@ -848,7 +854,6 @@ Examples:
    hermes --continue             Resume the most recent session
    hermes --resume <session_id>  Resume a specific session
    hermes setup                  Run setup wizard
    hermes login                  Authenticate with an inference provider
    hermes logout                 Clear stored authentication
    hermes model                  Select default model
    hermes config                 View configuration
--- a/hermes_cli/setup.py
+++ b/hermes_cli/setup.py
@ -621,11 +621,23 @@ def run_setup_wizard(args):
        format_auth_error, AuthError, fetch_nous_models,
        resolve_nous_runtime_credentials, _update_config_for_provider,
        _login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL,
        detect_external_credentials,
    )
    existing_custom = get_env_value("OPENAI_BASE_URL")
    existing_or = get_env_value("OPENROUTER_API_KEY")
    active_oauth = get_active_provider()
    # Detect credentials from other CLI tools
    detected_creds = detect_external_credentials()
    if detected_creds:
        print_info("Detected existing credentials:")
        for cred in detected_creds:
            if cred["provider"] == "openai-codex":
                print_success(f"  * {cred['label']} -- select \"OpenAI Codex\" to use it")
            else:
                print_info(f"  * {cred['label']}")
        print()
    # Detect if any provider is already configured
    has_any_provider = bool(active_oauth or existing_custom or existing_or)
@ -694,11 +706,11 @@ def run_setup_wizard(args):
        except SystemExit:
            print_warning("Nous Portal login was cancelled or failed.")
-            print_info("You can try again later with: hermes login")
+            print_info("You can try again later with: hermes model")
            selected_provider = None
        except Exception as e:
            print_error(f"Login failed: {e}")
-            print_info("You can try again later with: hermes login")
+            print_info("You can try again later with: hermes model")
            selected_provider = None
    elif provider_idx == 1:  # OpenAI Codex
@ -718,11 +730,11 @@ def run_setup_wizard(args):
            _update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
        except SystemExit:
            print_warning("OpenAI Codex login was cancelled or failed.")
-            print_info("You can try again later with: hermes login --provider openai-codex")
+            print_info("You can try again later with: hermes model")
            selected_provider = None
        except Exception as e:
            print_error(f"Login failed: {e}")
-            print_info("You can try again later with: hermes login --provider openai-codex")
+            print_info("You can try again later with: hermes model")
            selected_provider = None
    elif provider_idx == 2:  # OpenRouter
@ -834,7 +846,15 @@ def run_setup_wizard(args):
            # else: keep current
        elif selected_provider == "openai-codex":
            from hermes_cli.codex_models import get_codex_model_ids
-            codex_models = get_codex_model_ids()
+            # Try to get the access token for live model discovery
            _codex_token = None
            try:
                from hermes_cli.auth import resolve_codex_runtime_credentials
                _codex_creds = resolve_codex_runtime_credentials()
                _codex_token = _codex_creds.get("api_key")
            except Exception:
                pass
            codex_models = get_codex_model_ids(access_token=_codex_token)
            model_choices = [f"{m}" for m in codex_models]
            model_choices.append("Custom model")
            model_choices.append(f"Keep current ({current_model})")
--- a/hermes_cli/status.py
+++ b/hermes_cli/status.py
@ -111,7 +111,7 @@ def show_status(args):
    nous_logged_in = bool(nous_status.get("logged_in"))
    print(
        f"  {'Nous Portal':<12}  {check_mark(nous_logged_in)} "
-        f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}"
+        f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
    )
    if nous_logged_in:
        portal_url = nous_status.get("portal_base_url") or "(unknown)"
@ -126,7 +126,7 @@ def show_status(args):
    codex_logged_in = bool(codex_status.get("logged_in"))
    print(
        f"  {'OpenAI Codex':<12}  {check_mark(codex_logged_in)} "
-        f"{'logged in' if codex_logged_in else 'not logged in (run: hermes login --provider openai-codex)'}"
+        f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}"
    )
    codex_auth_file = codex_status.get("auth_file")
    if codex_auth_file:
--- a/run_agent.py
+++ b/run_agent.py
@ -1432,6 +1432,14 @@ class AIAgent:
                content_text = str(content) if content is not None else ""
                if role == "assistant":
                    # Replay encrypted reasoning items from previous turns
                    # so the API can maintain coherent reasoning chains.
                    codex_reasoning = msg.get("codex_reasoning_items")
                    if isinstance(codex_reasoning, list):
                        for ri in codex_reasoning:
                            if isinstance(ri, dict) and ri.get("encrypted_content"):
                                items.append(ri)
                    if content_text.strip():
                        items.append({"role": "assistant", "content": content_text})
@ -1638,7 +1646,10 @@ class AIAgent:
        if store is not False:
            raise ValueError("Codex Responses contract requires 'store' to be false.")
-        allowed_keys = {"model", "instructions", "input", "tools", "store"}
+        allowed_keys = {
            "model", "instructions", "input", "tools", "store",
            "reasoning", "include", "max_output_tokens", "temperature",
        }
        normalized: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
@ -1647,6 +1658,22 @@ class AIAgent:
            "store": False,
        }
        # Pass through reasoning config
        reasoning = api_kwargs.get("reasoning")
        if isinstance(reasoning, dict):
            normalized["reasoning"] = reasoning
        include = api_kwargs.get("include")
        if isinstance(include, list):
            normalized["include"] = include
        # Pass through max_output_tokens and temperature
        max_output_tokens = api_kwargs.get("max_output_tokens")
        if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
            normalized["max_output_tokens"] = int(max_output_tokens)
        temperature = api_kwargs.get("temperature")
        if isinstance(temperature, (int, float)):
            normalized["temperature"] = float(temperature)
        if allow_stream:
            stream = api_kwargs.get("stream")
            if stream is not None and stream is not True:
@ -1719,6 +1746,7 @@ class AIAgent:
        content_parts: List[str] = []
        reasoning_parts: List[str] = []
        reasoning_items_raw: List[Dict[str, Any]] = []
        tool_calls: List[Any] = []
        has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
        saw_commentary_phase = False
@ -1750,6 +1778,16 @@ class AIAgent:
                reasoning_text = self._extract_responses_reasoning_text(item)
                if reasoning_text:
                    reasoning_parts.append(reasoning_text)
                # Capture the full reasoning item for multi-turn continuity.
                # encrypted_content is an opaque blob the API needs back on
                # subsequent turns to maintain coherent reasoning chains.
                encrypted = getattr(item, "encrypted_content", None)
                if isinstance(encrypted, str) and encrypted:
                    raw_item = {"type": "reasoning", "encrypted_content": encrypted}
                    item_id = getattr(item, "id", None)
                    if isinstance(item_id, str) and item_id:
                        raw_item["id"] = item_id
                    reasoning_items_raw.append(raw_item)
            elif item_type == "function_call":
                if item_status in {"queued", "in_progress", "incomplete"}:
                    continue
@ -1807,6 +1845,7 @@ class AIAgent:
            reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
            reasoning_content=None,
            reasoning_details=None,
            codex_reasoning_items=reasoning_items_raw or None,
        )
        if tool_calls:
@ -1819,7 +1858,6 @@ class AIAgent:
    def _run_codex_stream(self, api_kwargs: dict):
        """Execute one streaming Responses API request and return the final response."""
        api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
        max_stream_retries = 1
        for attempt in range(max_stream_retries + 1):
            try:
@ -1971,14 +2009,29 @@ class AIAgent:
            if not instructions:
                instructions = DEFAULT_AGENT_IDENTITY
-            return {
+            kwargs = {
                "model": self.model,
                "instructions": instructions,
                "input": self._chat_messages_to_responses_input(payload_messages),
                "tools": self._responses_tools(),
                "store": False,
                "reasoning": {"effort": "medium", "summary": "auto"},
                "include": ["reasoning.encrypted_content"],
            }
            # Apply reasoning effort from config if set
            if self.reasoning_config and isinstance(self.reasoning_config, dict):
                if self.reasoning_config.get("enabled") is False:
                    kwargs.pop("reasoning", None)
                    kwargs["include"] = []
                elif self.reasoning_config.get("effort"):
                    kwargs["reasoning"]["effort"] = self.reasoning_config["effort"]
            if self.max_tokens is not None:
                kwargs["max_output_tokens"] = self.max_tokens
            return kwargs
        provider_preferences = {}
        if self.providers_allowed:
            provider_preferences["only"] = self.providers_allowed
@ -2045,11 +2098,27 @@ class AIAgent:
        }
        if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
-            msg["reasoning_details"] = [
+            # Pass reasoning_details back unmodified so providers (OpenRouter,
-                {"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")}
+            # Anthropic, OpenAI) can maintain reasoning continuity across turns.
-                for d in assistant_message.reasoning_details
+            # Each provider may include opaque fields (signature, encrypted_content)
-                if isinstance(d, dict)
+            # that must be preserved exactly.
-            ]
+            raw_details = assistant_message.reasoning_details
            preserved = []
            for d in raw_details:
                if isinstance(d, dict):
                    preserved.append(d)
                elif hasattr(d, "__dict__"):
                    preserved.append(d.__dict__)
                elif hasattr(d, "model_dump"):
                    preserved.append(d.model_dump())
            if preserved:
                msg["reasoning_details"] = preserved
        # Codex Responses API: preserve encrypted reasoning items for
        # multi-turn continuity. These get replayed as input on the next turn.
        codex_items = getattr(assistant_message, "codex_reasoning_items", None)
        if codex_items:
            msg["codex_reasoning_items"] = codex_items
        if assistant_message.tool_calls:
            tool_calls = []
@ -2152,40 +2221,68 @@ class AIAgent:
                messages.pop()  # remove flush msg
                return
-            api_kwargs = {
+            # Use auxiliary client for the flush call when available --
-                "model": self.model,
+            # it's cheaper and avoids Codex Responses API incompatibility.
-                "messages": api_messages,
+            from agent.auxiliary_client import get_text_auxiliary_client
-                "tools": [memory_tool_def],
+            aux_client, aux_model = get_text_auxiliary_client()
                "temperature": 0.3,
                **self._max_tokens_param(1024),
            }
-            response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
+            if aux_client:
                api_kwargs = {
                    "model": aux_model,
                    "messages": api_messages,
                    "tools": [memory_tool_def],
                    "temperature": 0.3,
                    "max_tokens": 5120,
                }
                response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
            elif self.api_mode == "codex_responses":
                # No auxiliary client -- use the Codex Responses path directly
                codex_kwargs = self._build_api_kwargs(api_messages)
                codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
                codex_kwargs["temperature"] = 0.3
                if "max_output_tokens" in codex_kwargs:
                    codex_kwargs["max_output_tokens"] = 5120
                response = self._run_codex_stream(codex_kwargs)
            else:
                api_kwargs = {
                    "model": self.model,
                    "messages": api_messages,
                    "tools": [memory_tool_def],
                    "temperature": 0.3,
                    **self._max_tokens_param(5120),
                }
                response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
-            if response.choices:
+            # Extract tool calls from the response, handling both API formats
            tool_calls = []
            if self.api_mode == "codex_responses" and not aux_client:
                assistant_msg, _ = self._normalize_codex_response(response)
                if assistant_msg and assistant_msg.tool_calls:
                    tool_calls = assistant_msg.tool_calls
            elif hasattr(response, "choices") and response.choices:
                assistant_message = response.choices[0].message
                if assistant_message.tool_calls:
-                    # Execute only memory tool calls
+                    tool_calls = assistant_message.tool_calls
-                    for tc in assistant_message.tool_calls:
+
-                        if tc.function.name == "memory":
+            for tc in tool_calls:
-                            try:
+                if tc.function.name == "memory":
-                                args = json.loads(tc.function.arguments)
+                    try:
-                                flush_target = args.get("target", "memory")
+                        args = json.loads(tc.function.arguments)
-                                from tools.memory_tool import memory_tool as _memory_tool
+                        flush_target = args.get("target", "memory")
-                                result = _memory_tool(
+                        from tools.memory_tool import memory_tool as _memory_tool
-                                    action=args.get("action"),
+                        result = _memory_tool(
-                                    target=flush_target,
+                            action=args.get("action"),
-                                    content=args.get("content"),
+                            target=flush_target,
-                                    old_text=args.get("old_text"),
+                            content=args.get("content"),
-                                    store=self._memory_store,
+                            old_text=args.get("old_text"),
-                                )
+                            store=self._memory_store,
-                                # Also send user observations to Honcho when active
+                        )
-                                if self._honcho and flush_target == "user" and args.get("action") == "add":
+                        if self._honcho and flush_target == "user" and args.get("action") == "add":
-                                    self._honcho_save_user_observation(args.get("content", ""))
+                            self._honcho_save_user_observation(args.get("content", ""))
-                                if not self.quiet_mode:
+                        if not self.quiet_mode:
-                                    print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
+                            print(f"  🧠 Memory flush: saved to {args.get('target', 'memory')}")
-                            except Exception as e:
+                    except Exception as e:
-                                logger.debug("Memory flush tool call failed: %s", e)
+                        logger.debug("Memory flush tool call failed: %s", e)
        except Exception as e:
            logger.debug("Memory flush API call failed: %s", e)
        finally:
@ -2493,32 +2590,19 @@ class AIAgent:
            if _is_nous:
                summary_extra_body["tags"] = ["product=hermes-agent"]
-            summary_kwargs = {
+            if self.api_mode == "codex_responses":
-                "model": self.model,
+                codex_kwargs = self._build_api_kwargs(api_messages)
-                "messages": api_messages,
+                codex_kwargs["tools"] = None
-            }
+                summary_response = self._run_codex_stream(codex_kwargs)
-            if self.max_tokens is not None:
+                assistant_message, _ = self._normalize_codex_response(summary_response)
-                summary_kwargs.update(self._max_tokens_param(self.max_tokens))
+                final_response = (assistant_message.content or "").strip() if assistant_message else ""
            if summary_extra_body:
                summary_kwargs["extra_body"] = summary_extra_body
            summary_response = self.client.chat.completions.create(**summary_kwargs)
            if summary_response.choices and summary_response.choices[0].message.content:
                final_response = summary_response.choices[0].message.content
                if "<think>" in final_response:
                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                if final_response:
                    messages.append({"role": "assistant", "content": final_response})
                else:
                    final_response = "I reached the iteration limit and couldn't generate a summary."
            else:
                summary_kwargs = {
                    "model": self.model,
                    "messages": api_messages,
                }
                if self.max_tokens is not None:
-                    summary_kwargs["max_tokens"] = self.max_tokens
+                    summary_kwargs.update(self._max_tokens_param(self.max_tokens))
                if summary_extra_body:
                    summary_kwargs["extra_body"] = summary_extra_body
@ -2526,6 +2610,42 @@ class AIAgent:
                if summary_response.choices and summary_response.choices[0].message.content:
                    final_response = summary_response.choices[0].message.content
                else:
                    final_response = ""
            if final_response:
                if "<think>" in final_response:
                    final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                if final_response:
                    messages.append({"role": "assistant", "content": final_response})
                else:
                    final_response = "I reached the iteration limit and couldn't generate a summary."
            else:
                # Retry summary generation
                if self.api_mode == "codex_responses":
                    codex_kwargs = self._build_api_kwargs(api_messages)
                    codex_kwargs["tools"] = None
                    retry_response = self._run_codex_stream(codex_kwargs)
                    retry_msg, _ = self._normalize_codex_response(retry_response)
                    final_response = (retry_msg.content or "").strip() if retry_msg else ""
                else:
                    summary_kwargs = {
                        "model": self.model,
                        "messages": api_messages,
                    }
                    if self.max_tokens is not None:
                        summary_kwargs["max_tokens"] = self.max_tokens
                    if summary_extra_body:
                        summary_kwargs["extra_body"] = summary_extra_body
                    summary_response = self.client.chat.completions.create(**summary_kwargs)
                    if summary_response.choices and summary_response.choices[0].message.content:
                        final_response = summary_response.choices[0].message.content
                    else:
                        final_response = ""
                if final_response:
                    if "<think>" in final_response:
                        final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
                    messages.append({"role": "assistant", "content": final_response})
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@ -0,0 +1,168 @@
 """Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
 import json
 import os
 from pathlib import Path
 from unittest.mock import patch, MagicMock
 import pytest
 from agent.auxiliary_client import (
    get_text_auxiliary_client,
    get_vision_auxiliary_client,
    auxiliary_max_tokens_param,
    _read_codex_access_token,
 )
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider-related environment variables before every test.

    Runs automatically for each test in this module; variables that are not
    set are ignored rather than raising.
    """
    provider_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_BASE_URL",
        "OPENAI_API_KEY",
        "OPENAI_MODEL",
        "LLM_MODEL",
        "NOUS_INFERENCE_BASE_URL",
    )
    for name in provider_vars:
        monkeypatch.delenv(name, raising=False)
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Create a ``.codex`` directory with an ``auth.json`` and stub the token reader.

    The file is written under ``tmp_path``; in addition,
    ``agent.auxiliary_client._read_codex_access_token`` is replaced so it
    returns the same access token that was written. Returns the directory.
    """
    token = "codex-test-token-abc123"
    home = tmp_path / ".codex"
    home.mkdir()
    contents = {
        "tokens": {
            "access_token": token,
            "refresh_token": "codex-refresh-xyz",
        }
    }
    (home / "auth.json").write_text(json.dumps(contents))
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: token,
    )
    return home
 class TestReadCodexAccessToken:
    """Exercise _read_codex_access_token against different auth.json states."""

    @staticmethod
    def _seed_auth(home, raw_text):
        """Create ``<home>/.codex/auth.json`` containing ``raw_text`` verbatim."""
        target_dir = home / ".codex"
        target_dir.mkdir()
        (target_dir / "auth.json").write_text(raw_text)

    @staticmethod
    def _token_with_home(home):
        """Call the reader while ``Path.home`` is patched to return ``home``."""
        with patch("agent.auxiliary_client.Path.home", return_value=home):
            return _read_codex_access_token()

    def test_valid_auth_file(self, tmp_path):
        document = {"tokens": {"access_token": "tok-123", "refresh_token": "r-456"}}
        self._seed_auth(tmp_path, json.dumps(document))
        assert self._token_with_home(tmp_path) == "tok-123"

    def test_missing_file_returns_none(self, tmp_path):
        # Nothing is written: the .codex directory does not exist at all.
        assert self._token_with_home(tmp_path) is None

    def test_empty_token_returns_none(self, tmp_path):
        # A whitespace-only token counts as absent.
        self._seed_auth(tmp_path, json.dumps({"tokens": {"access_token": "  "}}))
        assert self._token_with_home(tmp_path) is None

    def test_malformed_json_returns_none(self, tmp_path):
        self._seed_auth(tmp_path, "{bad json")
        assert self._token_with_home(tmp_path) is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        self._seed_auth(tmp_path, json.dumps({"other": "data"}))
        assert self._token_with_home(tmp_path) is None
 class TestGetTextAuxiliaryClient:
    """Cover the provider resolution order of get_text_auxiliary_client."""

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
            client, model = get_text_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        openai_cls.assert_called_once()
        assert openai_cls.call_args.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        nous_auth = {"access_token": "nous-tok"}
        with patch("agent.auxiliary_client._read_nous_auth", return_value=nous_auth), \
             patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        endpoint = "http://localhost:1234/v1"
        monkeypatch.setenv("OPENAI_BASE_URL", endpoint)
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        # Re-install the Codex token stub (codex_auth_dir already sets the
        # same one) so a Codex credential is definitely available here.
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as openai_cls:
            client, model = get_text_auxiliary_client()
        assert model == "gpt-4o-mini"
        assert openai_cls.call_args.kwargs["base_url"] == endpoint

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gpt-5.3-codex"
        # The Codex path hands back the adapter wrapper rather than a raw
        # OpenAI client.
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            resolved = get_text_auxiliary_client()
        client, model = resolved
        assert client is None
        assert model is None
 class TestCodexNotInVisionClient:
    """Codex fallback should NOT apply to vision tasks."""

    def test_vision_returns_none_without_openrouter_nous(self, codex_auth_dir):
        # codex_auth_dir makes a Codex token available. Without it the test
        # would pass vacuously (no Codex credential to fall back on) and would
        # depend on whatever real Codex credentials exist on the host.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None
 class TestAuxiliaryMaxTokensParam:
    """auxiliary_max_tokens_param should yield ``max_tokens`` in every case below."""

    EXPECTED = {"max_tokens": 1024}

    @staticmethod
    def _param_with(codex_token):
        """Compute the param with Nous auth absent and the given Codex token."""
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value=codex_token):
            return auxiliary_max_tokens_param(1024)

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        """Codex adapter translates max_tokens internally, so we return max_tokens."""
        assert self._param_with("tok") == self.EXPECTED

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        assert auxiliary_max_tokens_param(1024) == self.EXPECTED

    def test_no_provider_uses_max_tokens(self):
        assert self._param_with(None) == self.EXPECTED
--- a/tests/test_auth_codex_provider.py
+++ b/tests/test_auth_codex_provider.py
@ -185,8 +185,8 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
    _write_codex_auth(codex_home)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setenv("CODEX_HOME", str(codex_home))
-    monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda _: "/usr/local/bin/codex")
+    # Mock input() to accept existing credentials
-    monkeypatch.setattr("hermes_cli.auth.subprocess.run", lambda *a, **k: None)
+    monkeypatch.setattr("builtins.input", lambda _: "y")
    _login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"])
@ -201,19 +201,10 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
    assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL
-def test_login_command_defaults_to_nous(monkeypatch):
+def test_login_command_shows_deprecation(monkeypatch, capsys):
-    calls = {"nous": 0, "codex": 0}
+    """login_command is deprecated and directs users to hermes model."""
-
+    with pytest.raises(SystemExit) as exc_info:
-    def _fake_nous(args, pconfig):
+        login_command(SimpleNamespace())
-        calls["nous"] += 1
+    assert exc_info.value.code == 0
-
+    captured = capsys.readouterr()
-    def _fake_codex(args, pconfig):
+    assert "hermes model" in captured.out
        calls["codex"] += 1
    monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_nous)
    monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_codex)
    login_command(SimpleNamespace())
    assert calls["nous"] == 1
    assert calls["codex"] == 0
--- a/tests/test_cli_init.py
+++ b/tests/test_cli_init.py
@ -0,0 +1,80 @@
 """Tests for HermesCLI initialization -- catches configuration bugs
 that only manifest at runtime (not in mocked unit tests)."""
 import os
 import sys
 from unittest.mock import patch, MagicMock
 import pytest
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
 def _make_cli(**kwargs):
    """Instantiate HermesCLI with only tool-definition lookup stubbed out.

    ``cli.get_tool_definitions`` is patched to return an empty list for the
    duration of construction; every keyword argument is forwarded unchanged.
    """
    from cli import HermesCLI

    tool_defs_stub = patch("cli.get_tool_definitions", return_value=[])
    with tool_defs_stub:
        instance = HermesCLI(**kwargs)
    return instance
 class TestMaxTurnsResolution:
    """max_turns must always resolve to a positive integer, never None."""
    def test_default_max_turns_is_integer(self):
        cli = _make_cli()
        assert isinstance(cli.max_turns, int)
        assert cli.max_turns > 0
    def test_explicit_max_turns_honored(self):
        cli = _make_cli(max_turns=25)
        assert cli.max_turns == 25
    def test_none_max_turns_gets_default(self):
        cli = _make_cli(max_turns=None)
        assert isinstance(cli.max_turns, int)
        assert cli.max_turns > 0
    def test_env_var_max_turns(self, monkeypatch):
        """Env var is used when config file doesn't set max_turns."""
        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
        import cli as cli_module
        original = cli_module.CLI_CONFIG["agent"].get("max_turns")
        cli_module.CLI_CONFIG["agent"]["max_turns"] = None
        try:
            cli_obj = _make_cli()
            assert cli_obj.max_turns == 42
        finally:
            if original is not None:
                cli_module.CLI_CONFIG["agent"]["max_turns"] = original
    def test_max_turns_never_none_for_agent(self):
        """The value passed to AIAgent must never be None (causes TypeError in run_conversation)."""
        cli = _make_cli()
        assert cli.max_turns is not None
 class TestVerboseAndToolProgress:
    def test_default_verbose_is_bool(self):
        cli = _make_cli()
        assert isinstance(cli.verbose, bool)
    def test_tool_progress_mode_is_string(self):
        cli = _make_cli()
        assert isinstance(cli.tool_progress_mode, str)
        assert cli.tool_progress_mode in ("off", "new", "all", "verbose")
 class TestProviderResolution:
    def test_api_key_is_string_or_none(self):
        cli = _make_cli()
        assert cli.api_key is None or isinstance(cli.api_key, str)
    def test_base_url_is_string(self):
        cli = _make_cli()
        assert isinstance(cli.base_url, str)
        assert cli.base_url.startswith("http")
    def test_model_is_string(self):
        cli = _make_cli()
        assert isinstance(cli.model, str)
        assert len(cli.model) > 0
--- a/tests/test_codex_execution_paths.py
+++ b/tests/test_codex_execution_paths.py
@ -149,6 +149,11 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
    runner._prefill_messages = []
    runner._reasoning_config = None
    runner._running_agents = {}
    from unittest.mock import MagicMock, AsyncMock
    runner.hooks = MagicMock()
    runner.hooks.emit = AsyncMock()
    runner.hooks.loaded_hooks = []
    runner._session_db = None
    source = SessionSource(
        platform=Platform.LOCAL,
--- a/tests/test_external_credential_detection.py
+++ b/tests/test_external_credential_detection.py
@ -0,0 +1,51 @@
 """Tests for detect_external_credentials() -- Phase 2 credential sync."""
 import json
 from pathlib import Path
 from unittest.mock import patch
 import pytest
 from hermes_cli.auth import detect_external_credentials
 class TestDetectCodexCLI:
    def test_detects_valid_codex_auth(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps({
            "tokens": {"access_token": "tok-123", "refresh_token": "ref-456"}
        }))
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            result = detect_external_credentials()
        codex_hits = [c for c in result if c["provider"] == "openai-codex"]
        assert len(codex_hits) == 1
        assert "Codex CLI" in codex_hits[0]["label"]
        assert str(auth) == codex_hits[0]["path"]
    def test_skips_codex_without_access_token(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}}))
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            result = detect_external_credentials()
        assert not any(c["provider"] == "openai-codex" for c in result)
    def test_skips_missing_codex_dir(self, tmp_path):
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / "nonexistent"):
            result = detect_external_credentials()
        assert not any(c["provider"] == "openai-codex" for c in result)
    def test_skips_malformed_codex_auth(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            result = detect_external_credentials()
        assert not any(c["provider"] == "openai-codex" for c in result)
    def test_returns_empty_when_nothing_found(self, tmp_path):
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=tmp_path / ".codex"):
            result = detect_external_credentials()
        assert result == []
--- a/tests/test_flush_memories_codex.py
+++ b/tests/test_flush_memories_codex.py
@ -0,0 +1,225 @@
 """Tests for flush_memories() working correctly across all provider modes.
 Catches the bug where Codex mode called chat.completions.create on a
 Responses-only client, which would fail silently or with a 404.
 """
 import json
 import os
 import sys
 import types
 from types import SimpleNamespace
 from unittest.mock import patch, MagicMock, call
 import pytest
 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
 sys.modules.setdefault("fal_client", types.SimpleNamespace())
 import run_agent
 class _FakeOpenAI:
    """Stand-in for the OpenAI client class: stores its constructor kwargs only."""

    def __init__(self, **kwargs):
        # Keep the raw kwargs, plus the two fields exposed as attributes with
        # placeholder defaults when the caller omits them.
        self.kwargs = kwargs
        self.api_key, self.base_url = (
            kwargs.get("api_key", "test"),
            kwargs.get("base_url", "http://test"),
        )

    def close(self):
        """No-op; returns None."""
        return None
 def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Build an AIAgent with mocked internals, ready for flush_memories testing.

    Patches ``run_agent`` so that tool discovery returns a single ``memory``
    function tool, toolset requirement checks return an empty dict, and the
    OpenAI client class is replaced by ``_FakeOpenAI``. The returned agent has
    a MagicMock memory store and turn counters set directly on it.
    """

    def _memory_only_tools(**_ignored):
        # A fresh schema list is built on every call.
        memory_params = {
            "type": "object",
            "properties": {
                "action": {"type": "string"},
                "target": {"type": "string"},
                "content": {"type": "string"},
            },
        }
        memory_function = {
            "name": "memory",
            "description": "Manage memories.",
            "parameters": memory_params,
        }
        return [{"type": "function", "function": memory_function}]

    monkeypatch.setattr(run_agent, "get_tool_definitions", _memory_only_tools)
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)

    agent_options = {
        "api_key": "test-key",
        "base_url": "https://test.example.com/v1",
        "provider": provider,
        "api_mode": api_mode,
        "max_iterations": 4,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }
    agent = run_agent.AIAgent(**agent_options)

    # Give it a valid memory store
    agent._memory_store = MagicMock()
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
 def _chat_response_with_memory_call():
    """Return a fake chat-completions response whose only choice calls the memory tool.

    The single tool call is ``memory`` with JSON-encoded arguments
    (action "add", target "notes"); ``content`` is None and ``usage`` reports
    100 prompt / 20 completion / 120 total tokens.
    """
    memory_args = json.dumps({
        "action": "add",
        "target": "notes",
        "content": "User prefers dark mode.",
    })
    memory_call = SimpleNamespace(
        function=SimpleNamespace(name="memory", arguments=memory_args),
    )
    assistant_message = SimpleNamespace(content=None, tool_calls=[memory_call])
    token_usage = SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120)
    return SimpleNamespace(
        choices=[SimpleNamespace(message=assistant_message)],
        usage=token_usage,
    )
 class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""
    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)
        mock_aux_client.chat.completions.create.assert_called_once()
        call_kwargs = mock_aux_client.chat.completions.create.call_args
        assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"
    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)
        agent.client.chat.completions.create.assert_called_once()
    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)
        mock_memory.assert_called_once()
        call_kwargs = mock_memory.call_args
        assert call_kwargs.kwargs["action"] == "add"
        assert call_kwargs.kwargs["target"] == "notes"
        assert "dark mode" in call_kwargs.kwargs["content"]
    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)
        # Messages should not grow from the flush
        assert len(messages) <= original_len
        # No flush sentinel should remain
        for msg in messages:
            assert "_flush_sentinel" not in msg
 class TestFlushMemoriesCodexFallback:
    """When no auxiliary client exists and we're in Codex mode, flush should
    use the Codex Responses API path instead of chat.completions."""
    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
        codex_response = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="function_call",
                    call_id="call_1",
                    name="memory",
                    arguments=json.dumps({
                        "action": "add",
                        "target": "notes",
                        "content": "Codex flush test",
                    }),
                ),
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
            status="completed",
            model="gpt-5-codex",
        )
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
             patch.object(agent, "_build_api_kwargs") as mock_build, \
             patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
            mock_build.return_value = {
                "model": "gpt-5-codex",
                "instructions": "test",
                "input": [],
                "tools": [],
                "max_output_tokens": 4096,
            }
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)
        mock_stream.assert_called_once()
        mock_memory.assert_called_once()
        assert mock_memory.call_args.kwargs["content"] == "Codex flush test"
--- a/tests/test_provider_parity.py
+++ b/tests/test_provider_parity.py
@ -0,0 +1,460 @@
 """Provider parity tests: verify that AIAgent builds correct API kwargs
 and handles responses properly for all supported providers.
 Ensures changes to one provider path don't silently break another.
 """
 import json
 import os
 import sys
 import types
 from types import SimpleNamespace
 from unittest.mock import patch, MagicMock
 import pytest
 sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
 sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
 sys.modules.setdefault("fal_client", types.SimpleNamespace())
 from run_agent import AIAgent
 # ── Helpers ──────────────────────────────────────────────────────────────────
 def _tool_defs(*names):
    """Return one function-tool schema per given name, in the order supplied.

    Each schema nests ``name``, a ``"<name> tool"`` description and an empty
    object parameter spec under the ``"function"`` key.
    """
    schemas = []
    for tool_name in names:
        function_spec = {
            "name": tool_name,
            "description": f"{tool_name} tool",
            "parameters": {"type": "object", "properties": {}},
        }
        schemas.append({"type": "function", "function": function_spec})
    return schemas
 class _FakeOpenAI:
    """Stand-in for the OpenAI client class exposing only api_key, base_url and close()."""

    def __init__(self, **kw):
        # Placeholder defaults apply when the caller omits either field.
        self.api_key, self.base_url = (
            kw.get("api_key", "test"),
            kw.get("base_url", "http://test"),
        )

    def close(self):
        """No-op; returns None."""
        return None
 def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
    """Construct an AIAgent for *provider* with run_agent's collaborators stubbed.

    Tool discovery returns the ``web_search`` and ``terminal`` schemas from
    ``_tool_defs``, toolset requirement checks return an empty dict, and the
    OpenAI client class is swapped for ``_FakeOpenAI``.
    """
    stubs = (
        ("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")),
        ("run_agent.check_toolset_requirements", lambda: {}),
        ("run_agent.OpenAI", _FakeOpenAI),
    )
    for target, replacement in stubs:
        monkeypatch.setattr(target, replacement)

    agent_options = {
        "api_key": "test-key",
        "base_url": base_url,
        "provider": provider,
        "api_mode": api_mode,
        "max_iterations": 4,
        "quiet_mode": True,
        "skip_context_files": True,
        "skip_memory": True,
    }
    return AIAgent(**agent_options)
 # ── _build_api_kwargs tests ─────────────────────────────────────────────────
 class TestBuildApiKwargsOpenRouter:
    def test_uses_chat_completions_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "messages" in kwargs
        assert "model" in kwargs
        assert kwargs["messages"][-1]["content"] == "hi"
    def test_includes_reasoning_in_extra_body(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        extra = kwargs.get("extra_body", {})
        assert "reasoning" in extra
        assert extra["reasoning"]["enabled"] is True
    def test_includes_tools(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "tools" in kwargs
        tool_names = [t["function"]["name"] for t in kwargs["tools"]]
        assert "web_search" in tool_names
    def test_no_responses_api_fields(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "input" not in kwargs
        assert "instructions" not in kwargs
        assert "store" not in kwargs
 class TestBuildApiKwargsNousPortal:
    def test_includes_nous_product_tags(self, monkeypatch):
        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        extra = kwargs.get("extra_body", {})
        assert extra.get("tags") == ["product=hermes-agent"]
    def test_uses_chat_completions_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "messages" in kwargs
        assert "input" not in kwargs
 class TestBuildApiKwargsCustomEndpoint:
    def test_uses_chat_completions_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "messages" in kwargs
        assert "input" not in kwargs
    def test_no_openrouter_extra_body(self, monkeypatch):
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        extra = kwargs.get("extra_body", {})
        assert "reasoning" not in extra
 class TestBuildApiKwargsCodex:
    def test_uses_responses_api_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "input" in kwargs
        assert "instructions" in kwargs
        assert "messages" not in kwargs
        assert kwargs["store"] is False
    def test_includes_reasoning_config(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "reasoning" in kwargs
        assert kwargs["reasoning"]["effort"] == "medium"
    def test_includes_encrypted_content_in_include(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        assert "reasoning.encrypted_content" in kwargs.get("include", [])
    def test_tools_converted_to_responses_format(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hi"}]
        kwargs = agent._build_api_kwargs(messages)
        tools = kwargs.get("tools", [])
        assert len(tools) > 0
        # Responses format has "name" at top level, not nested under "function"
        assert "name" in tools[0]
        assert "function" not in tools[0]
 # ── Message conversion tests ────────────────────────────────────────────────
 class TestChatMessagesToResponsesInput:
    """Verify _chat_messages_to_responses_input for Codex mode."""
    def test_user_message_passes_through(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "user", "content": "hello"}]
        items = agent._chat_messages_to_responses_input(messages)
        assert items == [{"role": "user", "content": "hello"}]
    def test_system_messages_filtered(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hello"},
        ]
        items = agent._chat_messages_to_responses_input(messages)
        assert len(items) == 1
        assert items[0]["role"] == "user"
    def test_assistant_tool_calls_become_function_call_items(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "id": "call_abc",
                "call_id": "call_abc",
                "function": {"name": "web_search", "arguments": '{"query": "test"}'},
            }],
        }]
        items = agent._chat_messages_to_responses_input(messages)
        fc_items = [i for i in items if i.get("type") == "function_call"]
        assert len(fc_items) == 1
        assert fc_items[0]["name"] == "web_search"
        assert fc_items[0]["call_id"] == "call_abc"
    def test_tool_results_become_function_call_output(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
        items = agent._chat_messages_to_responses_input(messages)
        assert items[0]["type"] == "function_call_output"
        assert items[0]["call_id"] == "call_abc"
        assert items[0]["output"] == "result here"
    def test_encrypted_reasoning_replayed(self, monkeypatch):
        """Encrypted reasoning items from previous turns must be included in input."""
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [
            {"role": "user", "content": "think about this"},
            {
                "role": "assistant",
                "content": "I thought about it.",
                "codex_reasoning_items": [
                    {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"},
                ],
            },
            {"role": "user", "content": "continue"},
        ]
        items = agent._chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 1
        assert reasoning_items[0]["encrypted_content"] == "gAAAA_test_blob"
    def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch):
        """Messages without codex_reasoning_items should not inject anything."""
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        messages = [
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "hello"},
        ]
        items = agent._chat_messages_to_responses_input(messages)
        reasoning_items = [i for i in items if i.get("type") == "reasoning"]
        assert len(reasoning_items) == 0
 # ── Response normalization tests ─────────────────────────────────────────────
 class TestNormalizeCodexResponse:
    """Verify _normalize_codex_response extracts all fields correctly."""
    def _make_codex_agent(self, monkeypatch):
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")
    def test_text_response(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="message", status="completed",
                    content=[SimpleNamespace(type="output_text", text="Hello!")],
                    phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.content == "Hello!"
        assert reason == "stop"
    def test_reasoning_summary_extracted(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="reasoning",
                    encrypted_content="gAAAA_blob",
                    summary=[SimpleNamespace(type="summary_text", text="Thinking about math")],
                    id="rs_123", status=None),
                SimpleNamespace(type="message", status="completed",
                    content=[SimpleNamespace(type="output_text", text="42")],
                    phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.content == "42"
        assert "math" in msg.reasoning
        assert reason == "stop"
    def test_encrypted_content_captured(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="reasoning",
                    encrypted_content="gAAAA_secret_blob_123",
                    summary=[SimpleNamespace(type="summary_text", text="Thinking")],
                    id="rs_456", status=None),
                SimpleNamespace(type="message", status="completed",
                    content=[SimpleNamespace(type="output_text", text="done")],
                    phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.codex_reasoning_items is not None
        assert len(msg.codex_reasoning_items) == 1
        assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
        assert msg.codex_reasoning_items[0]["id"] == "rs_456"
    def test_no_encrypted_content_when_missing(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="message", status="completed",
                    content=[SimpleNamespace(type="output_text", text="no reasoning")],
                    phase="final_answer"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.codex_reasoning_items is None
    def test_tool_calls_extracted(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = SimpleNamespace(
            output=[
                SimpleNamespace(type="function_call", status="completed",
                    call_id="call_xyz", name="web_search",
                    arguments='{"query":"test"}', id="fc_xyz"),
            ],
            status="completed",
        )
        msg, reason = agent._normalize_codex_response(response)
        assert reason == "tool_calls"
        assert len(msg.tool_calls) == 1
        assert msg.tool_calls[0].function.name == "web_search"
 # ── Chat completions response handling (OpenRouter/Nous) ─────────────────────
 class TestBuildAssistantMessage:
    """Verify _build_assistant_message works for all provider response formats."""
    def test_openrouter_reasoning_fields(self, monkeypatch):
        """A plain `reasoning` string is carried over and no Codex items key is added."""
        agent = _make_agent(monkeypatch, "openrouter")
        provider_fields = {
            "content": "answer",
            "tool_calls": None,
            "reasoning": "I thought about it",
            "reasoning_content": None,
            "reasoning_details": None,
        }
        built = agent._build_assistant_message(SimpleNamespace(**provider_fields), "stop")
        assert built["content"] == "answer"
        assert built["reasoning"] == "I thought about it"
        assert "codex_reasoning_items" not in built
    def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch):
        """reasoning_details must be passed back exactly as received for
        multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this)."""
        agent = _make_agent(monkeypatch, "openrouter")
        original_detail = {
            "type": "thinking",
            "thinking": "deep thoughts here",
            "signature": "sig123_opaque_blob",
            "encrypted_content": "some_provider_blob",
            "extra_field": "should_not_be_dropped",
        }
        provider_message = SimpleNamespace(
            content="answer",
            tool_calls=None,
            reasoning=None,
            reasoning_content=None,
            reasoning_details=[original_detail],
        )
        built = agent._build_assistant_message(provider_message, "stop")
        stored = built["reasoning_details"][0]
        # ALL fields must survive, not just type/text/signature
        must_survive = (
            ("signature", "sig123_opaque_blob"),
            ("encrypted_content", "some_provider_blob"),
            ("extra_field", "should_not_be_dropped"),
            ("thinking", "deep thoughts here"),
        )
        for field, expected in must_survive:
            assert stored[field] == expected
    def test_codex_preserves_encrypted_reasoning(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        msg = SimpleNamespace(
            content="result",
            tool_calls=None,
            reasoning="summary text",
            reasoning_content=None,
            reasoning_details=None,
            codex_reasoning_items=[
                {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
            ],
        )
        result = agent._build_assistant_message(msg, "stop")
        assert result["codex_reasoning_items"] == [
            {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
        ]
    def test_plain_message_no_codex_items(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        msg = SimpleNamespace(
            content="simple",
            tool_calls=None,
            reasoning=None,
            reasoning_content=None,
            reasoning_details=None,
        )
        result = agent._build_assistant_message(msg, "stop")
        assert "codex_reasoning_items" not in result
 # ── Auxiliary client provider resolution ─────────────────────────────────────
 class TestAuxiliaryClientProviderPriority:
    """Check which backend ``get_text_auxiliary_client`` selects per credential setup."""

    def test_openrouter_always_wins(self, monkeypatch):
        """With ``OPENROUTER_API_KEY`` set, the client is built against OpenRouter."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        from agent.auxiliary_client import get_text_auxiliary_client

        with patch("agent.auxiliary_client.OpenAI") as openai_cls:
            _client, chosen_model = get_text_auxiliary_client()
        assert chosen_model == "google/gemini-3-flash-preview"
        base_url = str(openai_cls.call_args.kwargs["base_url"])
        assert "openrouter" in base_url.lower()

    def test_nous_when_no_openrouter(self, monkeypatch):
        """Without an OpenRouter key, available Nous auth selects ``gemini-3-flash``."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client

        with patch(
            "agent.auxiliary_client._read_nous_auth",
            return_value={"access_token": "nous-tok"},
        ):
            with patch("agent.auxiliary_client.OpenAI"):
                _client, chosen_model = get_text_auxiliary_client()
        assert chosen_model == "gemini-3-flash"

    def test_custom_endpoint_when_no_nous(self, monkeypatch):
        """With no OpenRouter key and no Nous auth, ``OPENAI_BASE_URL`` is used."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        from agent.auxiliary_client import get_text_auxiliary_client

        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch("agent.auxiliary_client.OpenAI") as openai_cls:
                _client, _chosen_model = get_text_auxiliary_client()
        assert openai_cls.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_last_resort(self, monkeypatch):
        """With only a Codex access token available, the Codex adapter is returned."""
        for env_name in ("OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY"):
            monkeypatch.delenv(env_name, raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient

        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            with patch(
                "agent.auxiliary_client._read_codex_access_token",
                return_value="codex-tok",
            ):
                with patch("agent.auxiliary_client.OpenAI"):
                    aux_client, chosen_model = get_text_auxiliary_client()
        assert chosen_model == "gpt-5.3-codex"
        assert isinstance(aux_client, CodexAuxiliaryClient)
--- a/tests/test_run_agent_codex_responses.py
+++ b/tests/test_run_agent_codex_responses.py
@ -530,12 +530,27 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id
 def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    # Preflight must raise ValueError (message matching "unsupported field")
    # when the request carries a key it does not accept.
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    # The -/+ pair below is the diff itself: the rejected example key changes
    # from "temperature" to an arbitrary unknown name.
-    kwargs["temperature"] = 0
+    kwargs["some_unknown_field"] = "value"
    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(kwargs)
 def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    """Preflight must return reasoning, include, temperature and max_output_tokens unchanged."""
    codex_agent = _build_agent(monkeypatch)
    request = _codex_request_kwargs()
    request["reasoning"] = {"effort": "high", "summary": "auto"}
    request["include"] = ["reasoning.encrypted_content"]
    request["temperature"] = 0.7
    request["max_output_tokens"] = 4096

    checked = codex_agent._preflight_codex_api_kwargs(request)

    # Expected values are written out separately from the inputs so the
    # comparison never relies on objects shared with the request.
    expected = {
        "reasoning": {"effort": "high", "summary": "auto"},
        "include": ["reasoning.encrypted_content"],
        "temperature": 0.7,
        "max_output_tokens": 4096,
    }
    for field, value in expected.items():
        assert checked[field] == value
 def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
    agent = _build_agent(monkeypatch)
    responses = [_codex_tool_call_response(), _codex_message_response("done")]
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional
 from openai import AsyncOpenAI, OpenAI
-from agent.auxiliary_client import get_text_auxiliary_client
+from agent.auxiliary_client import get_async_text_auxiliary_client
-# Resolve the auxiliary client at import time so we have the model slug.
+# Resolve the async auxiliary client at import time so we have the model slug.
-# We build an AsyncOpenAI from the same credentials for async summarization.
+# Handles Codex Responses API adapter transparently.
-_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
+_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
-_async_aux_client: AsyncOpenAI | None = None
-if _aux_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_client.api_key,
-        "base_url": str(_aux_client.base_url),
-    }
-    if "openrouter" in str(_aux_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _async_aux_client = AsyncOpenAI(**_async_kwargs)
 MAX_SESSION_CHARS = 100_000
-MAX_SUMMARY_TOKENS = 2000
+MAX_SUMMARY_TOKENS = 10000
 def _format_timestamp(ts) -> str:
--- a/tools/web_tools.py
+++ b/tools/web_tools.py
@ -48,7 +48,7 @@ import asyncio
 from typing import List, Dict, Any, Optional
 from firecrawl import Firecrawl
 from openai import AsyncOpenAI
-from agent.auxiliary_client import get_text_auxiliary_client
+from agent.auxiliary_client import get_async_text_auxiliary_client
 from tools.debug_helpers import DebugSession
 logger = logging.getLogger(__name__)
@ -67,21 +67,9 @@ def _get_firecrawl_client():
 DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
-# Resolve auxiliary text client at module level; build an async wrapper.
+# Resolve async auxiliary client at module level.
-_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
+# Handles Codex Responses API adapter transparently.
-_aux_async_client: AsyncOpenAI | None = None
+_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-                "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
 _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -174,7 +162,7 @@ async def _call_summarizer_llm(
    content: str, 
    context_str: str, 
    model: str, 
-    max_tokens: int = 4000,
+    max_tokens: int = 20000,
    is_chunk: bool = False,
    chunk_info: str = ""
 ) -> Optional[str]:
@ -306,7 +294,7 @@ async def _process_large_content_chunked(
                chunk_content, 
                context_str, 
                model, 
-                max_tokens=2000,
+                max_tokens=10000,
                is_chunk=True,
                chunk_info=chunk_info
            )
@ -374,7 +362,7 @@ Create a single, unified markdown summary."""
                {"role": "user", "content": synthesis_prompt}
            ],
            temperature=0.1,
-            **auxiliary_max_tokens_param(4000),
+            **auxiliary_max_tokens_param(20000),
            **({} if not _extra else {"extra_body": _extra}),
        )
        final_summary = response.choices[0].message.content.strip()