refactor(cli): Finalize OpenAI Codex Integration with OAuth

- Enhanced Codex model discovery by fetching available models from the API, with fallback to local cache and defaults.
- Updated the context compressor's summary target tokens to 2500 for improved performance.
- Added external credential detection for Codex CLI to streamline authentication.
- Refactored various components to ensure consistent handling of authentication and model selection across the application.
This commit is contained in:
teknium1 2026-02-28 21:47:51 -08:00
parent 86b1db0598
commit 500f0eab4a
22 changed files with 1784 additions and 207 deletions

View file

@ -121,11 +121,14 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
| Provider | Setup |
|----------|-------|
| **Nous Portal** | `hermes login` (OAuth, subscription-based) |
| **Nous Portal** | `hermes model` (OAuth, subscription-based) |
| **OpenAI Codex** | `hermes model` (ChatGPT OAuth, uses Codex models) |
| **OpenRouter** | `OPENROUTER_API_KEY` in `~/.hermes/.env` |
| **Custom Endpoint** | `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` |
**Note:** Even when using Nous Portal or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
**Codex note:** The OpenAI Codex provider authenticates via device code (open a URL, enter a code). Credentials are stored at `~/.codex/auth.json` and auto-refresh. No Codex CLI installation required.
**Note:** Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools.
---
@ -368,7 +371,7 @@ hermes --resume <id> # Resume a specific session (-r)
# Provider & model management
hermes model # Switch provider and model interactively
hermes login # Authenticate with Nous Portal (OAuth)
hermes model # Select provider and model
hermes logout # Clear stored OAuth credentials
# Configuration
@ -1638,7 +1641,7 @@ All variables go in `~/.hermes/.env`. Run `hermes config set VAR value` to set t
|------|-------------|
| `~/.hermes/config.yaml` | Your settings |
| `~/.hermes/.env` | API keys and secrets |
| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes login`) |
| `~/.hermes/auth.json` | OAuth provider credentials (managed by `hermes model`) |
| `~/.hermes/cron/` | Scheduled jobs data |
| `~/.hermes/sessions/` | Gateway session data |
| `~/.hermes/hermes-agent/` | Installation directory |
@ -1666,7 +1669,7 @@ hermes config # View current settings
Common issues:
- **"API key not set"**: Run `hermes setup` or `hermes config set OPENROUTER_API_KEY your_key`
- **"hermes: command not found"**: Reload your shell (`source ~/.bashrc`) or check PATH
- **"Run `hermes login` to re-authenticate"**: Your Nous Portal session expired. Run `hermes login` to refresh.
- **"Run `hermes setup` to re-authenticate"**: Your Nous Portal session expired. Run `hermes setup` or `hermes model` to refresh.
- **"No active paid subscription"**: Your Nous Portal account needs an active subscription for inference.
- **Gateway won't start**: Check `hermes gateway status` and logs
- **Missing config after update**: Run `hermes config check` to see what's new, then `hermes config migrate` to add missing options

View file

@ -8,7 +8,9 @@ Resolution order for text tasks:
1. OpenRouter (OPENROUTER_API_KEY)
2. Nous Portal (~/.hermes/auth.json active provider)
3. Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY)
4. None
4. Codex OAuth (Responses API via chatgpt.com with gpt-5.3-codex,
wrapped to look like a chat.completions client)
5. None
Resolution order for vision/multimodal tasks:
1. OpenRouter
@ -20,7 +22,8 @@ import json
import logging
import os
from pathlib import Path
from typing import Optional, Tuple
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from openai import OpenAI
@ -49,6 +52,188 @@ _NOUS_MODEL = "gemini-3-flash"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_AUTH_JSON_PATH = Path.home() / ".hermes" / "auth.json"
# Codex fallback: uses the Responses API (the only endpoint the Codex
# OAuth token can access) with a fast model for auxiliary tasks.
_CODEX_AUX_MODEL = "gpt-5.3-codex"
_CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex"
# ── Codex Responses → chat.completions adapter ─────────────────────────────
# All auxiliary consumers call client.chat.completions.create(**kwargs) and
# read response.choices[0].message.content. This adapter translates those
# calls to the Codex Responses API so callers don't need any changes.
class _CodexCompletionsAdapter:
    """Drop-in shim that accepts chat.completions.create() kwargs and
    routes them through the Codex Responses streaming API.

    Callers keep using chat.completions-style arguments (``messages``,
    ``model``, ``temperature``, token limits, ``tools``) and get back a
    SimpleNamespace shaped like a ChatCompletion response, so no caller
    changes are needed.
    """

    def __init__(self, real_client: OpenAI, model: str):
        # real_client is expected to be pointed at the Codex Responses
        # base URL with an OAuth access token as its api_key.
        self._client = real_client
        # Default model slug used when the caller omits "model".
        self._model = model

    def create(self, **kwargs) -> Any:
        """Translate a chat.completions.create() call into a Codex
        Responses API call and re-shape the result.

        Returns a SimpleNamespace with .choices, .model and .usage
        mimicking a ChatCompletion. Re-raises any error from the
        underlying streaming call (after a debug log).
        """
        messages = kwargs.get("messages", [])
        model = kwargs.get("model", self._model)
        temperature = kwargs.get("temperature")

        # Separate system/instructions from conversation messages:
        # the Responses API takes system text via "instructions".
        # If several system messages are present, the last one wins.
        instructions = "You are a helpful assistant."
        input_msgs: List[Dict[str, Any]] = []
        for msg in messages:
            role = msg.get("role", "user")
            content = msg.get("content", "")
            if role == "system":
                instructions = content
            else:
                input_msgs.append({"role": role, "content": content})

        resp_kwargs: Dict[str, Any] = {
            "model": model,
            "instructions": instructions,
            # The Responses API rejects an empty input list, so send a
            # single empty user message when there is no conversation.
            "input": input_msgs or [{"role": "user", "content": ""}],
            # NOTE(review): responses.stream() already implies streaming;
            # confirm the installed SDK version tolerates an explicit
            # "stream": True kwarg being forwarded to it.
            "stream": True,
            "store": False,
        }
        # Accept all common token-limit spellings used by callers.
        max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens")
        if max_tokens is not None:
            resp_kwargs["max_output_tokens"] = int(max_tokens)
        if temperature is not None:
            resp_kwargs["temperature"] = temperature

        # Tools support for flush_memories and similar callers.
        # chat.completions nests the spec under "function"; the
        # Responses API wants it flattened onto the tool entry.
        tools = kwargs.get("tools")
        if tools:
            converted = []
            for t in tools:
                fn = t.get("function", {}) if isinstance(t, dict) else {}
                name = fn.get("name")
                if not name:
                    continue
                converted.append({
                    "type": "function",
                    "name": name,
                    "description": fn.get("description", ""),
                    "parameters": fn.get("parameters", {}),
                })
            if converted:
                resp_kwargs["tools"] = converted

        # Stream and collect the response. Individual events are
        # discarded; only the final accumulated response is used.
        text_parts: List[str] = []
        tool_calls_raw: List[Any] = []
        usage = None
        try:
            with self._client.responses.stream(**resp_kwargs) as stream:
                for _event in stream:
                    pass
                final = stream.get_final_response()

            # Extract text and tool calls from the Responses output
            for item in getattr(final, "output", []):
                item_type = getattr(item, "type", None)
                if item_type == "message":
                    for part in getattr(item, "content", []):
                        ptype = getattr(part, "type", None)
                        if ptype in ("output_text", "text"):
                            text_parts.append(getattr(part, "text", ""))
                elif item_type == "function_call":
                    # Re-shape into the chat.completions tool_call form.
                    tool_calls_raw.append(SimpleNamespace(
                        id=getattr(item, "call_id", ""),
                        type="function",
                        function=SimpleNamespace(
                            name=getattr(item, "name", ""),
                            arguments=getattr(item, "arguments", "{}"),
                        ),
                    ))
            resp_usage = getattr(final, "usage", None)
            if resp_usage:
                # Map Responses usage fields onto chat.completions names.
                usage = SimpleNamespace(
                    prompt_tokens=getattr(resp_usage, "input_tokens", 0),
                    completion_tokens=getattr(resp_usage, "output_tokens", 0),
                    total_tokens=getattr(resp_usage, "total_tokens", 0),
                )
        except Exception as exc:
            logger.debug("Codex auxiliary Responses API call failed: %s", exc)
            raise

        # Empty output is normalized to None, matching chat.completions.
        content = "".join(text_parts).strip() or None

        # Build a response that looks like chat.completions
        message = SimpleNamespace(
            role="assistant",
            content=content,
            tool_calls=tool_calls_raw or None,
        )
        choice = SimpleNamespace(
            index=0,
            message=message,
            finish_reason="stop" if not tool_calls_raw else "tool_calls",
        )
        return SimpleNamespace(
            choices=[choice],
            model=model,
            usage=usage,
        )
class _CodexChatShim:
"""Wraps the adapter to provide client.chat.completions.create()."""
def __init__(self, adapter: _CodexCompletionsAdapter):
self.completions = adapter
class CodexAuxiliaryClient:
    """OpenAI-client-compatible wrapper that routes through the Codex
    Responses API.

    Consumers call client.chat.completions.create(**kwargs) exactly as
    they would on a real OpenAI client. The .api_key and .base_url
    attributes are mirrored from the wrapped client so async wrappers
    can introspect them.
    """

    def __init__(self, real_client: "OpenAI", model: str):
        self._real_client = real_client
        # Wire up the chat.completions.create() call chain.
        self.chat = _CodexChatShim(_CodexCompletionsAdapter(real_client, model))
        self.api_key = real_client.api_key
        self.base_url = real_client.base_url

    def close(self):
        """Close the underlying OpenAI client."""
        self._real_client.close()
class _AsyncCodexCompletionsAdapter:
"""Async version of the Codex Responses adapter.
Wraps the sync adapter via asyncio.to_thread() so async consumers
(web_tools, session_search) can await it as normal.
"""
def __init__(self, sync_adapter: _CodexCompletionsAdapter):
self._sync = sync_adapter
async def create(self, **kwargs) -> Any:
import asyncio
return await asyncio.to_thread(self._sync.create, **kwargs)
class _AsyncCodexChatShim:
def __init__(self, adapter: _AsyncCodexCompletionsAdapter):
self.completions = adapter
class AsyncCodexAuxiliaryClient:
    """Async-compatible wrapper matching AsyncOpenAI.chat.completions.create().

    Reuses the adapter held by an existing CodexAuxiliaryClient; only
    the await-ability differs. Mirrors .api_key / .base_url for
    introspection.
    """

    def __init__(self, sync_wrapper: "CodexAuxiliaryClient"):
        self.chat = _AsyncCodexChatShim(
            _AsyncCodexCompletionsAdapter(sync_wrapper.chat.completions)
        )
        self.api_key = sync_wrapper.api_key
        self.base_url = sync_wrapper.base_url
def _read_nous_auth() -> Optional[dict]:
"""Read and validate ~/.hermes/auth.json for an active Nous provider.
@ -82,12 +267,31 @@ def _nous_base_url() -> str:
return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL)
def _read_codex_access_token() -> Optional[str]:
"""Read a valid Codex OAuth access token from ~/.codex/auth.json."""
try:
codex_auth = Path.home() / ".codex" / "auth.json"
if not codex_auth.is_file():
return None
data = json.loads(codex_auth.read_text())
tokens = data.get("tokens")
if not isinstance(tokens, dict):
return None
access_token = tokens.get("access_token")
if isinstance(access_token, str) and access_token.strip():
return access_token.strip()
return None
except Exception as exc:
logger.debug("Could not read Codex auth for auxiliary client: %s", exc)
return None
# ── Public API ──────────────────────────────────────────────────────────────
def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""Return (client, model_slug) for text-only auxiliary tasks.
Falls through OpenRouter -> Nous Portal -> custom endpoint -> (None, None).
Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth -> (None, None).
"""
# 1. OpenRouter
or_key = os.getenv("OPENROUTER_API_KEY")
@ -115,11 +319,44 @@ def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
logger.debug("Auxiliary text client: custom endpoint (%s)", model)
return OpenAI(api_key=custom_key, base_url=custom_base), model
# 4. Nothing available
# 4. Codex OAuth -- uses the Responses API (only endpoint the token
# can access), wrapped to look like a chat.completions client.
codex_token = _read_codex_access_token()
if codex_token:
logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL)
real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL)
return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL
# 5. Nothing available
logger.debug("Auxiliary text client: none available")
return None, None
def get_async_text_auxiliary_client():
    """Return (async_client, model_slug) for async consumers.

    Standard providers yield (AsyncOpenAI, model); a Codex resolution
    yields (AsyncCodexAuxiliaryClient, model), which wraps the Responses
    API. Yields (None, None) when no provider is available.
    """
    from openai import AsyncOpenAI

    sync_client, model = get_text_auxiliary_client()
    if sync_client is None:
        return None, None

    # Codex resolutions get the async shim instead of a real client.
    if isinstance(sync_client, CodexAuxiliaryClient):
        return AsyncCodexAuxiliaryClient(sync_client), model

    base_url = str(sync_client.base_url)
    async_kwargs = {"api_key": sync_client.api_key, "base_url": base_url}
    # OpenRouter requests carry the attribution headers.
    if "openrouter" in base_url.lower():
        async_kwargs["default_headers"] = dict(_OR_HEADERS)
    return AsyncOpenAI(**async_kwargs), model
def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
"""Return (client, model_slug) for vision/multimodal auxiliary tasks.
@ -161,11 +398,12 @@ def auxiliary_max_tokens_param(value: int) -> dict:
OpenRouter and local models use 'max_tokens'. Direct OpenAI with newer
models (gpt-4o, o-series, gpt-5+) requires 'max_completion_tokens'.
The Codex adapter translates max_tokens internally, so we use max_tokens
for it as well.
"""
custom_base = os.getenv("OPENAI_BASE_URL", "")
or_key = os.getenv("OPENROUTER_API_KEY")
# Only use max_completion_tokens when the auxiliary client resolved to
# direct OpenAI (no OpenRouter key, no Nous auth, custom endpoint is api.openai.com)
# Only use max_completion_tokens for direct OpenAI custom endpoints
if (not or_key
and _read_nous_auth() is None
and "api.openai.com" in custom_base.lower()):

View file

@ -31,7 +31,7 @@ class ContextCompressor:
threshold_percent: float = 0.85,
protect_first_n: int = 3,
protect_last_n: int = 4,
summary_target_tokens: int = 500,
summary_target_tokens: int = 2500,
quiet_mode: bool = False,
summary_model_override: str = None,
):

6
cli.py
View file

@ -841,12 +841,10 @@ class HermesCLI:
or os.getenv("OPENROUTER_BASE_URL", CLI_CONFIG["model"]["base_url"])
)
self.api_key = api_key or os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY")
# Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
if max_turns != 60: # CLI arg was explicitly set
self._nous_key_expires_at: Optional[str] = None
self._nous_key_source: Optional[str] = None
# Max turns priority: CLI arg > config file > env var > default
if max_turns is not None:
# Max turns priority: CLI arg > env var > config file (agent.max_turns or root max_turns) > default
if max_turns is not None: # CLI arg was explicitly set
self.max_turns = max_turns
elif CLI_CONFIG["agent"].get("max_turns"):
self.max_turns = CLI_CONFIG["agent"]["max_turns"]

View file

@ -12,7 +12,7 @@ hermes
hermes --model "anthropic/claude-sonnet-4"
# With specific provider
hermes --provider nous # Use Nous Portal (requires: hermes login)
hermes --provider nous # Use Nous Portal (requires: hermes model)
hermes --provider openrouter # Force OpenRouter
# With specific toolsets
@ -93,7 +93,7 @@ model:
```
**Provider selection** (`provider` field):
- `auto` (default): Uses Nous Portal if logged in (`hermes login`), otherwise falls back to OpenRouter/env vars.
- `auto` (default): Uses Nous Portal if logged in (`hermes model`), otherwise falls back to OpenRouter/env vars.
- `openrouter`: Always uses `OPENROUTER_API_KEY` from `.env`.
- `nous`: Always uses Nous Portal OAuth credentials from `auth.json`.

View file

@ -214,17 +214,12 @@ class GatewayRunner:
return
from run_agent import AIAgent
_flush_api_key = os.getenv("OPENAI_API_KEY") or os.getenv("OPENROUTER_API_KEY", "")
_flush_base_url = os.getenv("OPENAI_BASE_URL") or os.getenv("OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1")
_flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL", "anthropic/claude-opus-4.6")
if not _flush_api_key:
runtime_kwargs = _resolve_runtime_agent_kwargs()
if not runtime_kwargs.get("api_key"):
return
tmp_agent = AIAgent(
model=_flush_model,
api_key=_flush_api_key,
base_url=_flush_base_url,
**runtime_kwargs,
max_iterations=8,
quiet_mode=True,
enabled_toolsets=["memory", "skills"],
@ -979,12 +974,10 @@ class GatewayRunner:
if old_history:
from run_agent import AIAgent
loop = asyncio.get_event_loop()
# Resolve credentials so the flush agent can reach the LLM
_flush_model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
_flush_kwargs = _resolve_runtime_agent_kwargs()
def _do_flush():
tmp_agent = AIAgent(
model=_flush_model,
**_resolve_runtime_agent_kwargs(),
**_flush_kwargs,
max_iterations=5,
quiet_mode=True,
enabled_toolsets=["memory"],

View file

@ -10,7 +10,7 @@ Architecture:
- Auth store (auth.json) holds per-provider credential state
- resolve_provider() picks the active provider via priority chain
- resolve_*_runtime_credentials() handles token refresh and key minting
- login_command() / logout_command() are the CLI entry points
- logout_command() is the CLI entry point for clearing auth
"""
from __future__ import annotations
@ -127,7 +127,7 @@ def format_auth_error(error: Exception) -> str:
return str(error)
if error.relogin_required:
return f"{error} Run `hermes login` to re-authenticate."
return f"{error} Run `hermes model` to re-authenticate."
if error.code == "subscription_required":
return (
@ -1172,6 +1172,39 @@ def get_auth_status(provider_id: Optional[str] = None) -> Dict[str, Any]:
return {"logged_in": False}
# =============================================================================
# External credential detection
# =============================================================================
def detect_external_credentials() -> List[Dict[str, Any]]:
    """Scan for credentials from other CLI tools that Hermes can reuse.

    Returns a list of dicts, each with:
      - provider: str -- Hermes provider id (e.g. "openai-codex")
      - path: str -- filesystem path where creds were found
      - label: str -- human-friendly description for the setup UI
    """
    discovered: List[Dict[str, Any]] = []

    # Codex CLI: ~/.codex/auth.json (or $CODEX_HOME/auth.json)
    try:
        codex_auth = resolve_codex_home_path() / "auth.json"
        if codex_auth.is_file():
            parsed = json.loads(codex_auth.read_text())
            token_block = parsed.get("tokens", {})
            if isinstance(token_block, dict) and token_block.get("access_token"):
                discovered.append({
                    "provider": "openai-codex",
                    "path": str(codex_auth),
                    "label": f"Codex CLI credentials found ({codex_auth})",
                })
    except Exception:
        # Best-effort discovery: a malformed file means "not found".
        pass

    return discovered
# =============================================================================
# CLI Commands — login / logout
# =============================================================================
@ -1328,56 +1361,43 @@ def _save_model_choice(model_id: str) -> None:
def login_command(args) -> None:
"""Run OAuth device code login for the selected provider."""
provider_id = getattr(args, "provider", None) or "nous"
if provider_id not in PROVIDER_REGISTRY:
print(f"Unknown provider: {provider_id}")
print(f"Available: {', '.join(PROVIDER_REGISTRY.keys())}")
raise SystemExit(1)
pconfig = PROVIDER_REGISTRY[provider_id]
if provider_id == "nous":
_login_nous(args, pconfig)
elif provider_id == "openai-codex":
_login_openai_codex(args, pconfig)
else:
print(f"Login for provider '{provider_id}' is not yet implemented.")
raise SystemExit(1)
"""Deprecated: use 'hermes model' or 'hermes setup' instead."""
print("The 'hermes login' command has been removed.")
print("Use 'hermes model' to select a provider and model,")
print("or 'hermes setup' for full interactive setup.")
raise SystemExit(0)
def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
"""OpenAI Codex login flow using Codex CLI auth state."""
codex_path = shutil.which("codex")
if not codex_path:
print("Codex CLI was not found in PATH.")
print("Install Codex CLI, then retry `hermes login --provider openai-codex`.")
raise SystemExit(1)
"""OpenAI Codex login via device code flow (no Codex CLI required)."""
codex_home = resolve_codex_home_path()
print(f"Starting Hermes login via {pconfig.name}...")
print(f"Using Codex CLI: {codex_path}")
print(f"Codex home: {resolve_codex_home_path()}")
creds: Dict[str, Any]
# Check for existing valid credentials first
try:
creds = resolve_codex_runtime_credentials()
existing = resolve_codex_runtime_credentials()
print(f"Existing Codex credentials found at {codex_home / 'auth.json'}")
try:
reuse = input("Use existing credentials? [Y/n]: ").strip().lower()
except (EOFError, KeyboardInterrupt):
reuse = "y"
if reuse in ("", "y", "yes"):
creds = existing
_save_codex_provider_state(creds)
return
except AuthError:
print("No usable Codex auth found. Running `codex login`...")
try:
subprocess.run(["codex", "login"], check=True)
except subprocess.CalledProcessError as exc:
print(f"Codex login failed with exit code {exc.returncode}.")
raise SystemExit(1)
except KeyboardInterrupt:
print("\nLogin cancelled.")
raise SystemExit(130)
try:
creds = resolve_codex_runtime_credentials()
except AuthError as exc:
print(format_auth_error(exc))
raise SystemExit(1)
pass
# No existing creds (or user declined) -- run device code flow
print()
print("Signing in to OpenAI Codex...")
print()
creds = _codex_device_code_login()
_save_codex_provider_state(creds)
def _save_codex_provider_state(creds: Dict[str, Any]) -> None:
"""Persist Codex provider state to auth store and config."""
auth_state = {
"auth_file": creds.get("auth_file"),
"codex_home": creds.get("codex_home"),
@ -1391,13 +1411,170 @@ def _login_openai_codex(args, pconfig: ProviderConfig) -> None:
_save_provider_state(auth_store, "openai-codex", auth_state)
saved_to = _save_auth_store(auth_store)
config_path = _update_config_for_provider("openai-codex", creds["base_url"])
config_path = _update_config_for_provider("openai-codex", creds.get("base_url", DEFAULT_CODEX_BASE_URL))
print()
print("Login successful!")
print(f" Auth state: {saved_to}")
print(f" Config updated: {config_path} (model.provider=openai-codex)")
def _codex_device_code_login() -> Dict[str, Any]:
    """Run the OpenAI device code login flow and return credentials dict.

    The flow has four distinct stages, each in its own helper below:
    request a device code, show it to the user and poll until the
    browser sign-in completes, exchange the authorization code for
    tokens, and persist the tokens to $CODEX_HOME/auth.json.

    Returns:
        A credentials dict (api_key, base_url, auth_file, codex_home,
        last_refresh, auth_mode, source).

    Raises:
        AuthError on any protocol failure; SystemExit(130) when the
        user cancels with Ctrl+C while waiting.
    """
    issuer = "https://auth.openai.com"
    client_id = CODEX_OAUTH_CLIENT_ID

    user_code, device_auth_id, poll_interval = _request_device_code(issuer, client_id)
    _print_device_code_instructions(issuer, user_code)
    code_resp = _poll_for_device_authorization(issuer, device_auth_id, user_code, poll_interval)
    tokens = _exchange_device_authorization(issuer, client_id, code_resp)
    return _persist_device_login_tokens(tokens)


def _request_device_code(issuer: str, client_id: str) -> "tuple[str, str, int]":
    """Request a device code; return (user_code, device_auth_id, poll_interval)."""
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            resp = client.post(
                f"{issuer}/api/accounts/deviceauth/usercode",
                json={"client_id": client_id},
                headers={"Content-Type": "application/json"},
            )
    except Exception as exc:
        raise AuthError(
            f"Failed to request device code: {exc}",
            provider="openai-codex", code="device_code_request_failed",
        )
    if resp.status_code != 200:
        raise AuthError(
            f"Device code request returned status {resp.status_code}.",
            provider="openai-codex", code="device_code_request_error",
        )
    device_data = resp.json()
    user_code = device_data.get("user_code", "")
    device_auth_id = device_data.get("device_auth_id", "")
    # Respect the server's polling interval, but never poll faster than 3s.
    poll_interval = max(3, int(device_data.get("interval", "5")))
    if not user_code or not device_auth_id:
        raise AuthError(
            "Device code response missing required fields.",
            provider="openai-codex", code="device_code_incomplete",
        )
    return user_code, device_auth_id, poll_interval


def _print_device_code_instructions(issuer: str, user_code: str) -> None:
    """Show the verification URL and one-time code to the user."""
    print("To continue, follow these steps:\n")
    print(" 1. Open this URL in your browser:")
    print(f" \033[94m{issuer}/codex/device\033[0m\n")
    print(" 2. Enter this code:")
    print(f" \033[94m{user_code}\033[0m\n")
    print("Waiting for sign-in... (press Ctrl+C to cancel)")


def _poll_for_device_authorization(
    issuer: str, device_auth_id: str, user_code: str, poll_interval: int
) -> Dict[str, Any]:
    """Poll until the user completes sign-in; return the auth-code payload.

    Raises AuthError on unexpected HTTP statuses or after a 15-minute
    timeout; SystemExit(130) if the user presses Ctrl+C while waiting.
    """
    import time as _time

    max_wait = 15 * 60  # 15 minutes
    start = _time.monotonic()
    code_resp = None
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            while _time.monotonic() - start < max_wait:
                # Sleep first so we never hammer the endpoint immediately.
                _time.sleep(poll_interval)
                poll_resp = client.post(
                    f"{issuer}/api/accounts/deviceauth/token",
                    json={"device_auth_id": device_auth_id, "user_code": user_code},
                    headers={"Content-Type": "application/json"},
                )
                if poll_resp.status_code == 200:
                    code_resp = poll_resp.json()
                    break
                elif poll_resp.status_code in (403, 404):
                    continue  # User hasn't completed login yet
                else:
                    raise AuthError(
                        f"Device auth polling returned status {poll_resp.status_code}.",
                        provider="openai-codex", code="device_code_poll_error",
                    )
    except KeyboardInterrupt:
        print("\nLogin cancelled.")
        raise SystemExit(130)
    if code_resp is None:
        raise AuthError(
            "Login timed out after 15 minutes.",
            provider="openai-codex", code="device_code_timeout",
        )
    return code_resp


def _exchange_device_authorization(
    issuer: str, client_id: str, code_resp: Dict[str, Any]
) -> Dict[str, Any]:
    """Exchange the device authorization code for OAuth tokens.

    Returns the parsed token response (dict); raises AuthError when the
    payload is incomplete or the exchange fails.
    """
    authorization_code = code_resp.get("authorization_code", "")
    code_verifier = code_resp.get("code_verifier", "")
    redirect_uri = f"{issuer}/deviceauth/callback"
    if not authorization_code or not code_verifier:
        raise AuthError(
            "Device auth response missing authorization_code or code_verifier.",
            provider="openai-codex", code="device_code_incomplete_exchange",
        )
    try:
        with httpx.Client(timeout=httpx.Timeout(15.0)) as client:
            token_resp = client.post(
                CODEX_OAUTH_TOKEN_URL,
                data={
                    "grant_type": "authorization_code",
                    "code": authorization_code,
                    "redirect_uri": redirect_uri,
                    "client_id": client_id,
                    "code_verifier": code_verifier,
                },
                headers={"Content-Type": "application/x-www-form-urlencoded"},
            )
    except Exception as exc:
        raise AuthError(
            f"Token exchange failed: {exc}",
            provider="openai-codex", code="token_exchange_failed",
        )
    if token_resp.status_code != 200:
        raise AuthError(
            f"Token exchange returned status {token_resp.status_code}.",
            provider="openai-codex", code="token_exchange_error",
        )
    return token_resp.json()


def _persist_device_login_tokens(tokens: Dict[str, Any]) -> Dict[str, Any]:
    """Validate tokens, write them to $CODEX_HOME/auth.json, and build
    the credentials dict returned to the login flow."""
    access_token = tokens.get("access_token", "")
    refresh_token = tokens.get("refresh_token", "")
    if not access_token:
        raise AuthError(
            "Token exchange did not return an access_token.",
            provider="openai-codex", code="token_exchange_no_access_token",
        )
    codex_home = resolve_codex_home_path()
    codex_home.mkdir(parents=True, exist_ok=True)
    auth_path = codex_home / "auth.json"
    payload = {
        "tokens": {
            "access_token": access_token,
            "refresh_token": refresh_token,
        },
        # Codex CLI convention: UTC timestamp with a trailing "Z".
        "last_refresh": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }
    _persist_codex_auth_payload(auth_path, payload, lock_held=False)
    base_url = (
        os.getenv("HERMES_CODEX_BASE_URL", "").strip().rstrip("/")
        or DEFAULT_CODEX_BASE_URL
    )
    return {
        "api_key": access_token,
        "base_url": base_url,
        "auth_file": str(auth_path),
        "codex_home": str(codex_home),
        "last_refresh": payload["last_refresh"],
        "auth_mode": "chatgpt",
        "source": "device-code",
    }
def _login_nous(args, pconfig: ProviderConfig) -> None:
"""Nous Portal device authorization flow."""
portal_base_url = (
@ -1579,6 +1756,6 @@ def logout_command(args) -> None:
if os.getenv("OPENROUTER_API_KEY"):
print("Hermes will use OpenRouter for inference.")
else:
print("Run `hermes login` or configure an API key to use Hermes.")
print("Run `hermes model` or configure an API key to use Hermes.")
else:
print(f"No auth state found for {provider_name}.")

View file

@ -1,21 +1,62 @@
"""Codex model discovery from local Codex CLI cache/config."""
"""Codex model discovery from API, local cache, and config."""
from __future__ import annotations
import json
import logging
from pathlib import Path
from typing import List, Optional
from hermes_cli.auth import resolve_codex_home_path
logger = logging.getLogger(__name__)
DEFAULT_CODEX_MODELS: List[str] = [
"gpt-5-codex",
"gpt-5.3-codex",
"gpt-5.2-codex",
"gpt-5.1-codex",
"gpt-5.1-codex-max",
"gpt-5.1-codex-mini",
]
def _fetch_models_from_api(access_token: str) -> List[str]:
    """Fetch available models from the Codex API.

    Returns visible, API-supported model slugs sorted by (priority,
    slug); returns [] on any network/HTTP failure so callers can fall
    back to local sources.

    Args:
        access_token: Codex OAuth bearer token.
    """
    try:
        import httpx

        resp = httpx.get(
            "https://chatgpt.com/backend-api/codex/models?client_version=1.0.0",
            headers={"Authorization": f"Bearer {access_token}"},
            timeout=10,
        )
        if resp.status_code != 200:
            return []
        data = resp.json()
        entries = data.get("models", []) if isinstance(data, dict) else []
    except Exception as exc:
        logger.debug("Failed to fetch Codex models from API: %s", exc)
        return []

    sortable = []
    for item in entries:
        if not isinstance(item, dict):
            continue
        slug = item.get("slug")
        if not isinstance(slug, str) or not slug.strip():
            continue
        slug = slug.strip()
        # Skip models the API says can't be called or shouldn't be shown.
        if item.get("supported_in_api") is False:
            continue
        visibility = item.get("visibility", "")
        if isinstance(visibility, str) and visibility.strip().lower() == "hide":
            continue
        # Unknown priority sorts last. Guard int(): a non-finite float
        # (NaN/inf in the JSON) would raise outside any try/except and
        # crash model discovery instead of degrading gracefully.
        priority = item.get("priority")
        if isinstance(priority, (int, float)):
            try:
                rank = int(priority)
            except (ValueError, OverflowError):  # NaN / inf
                rank = 10_000
        else:
            rank = 10_000
        sortable.append((rank, slug))
    sortable.sort(key=lambda x: (x[0], x[1]))
    return [slug for _, slug in sortable]
def _read_default_model(codex_home: Path) -> Optional[str]:
config_path = codex_home / "config.toml"
if not config_path.exists():
@ -72,10 +113,22 @@ def _read_cache_models(codex_home: Path) -> List[str]:
return deduped
def get_codex_model_ids() -> List[str]:
def get_codex_model_ids(access_token: Optional[str] = None) -> List[str]:
"""Return available Codex model IDs, trying API first, then local sources.
Resolution order: API (live, if token provided) > config.toml default >
local cache > hardcoded defaults.
"""
codex_home = resolve_codex_home_path()
ordered: List[str] = []
# Try live API if we have a token
if access_token:
api_models = _fetch_models_from_api(access_token)
if api_models:
return api_models
# Fall back to local sources
default_model = _read_default_model(codex_home)
if default_model:
ordered.append(default_model)

View file

@ -12,7 +12,6 @@ Usage:
hermes gateway install # Install gateway service
hermes gateway uninstall # Uninstall gateway service
hermes setup # Interactive setup wizard
hermes login # Authenticate with Nous Portal (or other providers)
hermes logout # Clear stored authentication
hermes status # Show status of all components
hermes cron # Manage cron jobs
@ -547,7 +546,14 @@ def _model_flow_openai_codex(config, current_model=""):
print(f"Login failed: {exc}")
return
codex_models = get_codex_model_ids()
_codex_token = None
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
except Exception:
pass
codex_models = get_codex_model_ids(access_token=_codex_token)
selected = _prompt_model_selection(codex_models, current_model=current_model)
if selected:
@ -827,8 +833,8 @@ def cmd_update(args):
pass # No systemd (macOS, WSL1, etc.) — skip silently
print()
print("Tip: You can now log in with Nous Portal for inference:")
print(" hermes login # Authenticate with Nous Portal")
print("Tip: You can now select a provider and model:")
print(" hermes model # Select provider and model")
except subprocess.CalledProcessError as e:
print(f"✗ Update failed: {e}")
@ -848,7 +854,6 @@ Examples:
hermes --continue Resume the most recent session
hermes --resume <session_id> Resume a specific session
hermes setup Run setup wizard
hermes login Authenticate with an inference provider
hermes logout Clear stored authentication
hermes model Select default model
hermes config View configuration

View file

@ -621,11 +621,23 @@ def run_setup_wizard(args):
format_auth_error, AuthError, fetch_nous_models,
resolve_nous_runtime_credentials, _update_config_for_provider,
_login_openai_codex, get_codex_auth_status, DEFAULT_CODEX_BASE_URL,
detect_external_credentials,
)
existing_custom = get_env_value("OPENAI_BASE_URL")
existing_or = get_env_value("OPENROUTER_API_KEY")
active_oauth = get_active_provider()
# Detect credentials from other CLI tools
detected_creds = detect_external_credentials()
if detected_creds:
print_info("Detected existing credentials:")
for cred in detected_creds:
if cred["provider"] == "openai-codex":
print_success(f" * {cred['label']} -- select \"OpenAI Codex\" to use it")
else:
print_info(f" * {cred['label']}")
print()
# Detect if any provider is already configured
has_any_provider = bool(active_oauth or existing_custom or existing_or)
@ -694,11 +706,11 @@ def run_setup_wizard(args):
except SystemExit:
print_warning("Nous Portal login was cancelled or failed.")
print_info("You can try again later with: hermes login")
print_info("You can try again later with: hermes model")
selected_provider = None
except Exception as e:
print_error(f"Login failed: {e}")
print_info("You can try again later with: hermes login")
print_info("You can try again later with: hermes model")
selected_provider = None
elif provider_idx == 1: # OpenAI Codex
@ -718,11 +730,11 @@ def run_setup_wizard(args):
_update_config_for_provider("openai-codex", DEFAULT_CODEX_BASE_URL)
except SystemExit:
print_warning("OpenAI Codex login was cancelled or failed.")
print_info("You can try again later with: hermes login --provider openai-codex")
print_info("You can try again later with: hermes model")
selected_provider = None
except Exception as e:
print_error(f"Login failed: {e}")
print_info("You can try again later with: hermes login --provider openai-codex")
print_info("You can try again later with: hermes model")
selected_provider = None
elif provider_idx == 2: # OpenRouter
@ -834,7 +846,15 @@ def run_setup_wizard(args):
# else: keep current
elif selected_provider == "openai-codex":
from hermes_cli.codex_models import get_codex_model_ids
codex_models = get_codex_model_ids()
# Try to get the access token for live model discovery
_codex_token = None
try:
from hermes_cli.auth import resolve_codex_runtime_credentials
_codex_creds = resolve_codex_runtime_credentials()
_codex_token = _codex_creds.get("api_key")
except Exception:
pass
codex_models = get_codex_model_ids(access_token=_codex_token)
model_choices = [f"{m}" for m in codex_models]
model_choices.append("Custom model")
model_choices.append(f"Keep current ({current_model})")

View file

@ -111,7 +111,7 @@ def show_status(args):
nous_logged_in = bool(nous_status.get("logged_in"))
print(
f" {'Nous Portal':<12} {check_mark(nous_logged_in)} "
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes login)'}"
f"{'logged in' if nous_logged_in else 'not logged in (run: hermes model)'}"
)
if nous_logged_in:
portal_url = nous_status.get("portal_base_url") or "(unknown)"
@ -126,7 +126,7 @@ def show_status(args):
codex_logged_in = bool(codex_status.get("logged_in"))
print(
f" {'OpenAI Codex':<12} {check_mark(codex_logged_in)} "
f"{'logged in' if codex_logged_in else 'not logged in (run: hermes login --provider openai-codex)'}"
f"{'logged in' if codex_logged_in else 'not logged in (run: hermes model)'}"
)
codex_auth_file = codex_status.get("auth_file")
if codex_auth_file:

View file

@ -1432,6 +1432,14 @@ class AIAgent:
content_text = str(content) if content is not None else ""
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
codex_reasoning = msg.get("codex_reasoning_items")
if isinstance(codex_reasoning, list):
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
items.append(ri)
if content_text.strip():
items.append({"role": "assistant", "content": content_text})
@ -1638,7 +1646,10 @@ class AIAgent:
if store is not False:
raise ValueError("Codex Responses contract requires 'store' to be false.")
allowed_keys = {"model", "instructions", "input", "tools", "store"}
allowed_keys = {
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
}
normalized: Dict[str, Any] = {
"model": model,
"instructions": instructions,
@ -1647,6 +1658,22 @@ class AIAgent:
"store": False,
}
# Pass through reasoning config
reasoning = api_kwargs.get("reasoning")
if isinstance(reasoning, dict):
normalized["reasoning"] = reasoning
include = api_kwargs.get("include")
if isinstance(include, list):
normalized["include"] = include
# Pass through max_output_tokens and temperature
max_output_tokens = api_kwargs.get("max_output_tokens")
if isinstance(max_output_tokens, (int, float)) and max_output_tokens > 0:
normalized["max_output_tokens"] = int(max_output_tokens)
temperature = api_kwargs.get("temperature")
if isinstance(temperature, (int, float)):
normalized["temperature"] = float(temperature)
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:
@ -1719,6 +1746,7 @@ class AIAgent:
content_parts: List[str] = []
reasoning_parts: List[str] = []
reasoning_items_raw: List[Dict[str, Any]] = []
tool_calls: List[Any] = []
has_incomplete_items = response_status in {"queued", "in_progress", "incomplete"}
saw_commentary_phase = False
@ -1750,6 +1778,16 @@ class AIAgent:
reasoning_text = self._extract_responses_reasoning_text(item)
if reasoning_text:
reasoning_parts.append(reasoning_text)
# Capture the full reasoning item for multi-turn continuity.
# encrypted_content is an opaque blob the API needs back on
# subsequent turns to maintain coherent reasoning chains.
encrypted = getattr(item, "encrypted_content", None)
if isinstance(encrypted, str) and encrypted:
raw_item = {"type": "reasoning", "encrypted_content": encrypted}
item_id = getattr(item, "id", None)
if isinstance(item_id, str) and item_id:
raw_item["id"] = item_id
reasoning_items_raw.append(raw_item)
elif item_type == "function_call":
if item_status in {"queued", "in_progress", "incomplete"}:
continue
@ -1807,6 +1845,7 @@ class AIAgent:
reasoning="\n\n".join(reasoning_parts).strip() if reasoning_parts else None,
reasoning_content=None,
reasoning_details=None,
codex_reasoning_items=reasoning_items_raw or None,
)
if tool_calls:
@ -1819,7 +1858,6 @@ class AIAgent:
def _run_codex_stream(self, api_kwargs: dict):
"""Execute one streaming Responses API request and return the final response."""
api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False)
max_stream_retries = 1
for attempt in range(max_stream_retries + 1):
try:
@ -1971,14 +2009,29 @@ class AIAgent:
if not instructions:
instructions = DEFAULT_AGENT_IDENTITY
return {
kwargs = {
"model": self.model,
"instructions": instructions,
"input": self._chat_messages_to_responses_input(payload_messages),
"tools": self._responses_tools(),
"store": False,
"reasoning": {"effort": "medium", "summary": "auto"},
"include": ["reasoning.encrypted_content"],
}
# Apply reasoning effort from config if set
if self.reasoning_config and isinstance(self.reasoning_config, dict):
if self.reasoning_config.get("enabled") is False:
kwargs.pop("reasoning", None)
kwargs["include"] = []
elif self.reasoning_config.get("effort"):
kwargs["reasoning"]["effort"] = self.reasoning_config["effort"]
if self.max_tokens is not None:
kwargs["max_output_tokens"] = self.max_tokens
return kwargs
provider_preferences = {}
if self.providers_allowed:
provider_preferences["only"] = self.providers_allowed
@ -2045,11 +2098,27 @@ class AIAgent:
}
if hasattr(assistant_message, 'reasoning_details') and assistant_message.reasoning_details:
msg["reasoning_details"] = [
{"type": d.get("type"), "text": d.get("text"), "signature": d.get("signature")}
for d in assistant_message.reasoning_details
if isinstance(d, dict)
]
# Pass reasoning_details back unmodified so providers (OpenRouter,
# Anthropic, OpenAI) can maintain reasoning continuity across turns.
# Each provider may include opaque fields (signature, encrypted_content)
# that must be preserved exactly.
raw_details = assistant_message.reasoning_details
preserved = []
for d in raw_details:
if isinstance(d, dict):
preserved.append(d)
elif hasattr(d, "__dict__"):
preserved.append(d.__dict__)
elif hasattr(d, "model_dump"):
preserved.append(d.model_dump())
if preserved:
msg["reasoning_details"] = preserved
# Codex Responses API: preserve encrypted reasoning items for
# multi-turn continuity. These get replayed as input on the next turn.
codex_items = getattr(assistant_message, "codex_reasoning_items", None)
if codex_items:
msg["codex_reasoning_items"] = codex_items
if assistant_message.tool_calls:
tool_calls = []
@ -2152,40 +2221,68 @@ class AIAgent:
messages.pop() # remove flush msg
return
api_kwargs = {
"model": self.model,
"messages": api_messages,
"tools": [memory_tool_def],
"temperature": 0.3,
**self._max_tokens_param(1024),
}
# Use auxiliary client for the flush call when available --
# it's cheaper and avoids Codex Responses API incompatibility.
from agent.auxiliary_client import get_text_auxiliary_client
aux_client, aux_model = get_text_auxiliary_client()
response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
if aux_client:
api_kwargs = {
"model": aux_model,
"messages": api_messages,
"tools": [memory_tool_def],
"temperature": 0.3,
"max_tokens": 5120,
}
response = aux_client.chat.completions.create(**api_kwargs, timeout=30.0)
elif self.api_mode == "codex_responses":
# No auxiliary client -- use the Codex Responses path directly
codex_kwargs = self._build_api_kwargs(api_messages)
codex_kwargs["tools"] = self._responses_tools([memory_tool_def])
codex_kwargs["temperature"] = 0.3
if "max_output_tokens" in codex_kwargs:
codex_kwargs["max_output_tokens"] = 5120
response = self._run_codex_stream(codex_kwargs)
else:
api_kwargs = {
"model": self.model,
"messages": api_messages,
"tools": [memory_tool_def],
"temperature": 0.3,
**self._max_tokens_param(5120),
}
response = self.client.chat.completions.create(**api_kwargs, timeout=30.0)
if response.choices:
# Extract tool calls from the response, handling both API formats
tool_calls = []
if self.api_mode == "codex_responses" and not aux_client:
assistant_msg, _ = self._normalize_codex_response(response)
if assistant_msg and assistant_msg.tool_calls:
tool_calls = assistant_msg.tool_calls
elif hasattr(response, "choices") and response.choices:
assistant_message = response.choices[0].message
if assistant_message.tool_calls:
# Execute only memory tool calls
for tc in assistant_message.tool_calls:
if tc.function.name == "memory":
try:
args = json.loads(tc.function.arguments)
flush_target = args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
result = _memory_tool(
action=args.get("action"),
target=flush_target,
content=args.get("content"),
old_text=args.get("old_text"),
store=self._memory_store,
)
# Also send user observations to Honcho when active
if self._honcho and flush_target == "user" and args.get("action") == "add":
self._honcho_save_user_observation(args.get("content", ""))
if not self.quiet_mode:
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
except Exception as e:
logger.debug("Memory flush tool call failed: %s", e)
tool_calls = assistant_message.tool_calls
for tc in tool_calls:
if tc.function.name == "memory":
try:
args = json.loads(tc.function.arguments)
flush_target = args.get("target", "memory")
from tools.memory_tool import memory_tool as _memory_tool
result = _memory_tool(
action=args.get("action"),
target=flush_target,
content=args.get("content"),
old_text=args.get("old_text"),
store=self._memory_store,
)
if self._honcho and flush_target == "user" and args.get("action") == "add":
self._honcho_save_user_observation(args.get("content", ""))
if not self.quiet_mode:
print(f" 🧠 Memory flush: saved to {args.get('target', 'memory')}")
except Exception as e:
logger.debug("Memory flush tool call failed: %s", e)
except Exception as e:
logger.debug("Memory flush API call failed: %s", e)
finally:
@ -2493,32 +2590,19 @@ class AIAgent:
if _is_nous:
summary_extra_body["tags"] = ["product=hermes-agent"]
summary_kwargs = {
"model": self.model,
"messages": api_messages,
}
if self.max_tokens is not None:
summary_kwargs.update(self._max_tokens_param(self.max_tokens))
if summary_extra_body:
summary_kwargs["extra_body"] = summary_extra_body
summary_response = self.client.chat.completions.create(**summary_kwargs)
if summary_response.choices and summary_response.choices[0].message.content:
final_response = summary_response.choices[0].message.content
if "<think>" in final_response:
final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
if final_response:
messages.append({"role": "assistant", "content": final_response})
else:
final_response = "I reached the iteration limit and couldn't generate a summary."
if self.api_mode == "codex_responses":
codex_kwargs = self._build_api_kwargs(api_messages)
codex_kwargs["tools"] = None
summary_response = self._run_codex_stream(codex_kwargs)
assistant_message, _ = self._normalize_codex_response(summary_response)
final_response = (assistant_message.content or "").strip() if assistant_message else ""
else:
summary_kwargs = {
"model": self.model,
"messages": api_messages,
}
if self.max_tokens is not None:
summary_kwargs["max_tokens"] = self.max_tokens
summary_kwargs.update(self._max_tokens_param(self.max_tokens))
if summary_extra_body:
summary_kwargs["extra_body"] = summary_extra_body
@ -2526,6 +2610,42 @@ class AIAgent:
if summary_response.choices and summary_response.choices[0].message.content:
final_response = summary_response.choices[0].message.content
else:
final_response = ""
if final_response:
if "<think>" in final_response:
final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
if final_response:
messages.append({"role": "assistant", "content": final_response})
else:
final_response = "I reached the iteration limit and couldn't generate a summary."
else:
# Retry summary generation
if self.api_mode == "codex_responses":
codex_kwargs = self._build_api_kwargs(api_messages)
codex_kwargs["tools"] = None
retry_response = self._run_codex_stream(codex_kwargs)
retry_msg, _ = self._normalize_codex_response(retry_response)
final_response = (retry_msg.content or "").strip() if retry_msg else ""
else:
summary_kwargs = {
"model": self.model,
"messages": api_messages,
}
if self.max_tokens is not None:
summary_kwargs["max_tokens"] = self.max_tokens
if summary_extra_body:
summary_kwargs["extra_body"] = summary_extra_body
summary_response = self.client.chat.completions.create(**summary_kwargs)
if summary_response.choices and summary_response.choices[0].message.content:
final_response = summary_response.choices[0].message.content
else:
final_response = ""
if final_response:
if "<think>" in final_response:
final_response = re.sub(r'<think>.*?</think>\s*', '', final_response, flags=re.DOTALL).strip()
messages.append({"role": "assistant", "content": final_response})

View file

@ -0,0 +1,168 @@
"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback."""
import json
import os
from pathlib import Path
from unittest.mock import patch, MagicMock
import pytest
from agent.auxiliary_client import (
get_text_auxiliary_client,
get_vision_auxiliary_client,
auxiliary_max_tokens_param,
_read_codex_access_token,
)
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove every provider-related env var so each test starts clean.

    autouse ensures no test inherits credentials from the developer's shell.
    """
    provider_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_BASE_URL",
        "OPENAI_API_KEY",
        "OPENAI_MODEL",
        "LLM_MODEL",
        "NOUS_INFERENCE_BASE_URL",
    )
    for var_name in provider_vars:
        monkeypatch.delenv(var_name, raising=False)
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Create a fake ~/.codex/ with a valid auth.json and stub the token reader.

    Returns the codex directory so tests can inspect or mutate auth.json.
    """
    codex_dir = tmp_path / ".codex"
    codex_dir.mkdir()
    payload = {
        "tokens": {
            "access_token": "codex-test-token-abc123",
            "refresh_token": "codex-refresh-xyz",
        }
    }
    (codex_dir / "auth.json").write_text(json.dumps(payload))
    # Stub the reader so tests don't depend on Path.home() resolution.
    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: "codex-test-token-abc123",
    )
    return codex_dir
class TestReadCodexAccessToken:
    """Exercise _read_codex_access_token against various ~/.codex/auth.json states."""

    @staticmethod
    def _write_auth(home, text):
        # Materialize ~/.codex/auth.json under a fake home directory.
        codex_dir = home / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(text)

    def test_valid_auth_file(self, tmp_path):
        self._write_auth(tmp_path, json.dumps(
            {"tokens": {"access_token": "tok-123", "refresh_token": "r-456"}}
        ))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            assert _read_codex_access_token() == "tok-123"

    def test_missing_file_returns_none(self, tmp_path):
        # No ~/.codex directory at all.
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            assert _read_codex_access_token() is None

    def test_empty_token_returns_none(self, tmp_path):
        # Whitespace-only token must be treated as absent.
        self._write_auth(tmp_path, json.dumps({"tokens": {"access_token": " "}}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            assert _read_codex_access_token() is None

    def test_malformed_json_returns_none(self, tmp_path):
        self._write_auth(tmp_path, "{bad json")
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            assert _read_codex_access_token() is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        self._write_auth(tmp_path, json.dumps({"other": "data"}))
        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            assert _read_codex_access_token() is None
class TestGetTextAuxiliaryClient:
    """Resolution-order tests: OpenRouter > Nous > custom endpoint > Codex > none."""

    def test_openrouter_takes_priority(self, monkeypatch, codex_auth_dir):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            _client, model = get_text_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        mock_openai.assert_called_once()
        assert mock_openai.call_args.kwargs["api_key"] == "or-key"

    def test_nous_takes_priority_over_codex(self, monkeypatch, codex_auth_dir):
        with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \
                patch("agent.auxiliary_client.OpenAI"):
            mock_nous.return_value = {"access_token": "nous-tok"}
            _client, model = get_text_auxiliary_client()
        assert model == "gemini-3-flash"

    def test_custom_endpoint_over_codex(self, monkeypatch, codex_auth_dir):
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key")
        # Re-stub the codex token so this test is explicit about its presence.
        monkeypatch.setattr(
            "agent.auxiliary_client._read_codex_access_token",
            lambda: "codex-test-token-abc123",
        )
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI") as mock_openai:
            _client, model = get_text_auxiliary_client()
        assert model == "gpt-4o-mini"
        assert mock_openai.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_when_nothing_else(self, codex_auth_dir):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gpt-5.3-codex"
        # Codex fallback wraps the connection, so we get the adapter type
        # rather than a raw OpenAI client.
        from agent.auxiliary_client import CodexAuxiliaryClient
        assert isinstance(client, CodexAuxiliaryClient)

    def test_returns_none_when_nothing_available(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            client, model = get_text_auxiliary_client()
        assert client is None
        assert model is None
class TestCodexNotInVisionClient:
    """The Codex token fallback must never be offered for vision workloads."""

    def test_vision_returns_none_without_openrouter_nous(self):
        # Even with a Codex token readable, vision resolution yields nothing.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None):
            client, model = get_vision_auxiliary_client()
        assert client is None
        assert model is None
class TestAuxiliaryMaxTokensParam:
    """auxiliary_max_tokens_param should emit {"max_tokens": n} for every provider."""

    def test_codex_fallback_uses_max_tokens(self, monkeypatch):
        # The Codex adapter translates max_tokens internally, so the plain
        # chat-completions parameter name is correct even on the Codex path.
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value="tok"):
            assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

    def test_openrouter_uses_max_tokens(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

    def test_no_provider_uses_max_tokens(self):
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
                patch("agent.auxiliary_client._read_codex_access_token", return_value=None):
            assert auxiliary_max_tokens_param(1024) == {"max_tokens": 1024}

View file

@ -185,8 +185,8 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
_write_codex_auth(codex_home)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("CODEX_HOME", str(codex_home))
monkeypatch.setattr("hermes_cli.auth.shutil.which", lambda _: "/usr/local/bin/codex")
monkeypatch.setattr("hermes_cli.auth.subprocess.run", lambda *a, **k: None)
# Mock input() to accept existing credentials
monkeypatch.setattr("builtins.input", lambda _: "y")
_login_openai_codex(SimpleNamespace(), PROVIDER_REGISTRY["openai-codex"])
@ -201,19 +201,10 @@ def test_login_openai_codex_persists_provider_state(tmp_path, monkeypatch):
assert config["model"]["base_url"] == DEFAULT_CODEX_BASE_URL
def test_login_command_defaults_to_nous(monkeypatch):
calls = {"nous": 0, "codex": 0}
def _fake_nous(args, pconfig):
calls["nous"] += 1
def _fake_codex(args, pconfig):
calls["codex"] += 1
monkeypatch.setattr("hermes_cli.auth._login_nous", _fake_nous)
monkeypatch.setattr("hermes_cli.auth._login_openai_codex", _fake_codex)
login_command(SimpleNamespace())
assert calls["nous"] == 1
assert calls["codex"] == 0
def test_login_command_shows_deprecation(monkeypatch, capsys):
"""login_command is deprecated and directs users to hermes model."""
with pytest.raises(SystemExit) as exc_info:
login_command(SimpleNamespace())
assert exc_info.value.code == 0
captured = capsys.readouterr()
assert "hermes model" in captured.out

80
tests/test_cli_init.py Normal file
View file

@ -0,0 +1,80 @@
"""Tests for HermesCLI initialization -- catches configuration bugs
that only manifest at runtime (not in mocked unit tests)."""
import os
import sys
from unittest.mock import patch, MagicMock
import pytest
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
def _make_cli(**kwargs):
    """Build a HermesCLI with tool discovery stubbed out (no toolset side effects)."""
    from cli import HermesCLI

    # An empty tool list keeps construction fast and dependency-free.
    with patch("cli.get_tool_definitions", return_value=[]):
        return HermesCLI(**kwargs)
class TestMaxTurnsResolution:
    """max_turns must always resolve to a positive integer, never None."""

    def test_default_max_turns_is_integer(self):
        instance = _make_cli()
        assert isinstance(instance.max_turns, int)
        assert instance.max_turns > 0

    def test_explicit_max_turns_honored(self):
        assert _make_cli(max_turns=25).max_turns == 25

    def test_none_max_turns_gets_default(self):
        instance = _make_cli(max_turns=None)
        assert isinstance(instance.max_turns, int)
        assert instance.max_turns > 0

    def test_env_var_max_turns(self, monkeypatch):
        """Env var is used when config file doesn't set max_turns."""
        monkeypatch.setenv("HERMES_MAX_ITERATIONS", "42")
        import cli as cli_module
        saved = cli_module.CLI_CONFIG["agent"].get("max_turns")
        cli_module.CLI_CONFIG["agent"]["max_turns"] = None
        try:
            assert _make_cli().max_turns == 42
        finally:
            # Restore the module-level config so other tests see the original.
            if saved is not None:
                cli_module.CLI_CONFIG["agent"]["max_turns"] = saved

    def test_max_turns_never_none_for_agent(self):
        """The value passed to AIAgent must never be None (causes TypeError in run_conversation)."""
        assert _make_cli().max_turns is not None
class TestVerboseAndToolProgress:
    """Sanity-check the types of display-related settings after init."""

    def test_default_verbose_is_bool(self):
        assert isinstance(_make_cli().verbose, bool)

    def test_tool_progress_mode_is_string(self):
        mode = _make_cli().tool_progress_mode
        assert isinstance(mode, str)
        # Only the known progress modes are valid.
        assert mode in ("off", "new", "all", "verbose")
class TestProviderResolution:
    """Verify provider fields resolve to sane, usable values at init time."""

    def test_api_key_is_string_or_none(self):
        key = _make_cli().api_key
        assert key is None or isinstance(key, str)

    def test_base_url_is_string(self):
        url = _make_cli().base_url
        assert isinstance(url, str)
        # Must be an http(s) URL, not a bare host or empty string.
        assert url.startswith("http")

    def test_model_is_string(self):
        model_name = _make_cli().model
        assert isinstance(model_name, str)
        assert len(model_name) > 0

View file

@ -149,6 +149,11 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch):
runner._prefill_messages = []
runner._reasoning_config = None
runner._running_agents = {}
from unittest.mock import MagicMock, AsyncMock
runner.hooks = MagicMock()
runner.hooks.emit = AsyncMock()
runner.hooks.loaded_hooks = []
runner._session_db = None
source = SessionSource(
platform=Platform.LOCAL,

View file

@ -0,0 +1,51 @@
"""Tests for detect_external_credentials() -- Phase 2 credential sync."""
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from hermes_cli.auth import detect_external_credentials
class TestDetectCodexCLI:
    """detect_external_credentials() discovery of the Codex CLI's ~/.codex/auth.json."""

    @staticmethod
    def _detect_with_home(codex_dir):
        # Run detection with resolve_codex_home_path pointed at codex_dir.
        with patch("hermes_cli.auth.resolve_codex_home_path", return_value=codex_dir):
            return detect_external_credentials()

    def test_detects_valid_codex_auth(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        auth = codex_dir / "auth.json"
        auth.write_text(json.dumps(
            {"tokens": {"access_token": "tok-123", "refresh_token": "ref-456"}}
        ))
        hits = [c for c in self._detect_with_home(codex_dir) if c["provider"] == "openai-codex"]
        assert len(hits) == 1
        assert "Codex CLI" in hits[0]["label"]
        assert hits[0]["path"] == str(auth)

    def test_skips_codex_without_access_token(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text(json.dumps({"tokens": {}}))
        found = self._detect_with_home(codex_dir)
        assert not any(c["provider"] == "openai-codex" for c in found)

    def test_skips_missing_codex_dir(self, tmp_path):
        found = self._detect_with_home(tmp_path / "nonexistent")
        assert not any(c["provider"] == "openai-codex" for c in found)

    def test_skips_malformed_codex_auth(self, tmp_path):
        codex_dir = tmp_path / ".codex"
        codex_dir.mkdir()
        (codex_dir / "auth.json").write_text("{bad json")
        found = self._detect_with_home(codex_dir)
        assert not any(c["provider"] == "openai-codex" for c in found)

    def test_returns_empty_when_nothing_found(self, tmp_path):
        assert self._detect_with_home(tmp_path / ".codex") == []

View file

@ -0,0 +1,225 @@
"""Tests for flush_memories() working correctly across all provider modes.
Catches the bug where Codex mode called chat.completions.create on a
Responses-only client, which would fail silently or with a 404.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock, call
import pytest
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
import run_agent
class _FakeOpenAI:
    """Minimal stand-in for openai.OpenAI: records ctor kwargs, performs no I/O."""

    def __init__(self, **kwargs):
        # Keep the raw kwargs for assertions, plus the two fields AIAgent reads.
        self.kwargs = kwargs
        self.api_key = kwargs.get("api_key", "test")
        self.base_url = kwargs.get("base_url", "http://test")

    def close(self):
        # Mirrors the real client's interface; nothing to release.
        pass
def _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter"):
    """Build an AIAgent with mocked internals, ready for flush_memories testing.

    Patches run_agent so the agent sees exactly one tool (the memory tool),
    no toolset requirements, and a fake OpenAI client that never touches the
    network. The returned agent has its flush thresholds primed so a memory
    flush is always eligible.
    """
    # Expose only the memory tool -- flush_memories should not need any other.
    monkeypatch.setattr(run_agent, "get_tool_definitions", lambda **kw: [
        {
            "type": "function",
            "function": {
                "name": "memory",
                "description": "Manage memories.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "action": {"type": "string"},
                        "target": {"type": "string"},
                        "content": {"type": "string"},
                    },
                },
            },
        },
    ])
    monkeypatch.setattr(run_agent, "check_toolset_requirements", lambda: {})
    # Replace the real client class so no network connection is ever made.
    monkeypatch.setattr(run_agent, "OpenAI", _FakeOpenAI)
    agent = run_agent.AIAgent(
        api_key="test-key",
        base_url="https://test.example.com/v1",
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
    # Give it a valid memory store
    agent._memory_store = MagicMock()
    # min_turns=1 with 5 user turns guarantees the flush threshold is met.
    agent._memory_flush_min_turns = 1
    agent._user_turn_count = 5
    return agent
def _chat_response_with_memory_call():
    """Fake chat-completions response whose assistant turn issues one memory call."""
    arguments = json.dumps({
        "action": "add",
        "target": "notes",
        "content": "User prefers dark mode.",
    })
    tool_call = SimpleNamespace(
        function=SimpleNamespace(name="memory", arguments=arguments),
    )
    message = SimpleNamespace(content=None, tool_calls=[tool_call])
    return SimpleNamespace(
        choices=[SimpleNamespace(message=message)],
        usage=SimpleNamespace(prompt_tokens=100, completion_tokens=20, total_tokens=120),
    )
class TestFlushMemoriesUsesAuxiliaryClient:
    """When an auxiliary client is available, flush_memories should use it
    instead of self.client -- especially critical in Codex mode."""
    def test_flush_uses_auxiliary_when_available(self, monkeypatch):
        # Codex-mode agent: its main client speaks the Responses API only,
        # so the flush must route through the auxiliary chat client instead.
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Remember this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)
            mock_aux_client.chat.completions.create.assert_called_once()
            call_kwargs = mock_aux_client.chat.completions.create.call_args
            # Accept both call_args access styles (.kwargs attr vs [1] index).
            assert call_kwargs.kwargs.get("model") == "gpt-4o-mini" or call_kwargs[1].get("model") == "gpt-4o-mini"
    def test_flush_uses_main_client_when_no_auxiliary(self, monkeypatch):
        """Non-Codex mode with no auxiliary falls back to self.client."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        agent.client = MagicMock()
        agent.client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi there"},
                {"role": "user", "content": "Save this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)
            agent.client.chat.completions.create.assert_called_once()
    def test_flush_executes_memory_tool_calls(self, monkeypatch):
        """Verify that memory tool calls from the flush response actually get executed."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Note this"},
            ]
            with patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
                agent.flush_memories(messages)
            # The canned response adds "User prefers dark mode." to "notes",
            # so the tool must be invoked once with exactly those arguments.
            mock_memory.assert_called_once()
            call_kwargs = mock_memory.call_args
            assert call_kwargs.kwargs["action"] == "add"
            assert call_kwargs.kwargs["target"] == "notes"
            assert "dark mode" in call_kwargs.kwargs["content"]
    def test_flush_strips_artifacts_from_messages(self, monkeypatch):
        """After flush, the flush prompt and any response should be removed from messages."""
        agent = _make_agent(monkeypatch, api_mode="chat_completions", provider="openrouter")
        mock_aux_client = MagicMock()
        mock_aux_client.chat.completions.create.return_value = _chat_response_with_memory_call()
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(mock_aux_client, "gpt-4o-mini")):
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Remember X"},
            ]
            original_len = len(messages)
            with patch("tools.memory_tool.memory_tool", return_value="Saved."):
                agent.flush_memories(messages)
            # Messages should not grow from the flush
            assert len(messages) <= original_len
            # No flush sentinel should remain
            for msg in messages:
                assert "_flush_sentinel" not in msg
class TestFlushMemoriesCodexFallback:
    """When no auxiliary client exists and we're in Codex mode, flush should
    use the Codex Responses API path instead of chat.completions."""
    def test_codex_mode_no_aux_uses_responses_api(self, monkeypatch):
        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider="openai-codex")
        # Simulated Responses API payload: one function_call output item
        # invoking the memory tool, in the Responses (not chat) shape.
        codex_response = SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="function_call",
                    call_id="call_1",
                    name="memory",
                    arguments=json.dumps({
                        "action": "add",
                        "target": "notes",
                        "content": "Codex flush test",
                    }),
                ),
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
            status="completed",
            model="gpt-5-codex",
        )
        # No auxiliary client -> the flush must go through _run_codex_stream.
        with patch("agent.auxiliary_client.get_text_auxiliary_client", return_value=(None, None)), \
             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
             patch.object(agent, "_build_api_kwargs") as mock_build, \
             patch("tools.memory_tool.memory_tool", return_value="Saved.") as mock_memory:
            # Minimal but well-formed Responses kwargs for the flush request.
            mock_build.return_value = {
                "model": "gpt-5-codex",
                "instructions": "test",
                "input": [],
                "tools": [],
                "max_output_tokens": 4096,
            }
            messages = [
                {"role": "user", "content": "Hello"},
                {"role": "assistant", "content": "Hi"},
                {"role": "user", "content": "Save this"},
            ]
            agent.flush_memories(messages)
            # The Responses path was used and the tool call was executed.
            mock_stream.assert_called_once()
            mock_memory.assert_called_once()
            assert mock_memory.call_args.kwargs["content"] == "Codex flush test"

View file

@ -0,0 +1,460 @@
"""Provider parity tests: verify that AIAgent builds correct API kwargs
and handles responses properly for all supported providers.
Ensures changes to one provider path don't silently break another.
"""
import json
import os
import sys
import types
from types import SimpleNamespace
from unittest.mock import patch, MagicMock
import pytest
sys.modules.setdefault("fire", types.SimpleNamespace(Fire=lambda *a, **k: None))
sys.modules.setdefault("firecrawl", types.SimpleNamespace(Firecrawl=object))
sys.modules.setdefault("fal_client", types.SimpleNamespace())
from run_agent import AIAgent
# ── Helpers ──────────────────────────────────────────────────────────────────
def _tool_defs(*names):
return [
{
"type": "function",
"function": {
"name": n,
"description": f"{n} tool",
"parameters": {"type": "object", "properties": {}},
},
}
for n in names
]
class _FakeOpenAI:
def __init__(self, **kw):
self.api_key = kw.get("api_key", "test")
self.base_url = kw.get("base_url", "http://test")
def close(self):
pass
def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
    """Construct an AIAgent with tool discovery stubbed and a fake OpenAI client."""
    replacements = {
        "run_agent.get_tool_definitions": lambda **kw: _tool_defs("web_search", "terminal"),
        "run_agent.check_toolset_requirements": lambda: {},
        "run_agent.OpenAI": _FakeOpenAI,
    }
    for target, value in replacements.items():
        monkeypatch.setattr(target, value)
    return AIAgent(
        api_key="test-key",
        base_url=base_url,
        provider=provider,
        api_mode=api_mode,
        max_iterations=4,
        quiet_mode=True,
        skip_context_files=True,
        skip_memory=True,
    )
# ── _build_api_kwargs tests ─────────────────────────────────────────────────
class TestBuildApiKwargsOpenRouter:
    """OpenRouter requests must use the chat.completions request shape."""

    @staticmethod
    def _kwargs_for(monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_uses_chat_completions_format(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "messages" in kwargs
        assert "model" in kwargs
        assert kwargs["messages"][-1]["content"] == "hi"

    def test_includes_reasoning_in_extra_body(self, monkeypatch):
        extra = self._kwargs_for(monkeypatch).get("extra_body", {})
        assert "reasoning" in extra
        assert extra["reasoning"]["enabled"] is True

    def test_includes_tools(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "tools" in kwargs
        assert "web_search" in [t["function"]["name"] for t in kwargs["tools"]]

    def test_no_responses_api_fields(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        for responses_only_field in ("input", "instructions", "store"):
            assert responses_only_field not in kwargs
class TestBuildApiKwargsNousPortal:
    """Nous Portal requests use chat.completions plus product tagging."""

    def _kwargs_for(self, monkeypatch):
        agent = _make_agent(monkeypatch, "nous", base_url="https://inference-api.nousresearch.com/v1")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_includes_nous_product_tags(self, monkeypatch):
        extra = self._kwargs_for(monkeypatch).get("extra_body", {})
        assert extra.get("tags") == ["product=hermes-agent"]

    def test_uses_chat_completions_format(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "messages" in kwargs
        assert "input" not in kwargs
class TestBuildApiKwargsCustomEndpoint:
    """Custom OpenAI-compatible endpoints get a plain chat.completions request."""

    def _kwargs_for(self, monkeypatch):
        agent = _make_agent(monkeypatch, "custom", base_url="http://localhost:1234/v1")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_uses_chat_completions_format(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "messages" in kwargs
        assert "input" not in kwargs

    def test_no_openrouter_extra_body(self, monkeypatch):
        # OpenRouter-specific reasoning config must not leak to custom endpoints.
        extra = self._kwargs_for(monkeypatch).get("extra_body", {})
        assert "reasoning" not in extra
class TestBuildApiKwargsCodex:
    """Codex requests must use the Responses API request shape."""

    def _kwargs_for(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        return agent._build_api_kwargs([{"role": "user", "content": "hi"}])

    def test_uses_responses_api_format(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "input" in kwargs
        assert "instructions" in kwargs
        assert "messages" not in kwargs
        assert kwargs["store"] is False

    def test_includes_reasoning_config(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "reasoning" in kwargs
        assert kwargs["reasoning"]["effort"] == "medium"

    def test_includes_encrypted_content_in_include(self, monkeypatch):
        kwargs = self._kwargs_for(monkeypatch)
        assert "reasoning.encrypted_content" in kwargs.get("include", [])

    def test_tools_converted_to_responses_format(self, monkeypatch):
        tools = self._kwargs_for(monkeypatch).get("tools", [])
        assert len(tools) > 0
        # Responses format hoists "name" to the top level, no nested "function".
        assert "name" in tools[0]
        assert "function" not in tools[0]
# ── Message conversion tests ────────────────────────────────────────────────
class TestChatMessagesToResponsesInput:
    """Verify _chat_messages_to_responses_input for Codex mode."""

    @staticmethod
    def _agent(monkeypatch):
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")

    def test_user_message_passes_through(self, monkeypatch):
        items = self._agent(monkeypatch)._chat_messages_to_responses_input(
            [{"role": "user", "content": "hello"}])
        assert items == [{"role": "user", "content": "hello"}]

    def test_system_messages_filtered(self, monkeypatch):
        history = [
            {"role": "system", "content": "be helpful"},
            {"role": "user", "content": "hello"},
        ]
        items = self._agent(monkeypatch)._chat_messages_to_responses_input(history)
        assert len(items) == 1
        assert items[0]["role"] == "user"

    def test_assistant_tool_calls_become_function_call_items(self, monkeypatch):
        history = [{
            "role": "assistant",
            "content": "",
            "tool_calls": [{
                "id": "call_abc",
                "call_id": "call_abc",
                "function": {"name": "web_search", "arguments": '{"query": "test"}'},
            }],
        }]
        items = self._agent(monkeypatch)._chat_messages_to_responses_input(history)
        calls = [item for item in items if item.get("type") == "function_call"]
        assert len(calls) == 1
        assert calls[0]["name"] == "web_search"
        assert calls[0]["call_id"] == "call_abc"

    def test_tool_results_become_function_call_output(self, monkeypatch):
        history = [{"role": "tool", "tool_call_id": "call_abc", "content": "result here"}]
        items = self._agent(monkeypatch)._chat_messages_to_responses_input(history)
        assert items[0]["type"] == "function_call_output"
        assert items[0]["call_id"] == "call_abc"
        assert items[0]["output"] == "result here"

    def test_encrypted_reasoning_replayed(self, monkeypatch):
        """Encrypted reasoning items from previous turns must be included in input."""
        history = [
            {"role": "user", "content": "think about this"},
            {
                "role": "assistant",
                "content": "I thought about it.",
                "codex_reasoning_items": [
                    {"type": "reasoning", "id": "rs_abc", "encrypted_content": "gAAAA_test_blob"},
                ],
            },
            {"role": "user", "content": "continue"},
        ]
        items = self._agent(monkeypatch)._chat_messages_to_responses_input(history)
        replayed = [item for item in items if item.get("type") == "reasoning"]
        assert len(replayed) == 1
        assert replayed[0]["encrypted_content"] == "gAAAA_test_blob"

    def test_no_reasoning_items_for_non_codex_messages(self, monkeypatch):
        """Messages without codex_reasoning_items should not inject anything."""
        history = [
            {"role": "assistant", "content": "hi"},
            {"role": "user", "content": "hello"},
        ]
        items = self._agent(monkeypatch)._chat_messages_to_responses_input(history)
        assert [item for item in items if item.get("type") == "reasoning"] == []
# ── Response normalization tests ─────────────────────────────────────────────
class TestNormalizeCodexResponse:
    """Verify _normalize_codex_response extracts all fields correctly."""

    def _make_codex_agent(self, monkeypatch):
        return _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                           base_url="https://chatgpt.com/backend-api/codex")

    @staticmethod
    def _message_item(text):
        # A completed assistant message with a single output_text part.
        return SimpleNamespace(type="message", status="completed",
                               content=[SimpleNamespace(type="output_text", text=text)],
                               phase="final_answer")

    @staticmethod
    def _reasoning_item(blob, summary_text, item_id):
        return SimpleNamespace(type="reasoning",
                               encrypted_content=blob,
                               summary=[SimpleNamespace(type="summary_text", text=summary_text)],
                               id=item_id, status=None)

    @staticmethod
    def _response(*items):
        return SimpleNamespace(output=list(items), status="completed")

    def test_text_response(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        msg, reason = agent._normalize_codex_response(
            self._response(self._message_item("Hello!")))
        assert msg.content == "Hello!"
        assert reason == "stop"

    def test_reasoning_summary_extracted(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = self._response(
            self._reasoning_item("gAAAA_blob", "Thinking about math", "rs_123"),
            self._message_item("42"),
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.content == "42"
        assert "math" in msg.reasoning
        assert reason == "stop"

    def test_encrypted_content_captured(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        response = self._response(
            self._reasoning_item("gAAAA_secret_blob_123", "Thinking", "rs_456"),
            self._message_item("done"),
        )
        msg, reason = agent._normalize_codex_response(response)
        assert msg.codex_reasoning_items is not None
        assert len(msg.codex_reasoning_items) == 1
        assert msg.codex_reasoning_items[0]["encrypted_content"] == "gAAAA_secret_blob_123"
        assert msg.codex_reasoning_items[0]["id"] == "rs_456"

    def test_no_encrypted_content_when_missing(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        msg, reason = agent._normalize_codex_response(
            self._response(self._message_item("no reasoning")))
        assert msg.codex_reasoning_items is None

    def test_tool_calls_extracted(self, monkeypatch):
        agent = self._make_codex_agent(monkeypatch)
        call_item = SimpleNamespace(type="function_call", status="completed",
                                    call_id="call_xyz", name="web_search",
                                    arguments='{"query":"test"}', id="fc_xyz")
        msg, reason = agent._normalize_codex_response(self._response(call_item))
        assert reason == "tool_calls"
        assert len(msg.tool_calls) == 1
        assert msg.tool_calls[0].function.name == "web_search"
# ── Chat completions response handling (OpenRouter/Nous) ─────────────────────
class TestBuildAssistantMessage:
    """Verify _build_assistant_message works for all provider response formats."""

    @staticmethod
    def _msg(**overrides):
        # Default chat.completions message shape; extra attrs (e.g.
        # codex_reasoning_items) are only present when explicitly passed.
        base = dict(content=None, tool_calls=None, reasoning=None,
                    reasoning_content=None, reasoning_details=None)
        base.update(overrides)
        return SimpleNamespace(**base)

    def test_openrouter_reasoning_fields(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        result = agent._build_assistant_message(
            self._msg(content="answer", reasoning="I thought about it"), "stop")
        assert result["content"] == "answer"
        assert result["reasoning"] == "I thought about it"
        assert "codex_reasoning_items" not in result

    def test_openrouter_reasoning_details_preserved_unmodified(self, monkeypatch):
        """reasoning_details must be passed back exactly as received for
        multi-turn continuity (OpenRouter, Anthropic, OpenAI all need this)."""
        agent = _make_agent(monkeypatch, "openrouter")
        original_detail = {
            "type": "thinking",
            "thinking": "deep thoughts here",
            "signature": "sig123_opaque_blob",
            "encrypted_content": "some_provider_blob",
            "extra_field": "should_not_be_dropped",
        }
        result = agent._build_assistant_message(
            self._msg(content="answer", reasoning_details=[original_detail]), "stop")
        stored = result["reasoning_details"][0]
        # ALL fields must survive, not just type/text/signature
        assert stored["signature"] == "sig123_opaque_blob"
        assert stored["encrypted_content"] == "some_provider_blob"
        assert stored["extra_field"] == "should_not_be_dropped"
        assert stored["thinking"] == "deep thoughts here"

    def test_codex_preserves_encrypted_reasoning(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openai-codex", api_mode="codex_responses",
                            base_url="https://chatgpt.com/backend-api/codex")
        message = self._msg(
            content="result",
            reasoning="summary text",
            codex_reasoning_items=[
                {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
            ],
        )
        result = agent._build_assistant_message(message, "stop")
        assert result["codex_reasoning_items"] == [
            {"type": "reasoning", "id": "rs_1", "encrypted_content": "gAAAA_blob"},
        ]

    def test_plain_message_no_codex_items(self, monkeypatch):
        agent = _make_agent(monkeypatch, "openrouter")
        result = agent._build_assistant_message(self._msg(content="simple"), "stop")
        assert "codex_reasoning_items" not in result
# ── Auxiliary client provider resolution ─────────────────────────────────────
class TestAuxiliaryClientProviderPriority:
    """Verify auxiliary client resolution doesn't break for any provider."""

    def test_openrouter_always_wins(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client.OpenAI") as openai_mock:
            client, model = get_text_auxiliary_client()
        assert model == "google/gemini-3-flash-preview"
        assert "openrouter" in str(openai_mock.call_args.kwargs["base_url"]).lower()

    def test_nous_when_no_openrouter(self, monkeypatch):
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "nous-tok"}), \
             patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gemini-3-flash"

    def test_custom_endpoint_when_no_nous(self, monkeypatch):
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "local-key")
        from agent.auxiliary_client import get_text_auxiliary_client
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client.OpenAI") as openai_mock:
            client, model = get_text_auxiliary_client()
        assert openai_mock.call_args.kwargs["base_url"] == "http://localhost:1234/v1"

    def test_codex_fallback_last_resort(self, monkeypatch):
        # Remove every higher-priority credential source.
        for env_var in ("OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY"):
            monkeypatch.delenv(env_var, raising=False)
        from agent.auxiliary_client import get_text_auxiliary_client, CodexAuxiliaryClient
        with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \
             patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-tok"), \
             patch("agent.auxiliary_client.OpenAI"):
            client, model = get_text_auxiliary_client()
        assert model == "gpt-5.3-codex"
        assert isinstance(client, CodexAuxiliaryClient)

View file

@ -530,12 +530,27 @@ def test_preflight_codex_api_kwargs_rejects_function_call_output_without_call_id
def test_preflight_codex_api_kwargs_rejects_unsupported_request_fields(monkeypatch):
    # Unknown request fields must be rejected before any request is sent.
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["temperature"] = 0
    kwargs["some_unknown_field"] = "value"
    # NOTE(review): the sibling test asserts temperature is *allowed*, so the
    # ValueError here presumably fires on some_unknown_field only — confirm.
    with pytest.raises(ValueError, match="unsupported field"):
        agent._preflight_codex_api_kwargs(kwargs)
def test_preflight_codex_api_kwargs_allows_reasoning_and_temperature(monkeypatch):
    # Supported tuning fields must pass through preflight validation unchanged.
    agent = _build_agent(monkeypatch)
    kwargs = _codex_request_kwargs()
    kwargs["reasoning"] = {"effort": "high", "summary": "auto"}
    kwargs["include"] = ["reasoning.encrypted_content"]
    kwargs["temperature"] = 0.7
    kwargs["max_output_tokens"] = 4096
    result = agent._preflight_codex_api_kwargs(kwargs)
    # Each whitelisted field survives verbatim — no normalization expected.
    assert result["reasoning"] == {"effort": "high", "summary": "auto"}
    assert result["include"] == ["reasoning.encrypted_content"]
    assert result["temperature"] == 0.7
    assert result["max_output_tokens"] == 4096
def test_run_conversation_codex_replay_payload_keeps_call_id(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [_codex_tool_call_response(), _codex_message_response("done")]

View file

@ -24,26 +24,13 @@ from typing import Dict, Any, List, Optional
from openai import AsyncOpenAI, OpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from agent.auxiliary_client import get_async_text_auxiliary_client
# Resolve the auxiliary client at import time so we have the model slug.
# We build an AsyncOpenAI from the same credentials for async summarization.
_aux_client, _SUMMARIZER_MODEL = get_text_auxiliary_client()
_async_aux_client: AsyncOpenAI | None = None
if _aux_client is not None:
_async_kwargs = {
"api_key": _aux_client.api_key,
"base_url": str(_aux_client.base_url),
}
if "openrouter" in str(_aux_client.base_url).lower():
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
_async_aux_client = AsyncOpenAI(**_async_kwargs)
# Resolve the async auxiliary client at import time so we have the model slug.
# Handles Codex Responses API adapter transparently.
_async_aux_client, _SUMMARIZER_MODEL = get_async_text_auxiliary_client()
MAX_SESSION_CHARS = 100_000
MAX_SUMMARY_TOKENS = 2000
MAX_SUMMARY_TOKENS = 10000
def _format_timestamp(ts) -> str:

View file

@ -48,7 +48,7 @@ import asyncio
from typing import List, Dict, Any, Optional
from firecrawl import Firecrawl
from openai import AsyncOpenAI
from agent.auxiliary_client import get_text_auxiliary_client
from agent.auxiliary_client import get_async_text_auxiliary_client
from tools.debug_helpers import DebugSession
logger = logging.getLogger(__name__)
@ -67,21 +67,9 @@ def _get_firecrawl_client():
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
# Resolve auxiliary text client at module level; build an async wrapper.
_aux_sync_client, DEFAULT_SUMMARIZER_MODEL = get_text_auxiliary_client()
_aux_async_client: AsyncOpenAI | None = None
if _aux_sync_client is not None:
_async_kwargs = {
"api_key": _aux_sync_client.api_key,
"base_url": str(_aux_sync_client.base_url),
}
if "openrouter" in str(_aux_sync_client.base_url).lower():
_async_kwargs["default_headers"] = {
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
"X-OpenRouter-Title": "Hermes Agent",
"X-OpenRouter-Categories": "productivity,cli-agent",
}
_aux_async_client = AsyncOpenAI(**_async_kwargs)
# Resolve async auxiliary client at module level.
# Handles Codex Responses API adapter transparently.
_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
@ -174,7 +162,7 @@ async def _call_summarizer_llm(
content: str,
context_str: str,
model: str,
max_tokens: int = 4000,
max_tokens: int = 20000,
is_chunk: bool = False,
chunk_info: str = ""
) -> Optional[str]:
@ -306,7 +294,7 @@ async def _process_large_content_chunked(
chunk_content,
context_str,
model,
max_tokens=2000,
max_tokens=10000,
is_chunk=True,
chunk_info=chunk_info
)
@ -374,7 +362,7 @@ Create a single, unified markdown summary."""
{"role": "user", "content": synthesis_prompt}
],
temperature=0.1,
**auxiliary_max_tokens_param(4000),
**auxiliary_max_tokens_param(20000),
**({} if not _extra else {"extra_body": _extra}),
)
final_summary = response.choices[0].message.content.strip()