From e9c33171581d5b310c1a467247de3522ef73a99a Mon Sep 17 00:00:00 2001
From: Teknium <127238744+teknium1@users.noreply.github.com>
Date: Thu, 12 Mar 2026 05:58:48 -0700
Subject: [PATCH] =?UTF-8?q?fix:=20improve=20Kimi=20model=20selection=20?=
 =?UTF-8?q?=E2=80=94=20auto-detect=20endpoint,=20add=20missing=20models=20?=
 =?UTF-8?q?(#1039)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: /reasoning command output ordering, display, and inline think extraction

Three issues with the /reasoning command:

1. Output interleaving: The command echo used print() while feedback
   used _cprint(), causing them to render out-of-order under
   prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
   all output renders through the same path in correct order.

2. Reasoning display not working: /reasoning show toggled a flag
   but reasoning never appeared for models that embed thinking in
   inline <think> blocks rather than structured API fields. Added
   fallback extraction in _build_assistant_message to capture
   <think> block content as reasoning when no structured reasoning
   fields (reasoning, reasoning_content, reasoning_details) are
   present. This feeds into both the reasoning callback (during
   tool loops) and the post-response reasoning box display.

3. Feedback clarity: Added checkmarks to confirm actions, persisted
   show/hide to config (was session-only before), and aligned the
   status display for readability.

Tests: 7 new tests for inline think block extraction (41 total).

* feat: add /reasoning command to gateway (Telegram/Discord/etc)

The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:

1. /reasoning command handler in the gateway:
   - No args: shows current effort level and display state
   - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
   - /reasoning show|hide: toggles reasoning display in responses
   - All changes saved to config.yaml immediately

2. Reasoning display in gateway responses:
   - When show_reasoning is enabled, prepends a 'Reasoning' block
     with the model's last_reasoning content before the response
   - Collapses long reasoning (>15 lines) to keep messages readable
   - Uses last_reasoning from run_conversation result dict

3. Plumbing:
   - Added _show_reasoning attribute loaded from config at startup
   - Propagated last_reasoning through _run_agent return dict
   - Added /reasoning to help text and known_commands set
   - Uses getattr for _show_reasoning to handle test stubs

* fix: improve Kimi model selection — auto-detect endpoint, add missing models

Kimi Coding Plan setup:
- New dedicated _model_flow_kimi() replaces the generic API-key flow
  for kimi-coding. Removes the confusing 'Base URL' prompt entirely —
  the endpoint is auto-detected from the API key prefix:
    sk-kimi-* → api.kimi.com/coding/v1 (Kimi Coding Plan)
    other     → api.moonshot.ai/v1 (legacy Moonshot)

- Shows appropriate models for each endpoint:
    Coding Plan: kimi-for-coding, kimi-k2.5, kimi-k2-thinking, kimi-k2-thinking-turbo
    Moonshot:    full model catalog

- Clears any stale KIMI_BASE_URL override so runtime auto-detection
  via _resolve_kimi_base_url() works correctly.

Model catalog updates:
- Added kimi-for-coding (primary Coding Plan model) and kimi-k2-thinking-turbo
  to models.py, main.py _PROVIDER_MODELS, and model_metadata.py context windows.

- Updated User-Agent from KimiCLI/1.0 to KimiCLI/1.3 (Kimi's coding
  endpoint whitelists known coding agents via User-Agent sniffing).
---
 agent/model_metadata.py |   2 +
 hermes_cli/main.py      | 112 +++++++++++++++++++++++++++++++++++++++-
 hermes_cli/models.py    |   2 +
 run_agent.py            |   2 +-
 4 files changed, 115 insertions(+), 3 deletions(-)
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 3b2ab9d0..e8d1e51b 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -53,8 +53,10 @@ DEFAULT_CONTEXT_LENGTHS = {
     "glm-5": 202752,
     "glm-4.5": 131072,
     "glm-4.5-flash": 131072,
+    "kimi-for-coding": 262144,
     "kimi-k2.5": 262144,
     "kimi-k2-thinking": 262144,
+    "kimi-k2-thinking-turbo": 262144,
     "kimi-k2-turbo-preview": 262144,
     "kimi-k2-0905-preview": 131072,
     "MiniMax-M2.5": 204800,
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 78153535..ba357042 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -832,7 +832,9 @@ def cmd_model(args):
         _model_flow_named_custom(config, _custom_provider_map[selected_provider])
     elif selected_provider == "remove-custom":
         _remove_custom_provider(config)
-    elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
+    elif selected_provider == "kimi-coding":
+        _model_flow_kimi(config, current_model)
+    elif selected_provider in ("zai", "minimax", "minimax-cn"):
         _model_flow_api_key_provider(config, selected_provider, current_model)
 
 
@@ -1343,8 +1345,10 @@ _PROVIDER_MODELS = {
         "glm-4.5-flash",
     ],
     "kimi-coding": [
+        "kimi-for-coding",
         "kimi-k2.5",
         "kimi-k2-thinking",
+        "kimi-k2-thinking-turbo",
         "kimi-k2-turbo-preview",
         "kimi-k2-0905-preview",
     ],
@@ -1361,8 +1365,112 @@ _PROVIDER_MODELS = {
 }
 
 
+def _model_flow_kimi(config, current_model=""):
+    """Kimi / Moonshot model selection with automatic endpoint routing.
+
+    - sk-kimi-* keys   → api.kimi.com/coding/v1  (Kimi Coding Plan)
+    - Other keys        → api.moonshot.ai/v1      (legacy Moonshot)
+
+    No manual base URL prompt — endpoint is determined by key prefix.
+    """
+    from hermes_cli.auth import (
+        PROVIDER_REGISTRY, KIMI_CODE_BASE_URL, _prompt_model_selection,
+        _save_model_choice, deactivate_provider,
+    )
+    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
+
+    provider_id = "kimi-coding"
+    pconfig = PROVIDER_REGISTRY[provider_id]
+    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
+    base_url_env = pconfig.base_url_env_var or ""
+
+    # Step 1: Check / prompt for API key
+    existing_key = ""
+    for ev in pconfig.api_key_env_vars:
+        existing_key = get_env_value(ev) or os.getenv(ev, "")
+        if existing_key:
+            break
+
+    if not existing_key:
+        print(f"No {pconfig.name} API key configured.")
+        if key_env:
+            try:
+                new_key = input(f"{key_env} (or Enter to cancel): ").strip()
+            except (KeyboardInterrupt, EOFError):
+                print()
+                return
+            if not new_key:
+                print("Cancelled.")
+                return
+            save_env_value(key_env, new_key)
+            existing_key = new_key
+            print("API key saved.")
+            print()
+    else:
+        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
+        print()
+
+    # Step 2: Auto-detect endpoint from key prefix
+    is_coding_plan = existing_key.startswith("sk-kimi-")
+    if is_coding_plan:
+        effective_base = KIMI_CODE_BASE_URL
+        print(f"  Detected Kimi Coding Plan key → {effective_base}")
+    else:
+        effective_base = pconfig.inference_base_url
+        print(f"  Using Moonshot endpoint → {effective_base}")
+    # Clear any manual base URL override so auto-detection works at runtime
+    if base_url_env and get_env_value(base_url_env):
+        save_env_value(base_url_env, "")
+    print()
+
+    # Step 3: Model selection — show appropriate models for the endpoint
+    if is_coding_plan:
+        # Coding Plan models (kimi-for-coding first)
+        model_list = [
+            "kimi-for-coding",
+            "kimi-k2.5",
+            "kimi-k2-thinking",
+            "kimi-k2-thinking-turbo",
+        ]
+    else:
+        # Legacy Moonshot models
+        model_list = _PROVIDER_MODELS.get(provider_id, [])
+
+    if model_list:
+        selected = _prompt_model_selection(model_list, current_model=current_model)
+    else:
+        try:
+            selected = input("Enter model name: ").strip()
+        except (KeyboardInterrupt, EOFError):
+            selected = None
+
+    if selected:
+        # Clear custom endpoint if set (avoid confusion)
+        if get_env_value("OPENAI_BASE_URL"):
+            save_env_value("OPENAI_BASE_URL", "")
+            save_env_value("OPENAI_API_KEY", "")
+
+        _save_model_choice(selected)
+
+        # Update config with provider and base URL
+        cfg = load_config()
+        model = cfg.get("model")
+        if not isinstance(model, dict):
+            model = {"default": model} if model else {}
+            cfg["model"] = model
+        model["provider"] = provider_id
+        model["base_url"] = effective_base
+        save_config(cfg)
+        deactivate_provider()
+
+        endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
+        print(f"Default model set to: {selected} (via {endpoint_label})")
+    else:
+        print("No change.")
+
+
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
-    """Generic flow for API-key providers (z.ai, Kimi, MiniMax)."""
+    """Generic flow for API-key providers (z.ai, MiniMax)."""
     from hermes_cli.auth import (
         PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
         _update_config_for_provider, deactivate_provider,
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index d07da105..92dcbf97 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -51,8 +51,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
         "glm-4.5-flash",
     ],
     "kimi-coding": [
+        "kimi-for-coding",
         "kimi-k2.5",
         "kimi-k2-thinking",
+        "kimi-k2-thinking-turbo",
         "kimi-k2-turbo-preview",
         "kimi-k2-0905-preview",
     ],
diff --git a/run_agent.py b/run_agent.py
index 0d6fe545..7808435d 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -438,7 +438,7 @@ class AIAgent:
                 }
             elif "api.kimi.com" in effective_base.lower():
                 client_kwargs["default_headers"] = {
-                    "User-Agent": "KimiCLI/1.0",
+                    "User-Agent": "KimiCLI/1.3",
                 }
         else:
             # No explicit creds — use the centralized provider router