fix: improve Kimi model selection — auto-detect endpoint, add missing models (#1039)

* fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline <think> blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture <think> block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs * fix: improve Kimi model selection — auto-detect endpoint, add missing models Kimi Coding Plan setup: - New dedicated _model_flow_kimi() replaces the generic API-key flow for kimi-coding. Removes the confusing 'Base URL' prompt entirely — the endpoint is auto-detected from the API key prefix: sk-kimi-* → api.kimi.com/coding/v1 (Kimi Coding Plan) other → api.moonshot.ai/v1 (legacy Moonshot) - Shows appropriate models for each endpoint: Coding Plan: kimi-for-coding, kimi-k2.5, kimi-k2-thinking, kimi-k2-thinking-turbo Moonshot: full model catalog - Clears any stale KIMI_BASE_URL override so runtime auto-detection via _resolve_kimi_base_url() works correctly. Model catalog updates: - Added kimi-for-coding (primary Coding Plan model) and kimi-k2-thinking-turbo to models.py, main.py _PROVIDER_MODELS, and model_metadata.py context windows. - Updated User-Agent from KimiCLI/1.0 to KimiCLI/1.3 (Kimi's coding endpoint whitelists known coding agents via User-Agent sniffing).
2026-03-12 05:58:48 -07:00 · 2026-03-12 05:58:48 -07:00 · e9c3317158
commit e9c3317158
parent 1e3607150c
4 changed files with 115 additions and 3 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@ -53,8 +53,10 @@ DEFAULT_CONTEXT_LENGTHS = {
    "glm-5": 202752,
    "glm-4.5": 131072,
    "glm-4.5-flash": 131072,
    "kimi-for-coding": 262144,
    "kimi-k2.5": 262144,
    "kimi-k2-thinking": 262144,
    "kimi-k2-thinking-turbo": 262144,
    "kimi-k2-turbo-preview": 262144,
    "kimi-k2-0905-preview": 131072,
    "MiniMax-M2.5": 204800,
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -832,7 +832,9 @@ def cmd_model(args):
        _model_flow_named_custom(config, _custom_provider_map[selected_provider])
    elif selected_provider == "remove-custom":
        _remove_custom_provider(config)
-    elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
+    elif selected_provider == "kimi-coding":
        _model_flow_kimi(config, current_model)
    elif selected_provider in ("zai", "minimax", "minimax-cn"):
        _model_flow_api_key_provider(config, selected_provider, current_model)
@ -1343,8 +1345,10 @@ _PROVIDER_MODELS = {
        "glm-4.5-flash",
    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
@ -1361,8 +1365,112 @@ _PROVIDER_MODELS = {
 }
 def _model_flow_kimi(config, current_model=""):
    """Kimi / Moonshot model selection with automatic endpoint routing.
    - sk-kimi-* keys   → api.kimi.com/coding/v1  (Kimi Coding Plan)
    - Other keys        → api.moonshot.ai/v1      (legacy Moonshot)
    No manual base URL prompt — endpoint is determined by key prefix.
    """
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, KIMI_CODE_BASE_URL, _prompt_model_selection,
        _save_model_choice, deactivate_provider,
    )
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
    provider_id = "kimi-coding"
    pconfig = PROVIDER_REGISTRY[provider_id]
    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
    base_url_env = pconfig.base_url_env_var or ""
    # Step 1: Check / prompt for API key
    existing_key = ""
    for ev in pconfig.api_key_env_vars:
        existing_key = get_env_value(ev) or os.getenv(ev, "")
        if existing_key:
            break
    if not existing_key:
        print(f"No {pconfig.name} API key configured.")
        if key_env:
            try:
                new_key = input(f"{key_env} (or Enter to cancel): ").strip()
            except (KeyboardInterrupt, EOFError):
                print()
                return
            if not new_key:
                print("Cancelled.")
                return
            save_env_value(key_env, new_key)
            existing_key = new_key
            print("API key saved.")
            print()
    else:
        print(f"  {pconfig.name} API key: {existing_key[:8]}... ✓")
        print()
    # Step 2: Auto-detect endpoint from key prefix
    is_coding_plan = existing_key.startswith("sk-kimi-")
    if is_coding_plan:
        effective_base = KIMI_CODE_BASE_URL
        print(f"  Detected Kimi Coding Plan key → {effective_base}")
    else:
        effective_base = pconfig.inference_base_url
        print(f"  Using Moonshot endpoint → {effective_base}")
    # Clear any manual base URL override so auto-detection works at runtime
    if base_url_env and get_env_value(base_url_env):
        save_env_value(base_url_env, "")
    print()
    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
        # Coding Plan models (kimi-for-coding first)
        model_list = [
            "kimi-for-coding",
            "kimi-k2.5",
            "kimi-k2-thinking",
            "kimi-k2-thinking-turbo",
        ]
    else:
        # Legacy Moonshot models
        model_list = _PROVIDER_MODELS.get(provider_id, [])
    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        try:
            selected = input("Enter model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None
    if selected:
        # Clear custom endpoint if set (avoid confusion)
        if get_env_value("OPENAI_BASE_URL"):
            save_env_value("OPENAI_BASE_URL", "")
            save_env_value("OPENAI_API_KEY", "")
        _save_model_choice(selected)
        # Update config with provider and base URL
        cfg = load_config()
        model = cfg.get("model")
        if not isinstance(model, dict):
            model = {"default": model} if model else {}
            cfg["model"] = model
        model["provider"] = provider_id
        model["base_url"] = effective_base
        save_config(cfg)
        deactivate_provider()
        endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
        print(f"Default model set to: {selected} (via {endpoint_label})")
    else:
        print("No change.")
 def _model_flow_api_key_provider(config, provider_id, current_model=""):
-    """Generic flow for API-key providers (z.ai, Kimi, MiniMax)."""
+    """Generic flow for API-key providers (z.ai, MiniMax)."""
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
        _update_config_for_provider, deactivate_provider,
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@ -51,8 +51,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
        "glm-4.5-flash",
    ],
    "kimi-coding": [
        "kimi-for-coding",
        "kimi-k2.5",
        "kimi-k2-thinking",
        "kimi-k2-thinking-turbo",
        "kimi-k2-turbo-preview",
        "kimi-k2-0905-preview",
    ],
--- a/run_agent.py
+++ b/run_agent.py
@ -438,7 +438,7 @@ class AIAgent:
                }
            elif "api.kimi.com" in effective_base.lower():
                client_kwargs["default_headers"] = {
-                    "User-Agent": "KimiCLI/1.0",
+                    "User-Agent": "KimiCLI/1.3",
                }
        else:
            # No explicit creds — use the centralized provider router