From e9c33171581d5b310c1a467247de3522ef73a99a Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 12 Mar 2026 05:58:48 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20improve=20Kimi=20model=20selection=20?= =?UTF-8?q?=E2=80=94=20auto-detect=20endpoint,=20add=20missing=20models=20?= =?UTF-8?q?(#1039)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: /reasoning command output ordering, display, and inline think extraction Three issues with the /reasoning command: 1. Output interleaving: The command echo used print() while feedback used _cprint(), causing them to render out-of-order under prompt_toolkit's patch_stdout. Changed echo to use _cprint() so all output renders through the same path in correct order. 2. Reasoning display not working: /reasoning show toggled a flag but reasoning never appeared for models that embed thinking in inline blocks rather than structured API fields. Added fallback extraction in _build_assistant_message to capture block content as reasoning when no structured reasoning fields (reasoning, reasoning_content, reasoning_details) are present. This feeds into both the reasoning callback (during tool loops) and the post-response reasoning box display. 3. Feedback clarity: Added checkmarks to confirm actions, persisted show/hide to config (was session-only before), and aligned the status display for readability. Tests: 7 new tests for inline think block extraction (41 total). * feat: add /reasoning command to gateway (Telegram/Discord/etc) The /reasoning command only existed in the CLI — messaging platforms had no way to view or change reasoning settings. This adds: 1. /reasoning command handler in the gateway: - No args: shows current effort level and display state - /reasoning : sets reasoning effort (none/low/medium/high/xhigh) - /reasoning show|hide: toggles reasoning display in responses - All changes saved to config.yaml immediately 2. Reasoning display in gateway responses: - When show_reasoning is enabled, prepends a 'Reasoning' block with the model's last_reasoning content before the response - Collapses long reasoning (>15 lines) to keep messages readable - Uses last_reasoning from run_conversation result dict 3. Plumbing: - Added _show_reasoning attribute loaded from config at startup - Propagated last_reasoning through _run_agent return dict - Added /reasoning to help text and known_commands set - Uses getattr for _show_reasoning to handle test stubs * fix: improve Kimi model selection — auto-detect endpoint, add missing models Kimi Coding Plan setup: - New dedicated _model_flow_kimi() replaces the generic API-key flow for kimi-coding. Removes the confusing 'Base URL' prompt entirely — the endpoint is auto-detected from the API key prefix: sk-kimi-* → api.kimi.com/coding/v1 (Kimi Coding Plan) other → api.moonshot.ai/v1 (legacy Moonshot) - Shows appropriate models for each endpoint: Coding Plan: kimi-for-coding, kimi-k2.5, kimi-k2-thinking, kimi-k2-thinking-turbo Moonshot: full model catalog - Clears any stale KIMI_BASE_URL override so runtime auto-detection via _resolve_kimi_base_url() works correctly. Model catalog updates: - Added kimi-for-coding (primary Coding Plan model) and kimi-k2-thinking-turbo to models.py, main.py _PROVIDER_MODELS, and model_metadata.py context windows. - Updated User-Agent from KimiCLI/1.0 to KimiCLI/1.3 (Kimi's coding endpoint whitelists known coding agents via User-Agent sniffing). --- agent/model_metadata.py | 2 + hermes_cli/main.py | 112 +++++++++++++++++++++++++++++++++++++++- hermes_cli/models.py | 2 + run_agent.py | 2 +- 4 files changed, 115 insertions(+), 3 deletions(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 3b2ab9d0..e8d1e51b 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -53,8 +53,10 @@ DEFAULT_CONTEXT_LENGTHS = { "glm-5": 202752, "glm-4.5": 131072, "glm-4.5-flash": 131072, + "kimi-for-coding": 262144, "kimi-k2.5": 262144, "kimi-k2-thinking": 262144, + "kimi-k2-thinking-turbo": 262144, "kimi-k2-turbo-preview": 262144, "kimi-k2-0905-preview": 131072, "MiniMax-M2.5": 204800, diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 78153535..ba357042 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -832,7 +832,9 @@ def cmd_model(args): _model_flow_named_custom(config, _custom_provider_map[selected_provider]) elif selected_provider == "remove-custom": _remove_custom_provider(config) - elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"): + elif selected_provider == "kimi-coding": + _model_flow_kimi(config, current_model) + elif selected_provider in ("zai", "minimax", "minimax-cn"): _model_flow_api_key_provider(config, selected_provider, current_model) @@ -1343,8 +1345,10 @@ _PROVIDER_MODELS = { "glm-4.5-flash", ], "kimi-coding": [ + "kimi-for-coding", "kimi-k2.5", "kimi-k2-thinking", + "kimi-k2-thinking-turbo", "kimi-k2-turbo-preview", "kimi-k2-0905-preview", ], @@ -1361,8 +1365,112 @@ _PROVIDER_MODELS = { } +def _model_flow_kimi(config, current_model=""): + """Kimi / Moonshot model selection with automatic endpoint routing. + + - sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan) + - Other keys → api.moonshot.ai/v1 (legacy Moonshot) + + No manual base URL prompt — endpoint is determined by key prefix. + """ + from hermes_cli.auth import ( + PROVIDER_REGISTRY, KIMI_CODE_BASE_URL, _prompt_model_selection, + _save_model_choice, deactivate_provider, + ) + from hermes_cli.config import get_env_value, save_env_value, load_config, save_config + + provider_id = "kimi-coding" + pconfig = PROVIDER_REGISTRY[provider_id] + key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else "" + base_url_env = pconfig.base_url_env_var or "" + + # Step 1: Check / prompt for API key + existing_key = "" + for ev in pconfig.api_key_env_vars: + existing_key = get_env_value(ev) or os.getenv(ev, "") + if existing_key: + break + + if not existing_key: + print(f"No {pconfig.name} API key configured.") + if key_env: + try: + new_key = input(f"{key_env} (or Enter to cancel): ").strip() + except (KeyboardInterrupt, EOFError): + print() + return + if not new_key: + print("Cancelled.") + return + save_env_value(key_env, new_key) + existing_key = new_key + print("API key saved.") + print() + else: + print(f" {pconfig.name} API key: {existing_key[:8]}... ✓") + print() + + # Step 2: Auto-detect endpoint from key prefix + is_coding_plan = existing_key.startswith("sk-kimi-") + if is_coding_plan: + effective_base = KIMI_CODE_BASE_URL + print(f" Detected Kimi Coding Plan key → {effective_base}") + else: + effective_base = pconfig.inference_base_url + print(f" Using Moonshot endpoint → {effective_base}") + # Clear any manual base URL override so auto-detection works at runtime + if base_url_env and get_env_value(base_url_env): + save_env_value(base_url_env, "") + print() + + # Step 3: Model selection — show appropriate models for the endpoint + if is_coding_plan: + # Coding Plan models (kimi-for-coding first) + model_list = [ + "kimi-for-coding", + "kimi-k2.5", + "kimi-k2-thinking", + "kimi-k2-thinking-turbo", + ] + else: + # Legacy Moonshot models + model_list = _PROVIDER_MODELS.get(provider_id, []) + + if model_list: + selected = _prompt_model_selection(model_list, current_model=current_model) + else: + try: + selected = input("Enter model name: ").strip() + except (KeyboardInterrupt, EOFError): + selected = None + + if selected: + # Clear custom endpoint if set (avoid confusion) + if get_env_value("OPENAI_BASE_URL"): + save_env_value("OPENAI_BASE_URL", "") + save_env_value("OPENAI_API_KEY", "") + + _save_model_choice(selected) + + # Update config with provider and base URL + cfg = load_config() + model = cfg.get("model") + if not isinstance(model, dict): + model = {"default": model} if model else {} + cfg["model"] = model + model["provider"] = provider_id + model["base_url"] = effective_base + save_config(cfg) + deactivate_provider() + + endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot" + print(f"Default model set to: {selected} (via {endpoint_label})") + else: + print("No change.") + + def _model_flow_api_key_provider(config, provider_id, current_model=""): - """Generic flow for API-key providers (z.ai, Kimi, MiniMax).""" + """Generic flow for API-key providers (z.ai, MiniMax).""" from hermes_cli.auth import ( PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice, _update_config_for_provider, deactivate_provider, diff --git a/hermes_cli/models.py b/hermes_cli/models.py index d07da105..92dcbf97 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -51,8 +51,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "glm-4.5-flash", ], "kimi-coding": [ + "kimi-for-coding", "kimi-k2.5", "kimi-k2-thinking", + "kimi-k2-thinking-turbo", "kimi-k2-turbo-preview", "kimi-k2-0905-preview", ], diff --git a/run_agent.py b/run_agent.py index 0d6fe545..7808435d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -438,7 +438,7 @@ class AIAgent: } elif "api.kimi.com" in effective_base.lower(): client_kwargs["default_headers"] = { - "User-Agent": "KimiCLI/1.0", + "User-Agent": "KimiCLI/1.3", } else: # No explicit creds — use the centralized provider router