fix: improve Kimi model selection — auto-detect endpoint, add missing models (#1039)
* fix: /reasoning command output ordering, display, and inline think extraction
Three issues with the /reasoning command:
1. Output interleaving: The command echo used print() while feedback
used _cprint(), causing them to render out-of-order under
prompt_toolkit's patch_stdout. Changed echo to use _cprint() so
all output renders through the same path in correct order.
2. Reasoning display not working: /reasoning show toggled a flag
but reasoning never appeared for models that embed thinking in
inline <think> blocks rather than structured API fields. Added
fallback extraction in _build_assistant_message to capture
<think> block content as reasoning when no structured reasoning
fields (reasoning, reasoning_content, reasoning_details) are
present. This feeds into both the reasoning callback (during
tool loops) and the post-response reasoning box display.
3. Feedback clarity: Added checkmarks to confirm actions, persisted
show/hide to config (was session-only before), and aligned the
status display for readability.
Tests: 7 new tests for inline think block extraction (41 total).
* feat: add /reasoning command to gateway (Telegram/Discord/etc)
The /reasoning command only existed in the CLI — messaging platforms
had no way to view or change reasoning settings. This adds:
1. /reasoning command handler in the gateway:
- No args: shows current effort level and display state
- /reasoning <level>: sets reasoning effort (none/low/medium/high/xhigh)
- /reasoning show|hide: toggles reasoning display in responses
- All changes saved to config.yaml immediately
2. Reasoning display in gateway responses:
- When show_reasoning is enabled, prepends a 'Reasoning' block
containing the model's last_reasoning content to the response
- Collapses long reasoning (>15 lines) to keep messages readable
- Uses last_reasoning from run_conversation result dict
3. Plumbing:
- Added _show_reasoning attribute loaded from config at startup
- Propagated last_reasoning through _run_agent return dict
- Added /reasoning to help text and known_commands set
- Reads _show_reasoning via getattr so test stubs that lack the attribute still work
* fix: improve Kimi model selection — auto-detect endpoint, add missing models
Kimi Coding Plan setup:
- New dedicated _model_flow_kimi() replaces the generic API-key flow
for kimi-coding. Removes the confusing 'Base URL' prompt entirely —
the endpoint is auto-detected from the API key prefix:
sk-kimi-* → api.kimi.com/coding/v1 (Kimi Coding Plan)
other → api.moonshot.ai/v1 (legacy Moonshot)
- Shows appropriate models for each endpoint:
Coding Plan: kimi-for-coding, kimi-k2.5, kimi-k2-thinking, kimi-k2-thinking-turbo
Moonshot: full model catalog
- Clears any stale KIMI_BASE_URL override so runtime auto-detection
via _resolve_kimi_base_url() works correctly.
Model catalog updates:
- Added kimi-for-coding (primary Coding Plan model) and kimi-k2-thinking-turbo
to models.py, main.py _PROVIDER_MODELS, and model_metadata.py context windows.
- Updated User-Agent from KimiCLI/1.0 to KimiCLI/1.3 (Kimi's coding
endpoint whitelists known coding agents via User-Agent sniffing).
This commit is contained in:
parent
1e3607150c
commit
e9c3317158
4 changed files with 115 additions and 3 deletions
|
|
@ -53,8 +53,10 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||||
"glm-5": 202752,
|
"glm-5": 202752,
|
||||||
"glm-4.5": 131072,
|
"glm-4.5": 131072,
|
||||||
"glm-4.5-flash": 131072,
|
"glm-4.5-flash": 131072,
|
||||||
|
"kimi-for-coding": 262144,
|
||||||
"kimi-k2.5": 262144,
|
"kimi-k2.5": 262144,
|
||||||
"kimi-k2-thinking": 262144,
|
"kimi-k2-thinking": 262144,
|
||||||
|
"kimi-k2-thinking-turbo": 262144,
|
||||||
"kimi-k2-turbo-preview": 262144,
|
"kimi-k2-turbo-preview": 262144,
|
||||||
"kimi-k2-0905-preview": 131072,
|
"kimi-k2-0905-preview": 131072,
|
||||||
"MiniMax-M2.5": 204800,
|
"MiniMax-M2.5": 204800,
|
||||||
|
|
|
||||||
|
|
@ -832,7 +832,9 @@ def cmd_model(args):
|
||||||
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
||||||
elif selected_provider == "remove-custom":
|
elif selected_provider == "remove-custom":
|
||||||
_remove_custom_provider(config)
|
_remove_custom_provider(config)
|
||||||
elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
|
elif selected_provider == "kimi-coding":
|
||||||
|
_model_flow_kimi(config, current_model)
|
||||||
|
elif selected_provider in ("zai", "minimax", "minimax-cn"):
|
||||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1343,8 +1345,10 @@ _PROVIDER_MODELS = {
|
||||||
"glm-4.5-flash",
|
"glm-4.5-flash",
|
||||||
],
|
],
|
||||||
"kimi-coding": [
|
"kimi-coding": [
|
||||||
|
"kimi-for-coding",
|
||||||
"kimi-k2.5",
|
"kimi-k2.5",
|
||||||
"kimi-k2-thinking",
|
"kimi-k2-thinking",
|
||||||
|
"kimi-k2-thinking-turbo",
|
||||||
"kimi-k2-turbo-preview",
|
"kimi-k2-turbo-preview",
|
||||||
"kimi-k2-0905-preview",
|
"kimi-k2-0905-preview",
|
||||||
],
|
],
|
||||||
|
|
@ -1361,8 +1365,112 @@ _PROVIDER_MODELS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _model_flow_kimi(config, current_model=""):
    """Kimi / Moonshot model selection with automatic endpoint routing.

    - sk-kimi-* keys → api.kimi.com/coding/v1 (Kimi Coding Plan)
    - Other keys → api.moonshot.ai/v1 (legacy Moonshot)

    No manual base URL prompt — endpoint is determined by key prefix.

    The flow runs in three steps: (1) find an existing API key or prompt
    for one and save it, (2) pick the endpoint from the key prefix and
    clear any saved base-URL override, (3) let the user pick a model and
    persist the model, provider and base URL to the config.

    Args:
        config: Not read by this function; the config is reloaded with
            ``load_config()`` right before it is updated and saved.
        current_model: Forwarded to ``_prompt_model_selection`` as
            ``current_model``.

    Returns:
        None. Results are reported on stdout and persisted through
        ``save_env_value`` / ``save_config``.
    """
    from hermes_cli.auth import (
        PROVIDER_REGISTRY, KIMI_CODE_BASE_URL, _prompt_model_selection,
        _save_model_choice, deactivate_provider,
    )
    from hermes_cli.config import get_env_value, save_env_value, load_config, save_config

    provider_id = "kimi-coding"
    pconfig = PROVIDER_REGISTRY[provider_id]
    key_env = pconfig.api_key_env_vars[0] if pconfig.api_key_env_vars else ""
    base_url_env = pconfig.base_url_env_var or ""

    # Step 1: Check / prompt for API key
    # The saved env value wins over the process environment; the first
    # non-empty hit across the provider's key variables is used.
    existing_key = ""
    for ev in pconfig.api_key_env_vars or ():
        existing_key = get_env_value(ev) or os.getenv(ev, "")
        if existing_key:
            break

    if not existing_key:
        print(f"No {pconfig.name} API key configured.")
        if not key_env:
            # There is no env var to store a key under, so a key cannot be
            # collected. Stop here instead of configuring the provider
            # with an empty credential.
            print("No change.")
            return
        try:
            new_key = input(f"{key_env} (or Enter to cancel): ").strip()
        except (KeyboardInterrupt, EOFError):
            print()
            return
        if not new_key:
            print("Cancelled.")
            return
        save_env_value(key_env, new_key)
        existing_key = new_key
        print("API key saved.")
        print()
    else:
        # Only the first 8 characters of the key are echoed.
        print(f" {pconfig.name} API key: {existing_key[:8]}... ✓")
        print()

    # Step 2: Auto-detect endpoint from key prefix
    is_coding_plan = existing_key.startswith("sk-kimi-")
    if is_coding_plan:
        effective_base = KIMI_CODE_BASE_URL
        print(f" Detected Kimi Coding Plan key → {effective_base}")
    else:
        effective_base = pconfig.inference_base_url
        print(f" Using Moonshot endpoint → {effective_base}")
    # Clear any manual base URL override so auto-detection works at runtime
    if base_url_env and get_env_value(base_url_env):
        save_env_value(base_url_env, "")
    print()

    # Step 3: Model selection — show appropriate models for the endpoint
    if is_coding_plan:
        # Coding Plan models (kimi-for-coding first)
        model_list = [
            "kimi-for-coding",
            "kimi-k2.5",
            "kimi-k2-thinking",
            "kimi-k2-thinking-turbo",
        ]
    else:
        # Legacy Moonshot models
        model_list = _PROVIDER_MODELS.get(provider_id, [])

    if model_list:
        selected = _prompt_model_selection(model_list, current_model=current_model)
    else:
        # No catalog available: fall back to free-form entry.
        try:
            selected = input("Enter model name: ").strip()
        except (KeyboardInterrupt, EOFError):
            selected = None

    if not selected:
        print("No change.")
        return

    # Clear custom endpoint if set (avoid confusion)
    if get_env_value("OPENAI_BASE_URL"):
        save_env_value("OPENAI_BASE_URL", "")
        save_env_value("OPENAI_API_KEY", "")

    _save_model_choice(selected)

    # Update config with provider and base URL
    cfg = load_config()
    model = cfg.get("model")
    if not isinstance(model, dict):
        # A non-dict "model" entry is wrapped so that provider and
        # base_url can be stored alongside it.
        model = {"default": model} if model else {}
        cfg["model"] = model
    model["provider"] = provider_id
    model["base_url"] = effective_base
    save_config(cfg)
    deactivate_provider()

    endpoint_label = "Kimi Coding" if is_coding_plan else "Moonshot"
    print(f"Default model set to: {selected} (via {endpoint_label})")
|
||||||
|
|
||||||
|
|
||||||
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||||
"""Generic flow for API-key providers (z.ai, Kimi, MiniMax)."""
|
"""Generic flow for API-key providers (z.ai, MiniMax)."""
|
||||||
from hermes_cli.auth import (
|
from hermes_cli.auth import (
|
||||||
PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
|
PROVIDER_REGISTRY, _prompt_model_selection, _save_model_choice,
|
||||||
_update_config_for_provider, deactivate_provider,
|
_update_config_for_provider, deactivate_provider,
|
||||||
|
|
|
||||||
|
|
@ -51,8 +51,10 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||||
"glm-4.5-flash",
|
"glm-4.5-flash",
|
||||||
],
|
],
|
||||||
"kimi-coding": [
|
"kimi-coding": [
|
||||||
|
"kimi-for-coding",
|
||||||
"kimi-k2.5",
|
"kimi-k2.5",
|
||||||
"kimi-k2-thinking",
|
"kimi-k2-thinking",
|
||||||
|
"kimi-k2-thinking-turbo",
|
||||||
"kimi-k2-turbo-preview",
|
"kimi-k2-turbo-preview",
|
||||||
"kimi-k2-0905-preview",
|
"kimi-k2-0905-preview",
|
||||||
],
|
],
|
||||||
|
|
|
||||||
|
|
@ -438,7 +438,7 @@ class AIAgent:
|
||||||
}
|
}
|
||||||
elif "api.kimi.com" in effective_base.lower():
|
elif "api.kimi.com" in effective_base.lower():
|
||||||
client_kwargs["default_headers"] = {
|
client_kwargs["default_headers"] = {
|
||||||
"User-Agent": "KimiCLI/1.0",
|
"User-Agent": "KimiCLI/1.3",
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
# No explicit creds — use the centralized provider router
|
# No explicit creds — use the centralized provider router
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue