diff --git a/agent/model_metadata.py b/agent/model_metadata.py index d5eebd07..43bf73f8 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -33,6 +33,8 @@ DEFAULT_CONTEXT_LENGTHS = { "meta-llama/llama-3.3-70b-instruct": 131072, "deepseek/deepseek-chat-v3": 65536, "qwen/qwen-2.5-72b-instruct": 32768, + "glm-4.7": 202752, + "glm-5": 202752, } diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index c930e0c7..96b1305f 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -72,12 +72,24 @@ def _resolve_openrouter_runtime( or OPENROUTER_BASE_URL ).rstrip("/") - api_key = ( - explicit_api_key - or os.getenv("OPENROUTER_API_KEY") - or os.getenv("OPENAI_API_KEY") - or "" - ) + # When base_url points to a non-OpenRouter endpoint (e.g. Z.ai, local LLM), + # prefer OPENAI_API_KEY so the correct credentials reach the correct provider. + # This allows OPENROUTER_API_KEY to coexist for auxiliary tasks (compression + # summaries, vision, session search) without hijacking main inference. + if base_url and "openrouter" not in base_url.lower(): + api_key = ( + explicit_api_key + or os.getenv("OPENAI_API_KEY") + or os.getenv("OPENROUTER_API_KEY") + or "" + ) + else: + api_key = ( + explicit_api_key + or os.getenv("OPENROUTER_API_KEY") + or os.getenv("OPENAI_API_KEY") + or "" + ) source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"