Merge pull request #2215 from NousResearch/hermes/hermes-31d7db3b
fix: infer provider from base URL for models.dev context length lookup
This commit is contained in:
commit
8e884fb3f1
3 changed files with 66 additions and 18 deletions
|
|
@ -151,22 +151,42 @@ def _is_custom_endpoint(base_url: str) -> bool:
|
||||||
return bool(normalized) and not _is_openrouter_base_url(normalized)
|
return bool(normalized) and not _is_openrouter_base_url(normalized)
|
||||||
|
|
||||||
|
|
||||||
def _is_known_provider_base_url(base_url: str) -> bool:
|
_URL_TO_PROVIDER: Dict[str, str] = {
|
||||||
|
"api.openai.com": "openai",
|
||||||
|
"chatgpt.com": "openai",
|
||||||
|
"api.anthropic.com": "anthropic",
|
||||||
|
"api.z.ai": "zai",
|
||||||
|
"api.moonshot.ai": "kimi-coding",
|
||||||
|
"api.kimi.com": "kimi-coding",
|
||||||
|
"api.minimax": "minimax",
|
||||||
|
"dashscope.aliyuncs.com": "alibaba",
|
||||||
|
"dashscope-intl.aliyuncs.com": "alibaba",
|
||||||
|
"openrouter.ai": "openrouter",
|
||||||
|
"inference-api.nousresearch.com": "nous",
|
||||||
|
"api.deepseek.com": "deepseek",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _infer_provider_from_url(base_url: str) -> Optional[str]:
|
||||||
|
"""Infer the models.dev provider name from a base URL.
|
||||||
|
|
||||||
|
This allows context length resolution via models.dev for custom endpoints
|
||||||
|
like DashScope (Alibaba), Z.AI, Kimi, etc. without requiring the user to
|
||||||
|
explicitly set the provider name in config.
|
||||||
|
"""
|
||||||
normalized = _normalize_base_url(base_url)
|
normalized = _normalize_base_url(base_url)
|
||||||
if not normalized:
|
if not normalized:
|
||||||
return False
|
return None
|
||||||
parsed = urlparse(normalized if "://" in normalized else f"https://{normalized}")
|
parsed = urlparse(normalized if "://" in normalized else f"https://{normalized}")
|
||||||
host = parsed.netloc.lower() or parsed.path.lower()
|
host = parsed.netloc.lower() or parsed.path.lower()
|
||||||
known_hosts = (
|
for url_part, provider in _URL_TO_PROVIDER.items():
|
||||||
"api.openai.com",
|
if url_part in host:
|
||||||
"chatgpt.com",
|
return provider
|
||||||
"api.anthropic.com",
|
return None
|
||||||
"api.z.ai",
|
|
||||||
"api.moonshot.ai",
|
|
||||||
"api.kimi.com",
|
def _is_known_provider_base_url(base_url: str) -> bool:
|
||||||
"api.minimax",
|
return _infer_provider_from_url(base_url) is not None
|
||||||
)
|
|
||||||
return any(known_host in host for known_host in known_hosts)
|
|
||||||
|
|
||||||
|
|
||||||
def is_local_endpoint(base_url: str) -> bool:
|
def is_local_endpoint(base_url: str) -> bool:
|
||||||
|
|
@ -808,13 +828,21 @@ def get_model_context_length(
|
||||||
# These are provider-specific and take priority over the generic OR cache,
|
# These are provider-specific and take priority over the generic OR cache,
|
||||||
# since the same model can have different context limits per provider
|
# since the same model can have different context limits per provider
|
||||||
# (e.g. claude-opus-4.6 is 1M on Anthropic but 128K on GitHub Copilot).
|
# (e.g. claude-opus-4.6 is 1M on Anthropic but 128K on GitHub Copilot).
|
||||||
if provider == "nous":
|
# If provider is generic (openrouter/custom/empty), try to infer from URL.
|
||||||
|
effective_provider = provider
|
||||||
|
if not effective_provider or effective_provider in ("openrouter", "custom"):
|
||||||
|
if base_url:
|
||||||
|
inferred = _infer_provider_from_url(base_url)
|
||||||
|
if inferred:
|
||||||
|
effective_provider = inferred
|
||||||
|
|
||||||
|
if effective_provider == "nous":
|
||||||
ctx = _resolve_nous_context_length(model)
|
ctx = _resolve_nous_context_length(model)
|
||||||
if ctx:
|
if ctx:
|
||||||
return ctx
|
return ctx
|
||||||
if provider:
|
if effective_provider:
|
||||||
from agent.models_dev import lookup_models_dev_context
|
from agent.models_dev import lookup_models_dev_context
|
||||||
ctx = lookup_models_dev_context(provider, model)
|
ctx = lookup_models_dev_context(effective_provider, model)
|
||||||
if ctx:
|
if ctx:
|
||||||
return ctx
|
return ctx
|
||||||
|
|
||||||
|
|
|
||||||
10
cli.py
10
cli.py
|
|
@ -1504,7 +1504,7 @@ class HermesCLI:
|
||||||
_cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}")
|
_cprint(f"{_DIM}└{'─' * (w - 2)}┘{_RST}")
|
||||||
self._reasoning_box_opened = False
|
self._reasoning_box_opened = False
|
||||||
|
|
||||||
def _stream_delta(self, text: str) -> None:
|
def _stream_delta(self, text) -> None:
|
||||||
"""Line-buffered streaming callback for real-time token rendering.
|
"""Line-buffered streaming callback for real-time token rendering.
|
||||||
|
|
||||||
Receives text deltas from the agent as tokens arrive. Buffers
|
Receives text deltas from the agent as tokens arrive. Buffers
|
||||||
|
|
@ -1514,7 +1514,15 @@ class HermesCLI:
|
||||||
Reasoning/thinking blocks (<REASONING_SCRATCHPAD>, <think>, etc.)
|
Reasoning/thinking blocks (<REASONING_SCRATCHPAD>, <think>, etc.)
|
||||||
are suppressed during streaming since they'd display raw XML tags.
|
are suppressed during streaming since they'd display raw XML tags.
|
||||||
The agent strips them from the final response anyway.
|
The agent strips them from the final response anyway.
|
||||||
|
|
||||||
|
A ``None`` value signals an intermediate turn boundary (tools are
|
||||||
|
about to execute). Flushes any open boxes and resets state so
|
||||||
|
tool feed lines render cleanly between turns.
|
||||||
"""
|
"""
|
||||||
|
if text is None:
|
||||||
|
self._flush_stream()
|
||||||
|
self._reset_stream_state()
|
||||||
|
return
|
||||||
if not text:
|
if not text:
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
|
||||||
16
run_agent.py
16
run_agent.py
|
|
@ -4838,7 +4838,7 @@ class AIAgent:
|
||||||
spinner.stop(cute_msg)
|
spinner.stop(cute_msg)
|
||||||
elif self.quiet_mode:
|
elif self.quiet_mode:
|
||||||
self._vprint(f" {cute_msg}")
|
self._vprint(f" {cute_msg}")
|
||||||
elif self.quiet_mode and not self._has_stream_consumers():
|
elif self.quiet_mode:
|
||||||
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
|
face = random.choice(KawaiiSpinner.KAWAII_WAITING)
|
||||||
emoji = _get_tool_emoji(function_name)
|
emoji = _get_tool_emoji(function_name)
|
||||||
preview = _build_tool_preview(function_name, function_args) or function_name
|
preview = _build_tool_preview(function_name, function_args) or function_name
|
||||||
|
|
@ -6568,7 +6568,19 @@ class AIAgent:
|
||||||
self._vprint(f" ┊ 💬 {clean}")
|
self._vprint(f" ┊ 💬 {clean}")
|
||||||
|
|
||||||
messages.append(assistant_msg)
|
messages.append(assistant_msg)
|
||||||
|
|
||||||
|
# Close any open streaming display (response box, reasoning
|
||||||
|
# box) before tool execution begins. Intermediate turns may
|
||||||
|
# have streamed early content that opened the response box;
|
||||||
|
# flushing here prevents it from wrapping tool feed lines.
|
||||||
|
# Only signal the display callback — TTS (_stream_callback)
|
||||||
|
# should NOT receive None (it uses None as end-of-stream).
|
||||||
|
if self.stream_delta_callback:
|
||||||
|
try:
|
||||||
|
self.stream_delta_callback(None)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
_msg_count_before_tools = len(messages)
|
_msg_count_before_tools = len(messages)
|
||||||
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue