Merge pull request #2182 from NousResearch/hermes/hermes-5d6932ba

fix: 6 bugs in model metadata, reasoning detection, and delegate tool
2026-03-20 08:53:01 -07:00 · 2026-03-20 08:53:01 -07:00 · 5e705bc31b
commit 5e705bc31b
parent a51a767407 55ce601502
4 changed files with 50 additions and 30 deletions
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -34,17 +34,29 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
 })
 _OLLAMA_TAG_PATTERN = re.compile(
    r"^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)",
    re.IGNORECASE,
 )
 def _strip_provider_prefix(model: str) -> str:
    """Strip a recognised provider prefix from a model string.
    ``"local:my-model"`` → ``"my-model"``
    ``"qwen3.5:27b"``   → ``"qwen3.5:27b"``  (unchanged — not a provider prefix)
    ``"qwen:0.5b"``     → ``"qwen:0.5b"``    (unchanged — Ollama model:tag)
    ``"deepseek:latest"``→ ``"deepseek:latest"``(unchanged — Ollama model:tag)
    """
    if ":" not in model or model.startswith("http"):
        return model
-    prefix = model.split(":", 1)[0].strip().lower()
+    prefix, suffix = model.split(":", 1)
-    if prefix in _PROVIDER_PREFIXES:
+    prefix_lower = prefix.strip().lower()
-        return model.split(":", 1)[1]
+    if prefix_lower in _PROVIDER_PREFIXES:
        # Don't strip if suffix looks like an Ollama tag (e.g. "7b", "latest", "q4_0")
        if _OLLAMA_TAG_PATTERN.match(suffix.strip()):
            return model
        return suffix
    return model
 _model_metadata_cache: Dict[str, Dict[str, Any]] = {}
@@ -800,7 +812,7 @@ def get_model_context_length(
        ctx = _resolve_nous_context_length(model)
        if ctx:
            return ctx
-    elif provider:
+    if provider:
        from agent.models_dev import lookup_models_dev_context
        ctx = lookup_models_dev_context(provider, model)
        if ctx:
@@ -812,10 +824,13 @@ def get_model_context_length(
        return metadata[model].get("context_length", 128000)
    # 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
    # Only check `default_model in model` (is the key a substring of the input).
    # The reverse (`model in default_model`) causes shorter names like
    # "claude-sonnet-4" to incorrectly match "claude-sonnet-4-6" and return 1M.
    for default_model, length in sorted(
        DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
    ):
-        if default_model in model or model in default_model:
+        if default_model in model:
            return length
    # 9. Query local server as last resort
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -107,11 +107,12 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
    except Exception as e:
        logger.debug("Failed to fetch models.dev: %s", e)
-    # Fall back to disk cache
+    # Fall back to disk cache — use a short TTL (5 min) so we retry
    # the network fetch soon instead of serving stale data for a full hour.
    if not _models_dev_cache:
        _models_dev_cache = _load_disk_cache()
        if _models_dev_cache:
-            _models_dev_cache_time = time.time()
+            _models_dev_cache_time = time.time() - _MODELS_DEV_CACHE_TTL + 300
            logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache))
    return _models_dev_cache
--- a/run_agent.py
+++ b/run_agent.py
@@ -1142,10 +1142,11 @@ class AIAgent:
    def _has_content_after_think_block(self, content: str) -> bool:
        """
-        Check if content has actual text after any <think></think> blocks.
+        Check if content has actual text after any reasoning/thinking blocks.
        This detects cases where the model only outputs reasoning but no actual
        response, which indicates an incomplete generation that should be retried.
        Must stay in sync with _strip_think_blocks() tag variants.
        Args:
            content: The assistant message content to check
@@ -1156,8 +1157,8 @@ class AIAgent:
        if not content:
            return False
-        # Remove all <think>...</think> blocks (including nested ones, non-greedy)
+        # Remove all reasoning tag variants (must match _strip_think_blocks)
-        cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+        cleaned = self._strip_think_blocks(content)
        # Check if there's any non-whitespace content remaining
        return bool(cleaned.strip())
--- a/tools/delegate_tool.py
+++ b/tools/delegate_tool.py
@@ -470,22 +470,25 @@ def delegate_task(
    _parent_tool_names = list(_model_tools._last_resolved_tool_names)
    # Build all child agents on the main thread (thread-safe construction)
    # Wrapped in try/finally so the global is always restored even if a
    # child build raises (otherwise _last_resolved_tool_names stays corrupted).
    children = []
-    for i, t in enumerate(task_list):
+    try:
-        child = _build_child_agent(
+        for i, t in enumerate(task_list):
-            task_index=i, goal=t["goal"], context=t.get("context"),
+            child = _build_child_agent(
-            toolsets=t.get("toolsets") or toolsets, model=creds["model"],
+                task_index=i, goal=t["goal"], context=t.get("context"),
-            max_iterations=effective_max_iter, parent_agent=parent_agent,
+                toolsets=t.get("toolsets") or toolsets, model=creds["model"],
-            override_provider=creds["provider"], override_base_url=creds["base_url"],
+                max_iterations=effective_max_iter, parent_agent=parent_agent,
-            override_api_key=creds["api_key"],
+                override_provider=creds["provider"], override_base_url=creds["base_url"],
-            override_api_mode=creds["api_mode"],
+                override_api_key=creds["api_key"],
-        )
+                override_api_mode=creds["api_mode"],
-        # Override with correct parent tool names (before child construction mutated global)
+            )
-        child._delegate_saved_tool_names = _parent_tool_names
+            # Override with correct parent tool names (before child construction mutated global)
-        children.append((i, t, child))
+            child._delegate_saved_tool_names = _parent_tool_names
-
+            children.append((i, t, child))
-    # Authoritative restore: reset global to parent's tool names after all children built
+    finally:
-    _model_tools._last_resolved_tool_names = _parent_tool_names
+        # Authoritative restore: reset global to parent's tool names after all children built
        _model_tools._last_resolved_tool_names = _parent_tool_names
    if n_tasks == 1:
        # Single task -- run directly (no thread pool overhead)