Merge pull request #2182 from NousResearch/hermes/hermes-5d6932ba
fix: 6 bugs in model metadata, reasoning detection, and delegate tool
This commit is contained in:
commit
5e705bc31b
4 changed files with 50 additions and 30 deletions
|
|
@ -34,17 +34,29 @@ _PROVIDER_PREFIXES: frozenset[str] = frozenset({
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
_OLLAMA_TAG_PATTERN = re.compile(
|
||||||
|
r"^(\d+\.?\d*b|latest|stable|q\d|fp?\d|instruct|chat|coder|vision|text)",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _strip_provider_prefix(model: str) -> str:
|
def _strip_provider_prefix(model: str) -> str:
|
||||||
"""Strip a recognised provider prefix from a model string.
|
"""Strip a recognised provider prefix from a model string.
|
||||||
|
|
||||||
``"local:my-model"`` → ``"my-model"``
|
``"local:my-model"`` → ``"my-model"``
|
||||||
``"qwen3.5:27b"`` → ``"qwen3.5:27b"`` (unchanged — not a provider prefix)
|
``"qwen3.5:27b"`` → ``"qwen3.5:27b"`` (unchanged — not a provider prefix)
|
||||||
|
``"qwen:0.5b"`` → ``"qwen:0.5b"`` (unchanged — Ollama model:tag)
|
||||||
|
``"deepseek:latest"`` → ``"deepseek:latest"`` (unchanged — Ollama model:tag)
|
||||||
"""
|
"""
|
||||||
if ":" not in model or model.startswith("http"):
|
if ":" not in model or model.startswith("http"):
|
||||||
return model
|
return model
|
||||||
prefix = model.split(":", 1)[0].strip().lower()
|
prefix, suffix = model.split(":", 1)
|
||||||
if prefix in _PROVIDER_PREFIXES:
|
prefix_lower = prefix.strip().lower()
|
||||||
return model.split(":", 1)[1]
|
if prefix_lower in _PROVIDER_PREFIXES:
|
||||||
|
# Don't strip if suffix looks like an Ollama tag (e.g. "7b", "latest", "q4_0")
|
||||||
|
if _OLLAMA_TAG_PATTERN.match(suffix.strip()):
|
||||||
|
return model
|
||||||
|
return suffix
|
||||||
return model
|
return model
|
||||||
|
|
||||||
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
|
_model_metadata_cache: Dict[str, Dict[str, Any]] = {}
|
||||||
|
|
@ -800,7 +812,7 @@ def get_model_context_length(
|
||||||
ctx = _resolve_nous_context_length(model)
|
ctx = _resolve_nous_context_length(model)
|
||||||
if ctx:
|
if ctx:
|
||||||
return ctx
|
return ctx
|
||||||
elif provider:
|
if provider:
|
||||||
from agent.models_dev import lookup_models_dev_context
|
from agent.models_dev import lookup_models_dev_context
|
||||||
ctx = lookup_models_dev_context(provider, model)
|
ctx = lookup_models_dev_context(provider, model)
|
||||||
if ctx:
|
if ctx:
|
||||||
|
|
@ -812,10 +824,13 @@ def get_model_context_length(
|
||||||
return metadata[model].get("context_length", 128000)
|
return metadata[model].get("context_length", 128000)
|
||||||
|
|
||||||
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
# 8. Hardcoded defaults (fuzzy match — longest key first for specificity)
|
||||||
|
# Only check `default_model in model` (is the key a substring of the input).
|
||||||
|
# The reverse (`model in default_model`) causes shorter names like
|
||||||
|
# "claude-sonnet-4" to incorrectly match "claude-sonnet-4-6" and return 1M.
|
||||||
for default_model, length in sorted(
|
for default_model, length in sorted(
|
||||||
DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
|
DEFAULT_CONTEXT_LENGTHS.items(), key=lambda x: len(x[0]), reverse=True
|
||||||
):
|
):
|
||||||
if default_model in model or model in default_model:
|
if default_model in model:
|
||||||
return length
|
return length
|
||||||
|
|
||||||
# 9. Query local server as last resort
|
# 9. Query local server as last resort
|
||||||
|
|
|
||||||
|
|
@ -107,11 +107,12 @@ def fetch_models_dev(force_refresh: bool = False) -> Dict[str, Any]:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Failed to fetch models.dev: %s", e)
|
logger.debug("Failed to fetch models.dev: %s", e)
|
||||||
|
|
||||||
# Fall back to disk cache
|
# Fall back to disk cache — use a short TTL (5 min) so we retry
|
||||||
|
# the network fetch soon instead of serving stale data for a full hour.
|
||||||
if not _models_dev_cache:
|
if not _models_dev_cache:
|
||||||
_models_dev_cache = _load_disk_cache()
|
_models_dev_cache = _load_disk_cache()
|
||||||
if _models_dev_cache:
|
if _models_dev_cache:
|
||||||
_models_dev_cache_time = time.time()
|
_models_dev_cache_time = time.time() - _MODELS_DEV_CACHE_TTL + 300
|
||||||
logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache))
|
logger.debug("Loaded models.dev from disk cache (%d providers)", len(_models_dev_cache))
|
||||||
|
|
||||||
return _models_dev_cache
|
return _models_dev_cache
|
||||||
|
|
|
||||||
|
|
@ -1142,10 +1142,11 @@ class AIAgent:
|
||||||
|
|
||||||
def _has_content_after_think_block(self, content: str) -> bool:
|
def _has_content_after_think_block(self, content: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if content has actual text after any <think></think> blocks.
|
Check if content has actual text after any reasoning/thinking blocks.
|
||||||
|
|
||||||
This detects cases where the model only outputs reasoning but no actual
|
This detects cases where the model only outputs reasoning but no actual
|
||||||
response, which indicates an incomplete generation that should be retried.
|
response, which indicates an incomplete generation that should be retried.
|
||||||
|
Must stay in sync with _strip_think_blocks() tag variants.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
content: The assistant message content to check
|
content: The assistant message content to check
|
||||||
|
|
@ -1156,8 +1157,8 @@ class AIAgent:
|
||||||
if not content:
|
if not content:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# Remove all <think>...</think> blocks (including nested ones, non-greedy)
|
# Remove all reasoning tag variants (must match _strip_think_blocks)
|
||||||
cleaned = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
|
cleaned = self._strip_think_blocks(content)
|
||||||
|
|
||||||
# Check if there's any non-whitespace content remaining
|
# Check if there's any non-whitespace content remaining
|
||||||
return bool(cleaned.strip())
|
return bool(cleaned.strip())
|
||||||
|
|
|
||||||
|
|
@ -470,22 +470,25 @@ def delegate_task(
|
||||||
_parent_tool_names = list(_model_tools._last_resolved_tool_names)
|
_parent_tool_names = list(_model_tools._last_resolved_tool_names)
|
||||||
|
|
||||||
# Build all child agents on the main thread (thread-safe construction)
|
# Build all child agents on the main thread (thread-safe construction)
|
||||||
|
# Wrapped in try/finally so the global is always restored even if a
|
||||||
|
# child build raises (otherwise _last_resolved_tool_names stays corrupted).
|
||||||
children = []
|
children = []
|
||||||
for i, t in enumerate(task_list):
|
try:
|
||||||
child = _build_child_agent(
|
for i, t in enumerate(task_list):
|
||||||
task_index=i, goal=t["goal"], context=t.get("context"),
|
child = _build_child_agent(
|
||||||
toolsets=t.get("toolsets") or toolsets, model=creds["model"],
|
task_index=i, goal=t["goal"], context=t.get("context"),
|
||||||
max_iterations=effective_max_iter, parent_agent=parent_agent,
|
toolsets=t.get("toolsets") or toolsets, model=creds["model"],
|
||||||
override_provider=creds["provider"], override_base_url=creds["base_url"],
|
max_iterations=effective_max_iter, parent_agent=parent_agent,
|
||||||
override_api_key=creds["api_key"],
|
override_provider=creds["provider"], override_base_url=creds["base_url"],
|
||||||
override_api_mode=creds["api_mode"],
|
override_api_key=creds["api_key"],
|
||||||
)
|
override_api_mode=creds["api_mode"],
|
||||||
# Override with correct parent tool names (before child construction mutated global)
|
)
|
||||||
child._delegate_saved_tool_names = _parent_tool_names
|
# Override with correct parent tool names (before child construction mutated global)
|
||||||
children.append((i, t, child))
|
child._delegate_saved_tool_names = _parent_tool_names
|
||||||
|
children.append((i, t, child))
|
||||||
# Authoritative restore: reset global to parent's tool names after all children built
|
finally:
|
||||||
_model_tools._last_resolved_tool_names = _parent_tool_names
|
# Authoritative restore: always reset global to parent's tool names, whether all children were built or a build raised
|
||||||
|
_model_tools._last_resolved_tool_names = _parent_tool_names
|
||||||
|
|
||||||
if n_tasks == 1:
|
if n_tasks == 1:
|
||||||
# Single task -- run directly (no thread pool overhead)
|
# Single task -- run directly (no thread pool overhead)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue