From 36a4481152f4a5595b2aa5496e160ae7856f2892 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 19 Mar 2026 10:08:14 -0700 Subject: [PATCH] fix: prevent unavailable tool names from leaking into model schemas MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: prevent unavailable tool names from leaking into model schemas When web_search/web_extract fail check_fn (no API key configured), their names were still leaking into tool descriptions via two paths: 1. execute_code schema: sandbox_enabled was computed from tools_to_include (pre-filter) instead of the actual available tools (post-filter), so the execute_code description listed web_search/web_extract as available sandbox imports even when they weren't. 2. browser_navigate schema: hardcoded description said 'prefer web_search or web_extract' regardless of whether those tools existed. The model saw these references, assumed the tools existed, and tried calling them directly — triggering 'Unknown tool' errors. Fix: compute available_tool_names from the filtered result set and use that for both execute_code sandbox listing and browser_navigate description patching. * docs: add pitfall about cross-tool references in schema descriptions --------- Co-authored-by: Test --- AGENTS.md | 3 +++ model_tools.py | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index f3201f9f..5f18e6ff 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -366,6 +366,9 @@ Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-p ### `_last_resolved_tool_names` is a process-global in `model_tools.py` `_run_single_child()` in `delegate_tool.py` saves and restores this global around subagent execution. If you add new code that reads this global, be aware it may be temporarily stale during child agent runs. +### DO NOT hardcode cross-tool references in schema descriptions +Tool schema descriptions must not mention tools from other toolsets by name (e.g., `browser_navigate` saying "prefer web_search"). Those tools may be unavailable (missing API keys, disabled toolset), causing the model to hallucinate calls to non-existent tools. If a cross-reference is needed, add it dynamically in `get_tool_definitions()` in `model_tools.py` — see the `browser_navigate` / `execute_code` post-processing blocks for the pattern. + ### Tests must not write to `~/.hermes/` The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests. diff --git a/model_tools.py b/model_tools.py index 3d252f44..23849817 100644 --- a/model_tools.py +++ b/model_tools.py @@ -242,18 +242,45 @@ def get_tool_definitions( # Ask the registry for schemas (only returns tools whose check_fn passes) filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode) + # The set of tool names that actually passed check_fn filtering. + # Use this (not tools_to_include) for any downstream schema that references + # other tools by name — otherwise the model sees tools mentioned in + # descriptions that don't actually exist, and hallucinates calls to them. + available_tool_names = {t["function"]["name"] for t in filtered_tools} + # Rebuild execute_code schema to only list sandbox tools that are actually - # enabled. Without this, the model sees "web_search is available in - # execute_code" even when the user disabled the web toolset (#560-discord). - if "execute_code" in tools_to_include: + # available. Without this, the model sees "web_search is available in + # execute_code" even when the API key isn't configured or the toolset is + # disabled (#560-discord). + if "execute_code" in available_tool_names: from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema - sandbox_enabled = SANDBOX_ALLOWED_TOOLS & tools_to_include + sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names dynamic_schema = build_execute_code_schema(sandbox_enabled) for i, td in enumerate(filtered_tools): if td.get("function", {}).get("name") == "execute_code": filtered_tools[i] = {"type": "function", "function": dynamic_schema} break + # Strip web tool cross-references from browser_navigate description when + # web_search / web_extract are not available. The static schema says + # "prefer web_search or web_extract" which causes the model to hallucinate + # those tools when they're missing. + if "browser_navigate" in available_tool_names: + web_tools_available = {"web_search", "web_extract"} & available_tool_names + if not web_tools_available: + for i, td in enumerate(filtered_tools): + if td.get("function", {}).get("name") == "browser_navigate": + desc = td["function"].get("description", "") + desc = desc.replace( + " For simple information retrieval, prefer web_search or web_extract (faster, cheaper).", + "", + ) + filtered_tools[i] = { + "type": "function", + "function": {**td["function"], "description": desc}, + } + break + if not quiet_mode: if filtered_tools: tool_names = [t["function"]["name"] for t in filtered_tools]