feat: implement code execution sandbox for programmatic tool calling

- Introduced a new `execute_code` tool that allows the agent to run Python scripts that call Hermes tools via RPC, reducing the number of round trips required for tool interactions.
- Added configuration options for timeout and maximum tool calls in the sandbox environment.
- Updated the toolset definitions to include the new code execution capabilities, ensuring integration across platforms.
- Implemented comprehensive tests for the code execution sandbox, covering various scenarios including tool call limits and error handling.
- Enhanced the CLI and documentation to reflect the new functionality, providing users with clear guidance on using the code execution tool.
This commit is contained in:
teknium1 2026-02-19 23:23:43 -08:00
parent 748f0b2b5f
commit 783acd712d
10 changed files with 1598 additions and 18 deletions

View file

@ -95,6 +95,8 @@ from tools.memory_tool import memory_tool, check_memory_requirements, MEMORY_SCH
from tools.session_search_tool import session_search, check_session_search_requirements, SESSION_SEARCH_SCHEMA
# Clarifying questions tool
from tools.clarify_tool import clarify_tool, check_clarify_requirements, CLARIFY_SCHEMA
# Code execution sandbox (programmatic tool calling)
from tools.code_execution_tool import execute_code, check_sandbox_requirements, EXECUTE_CODE_SCHEMA
from toolsets import (
get_toolset, resolve_toolset, resolve_multiple_toolsets,
get_all_toolsets, get_toolset_names, validate_toolset,
@ -212,6 +214,13 @@ TOOLSET_REQUIREMENTS = {
"setup_url": None,
"tools": ["clarify"],
},
"code_execution": {
"name": "Code Execution Sandbox",
"env_vars": [], # Uses stdlib only (subprocess, socket), no external deps
"check_fn": check_sandbox_requirements,
"setup_url": None,
"tools": ["execute_code"],
},
}
@ -1005,6 +1014,13 @@ def get_clarify_tool_definitions() -> List[Dict[str, Any]]:
return [{"type": "function", "function": CLARIFY_SCHEMA}]
def get_execute_code_tool_definitions() -> List[Dict[str, Any]]:
    """
    Build the tool definitions for the code execution sandbox
    (programmatic tool calling).

    Returns:
        A single-element list whose entry wraps EXECUTE_CODE_SCHEMA in a
        function-type tool definition.
    """
    sandbox_definition = {"type": "function", "function": EXECUTE_CODE_SCHEMA}
    return [sandbox_definition]
def get_send_message_tool_definitions():
"""Tool definitions for cross-channel messaging."""
return [
@ -1174,6 +1190,10 @@ def get_all_tool_names() -> List[str]:
if check_clarify_requirements():
tool_names.extend(["clarify"])
# Code execution sandbox (programmatic tool calling)
if check_sandbox_requirements():
tool_names.extend(["execute_code"])
# Cross-channel messaging (always available on messaging platforms)
tool_names.extend(["send_message"])
@ -1236,6 +1256,10 @@ TOOL_TO_TOOLSET_MAP = {
"memory": "memory_tools",
# Session history search
"session_search": "session_search_tools",
# Clarifying questions
"clarify": "clarify_tools",
# Code execution sandbox
"execute_code": "code_execution_tools",
}
@ -1252,6 +1276,11 @@ def get_toolset_for_tool(tool_name: str) -> str:
return TOOL_TO_TOOLSET_MAP.get(tool_name, "unknown")
# Stores the resolved tool name list from the most recent get_tool_definitions()
# call, so execute_code can determine which tools are available in this session.
# Starts as None; get_tool_definitions() assigns the names of its filtered tools
# here before returning them, and handle_function_call() passes the value to
# execute_code as `enabled_tools`.
# NOTE(review): this is module-level state that every get_tool_definitions()
# call overwrites, so it only reflects the latest call — confirm that is
# acceptable if several sessions with different toolsets share this module.
_last_resolved_tool_names: Optional[List[str]] = None
def get_tool_definitions(
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
@ -1364,6 +1393,11 @@ def get_tool_definitions(
for tool in get_clarify_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
# Code execution sandbox (programmatic tool calling)
if check_sandbox_requirements():
for tool in get_execute_code_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
# Cross-channel messaging (always available on messaging platforms)
for tool in get_send_message_tool_definitions():
all_available_tools_map[tool["function"]["name"]] = tool
@ -1491,6 +1525,10 @@ def get_tool_definitions(
else:
print("🛠️ No tools selected (all filtered out or unavailable)")
# Store resolved names so execute_code knows what's available in this session
global _last_resolved_tool_names
_last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]
return filtered_tools
def handle_web_function_call(function_name: str, function_args: Dict[str, Any]) -> str:
@ -2239,6 +2277,15 @@ def handle_function_call(
elif function_name in ["read_file", "write_file", "patch", "search"]:
return handle_file_function_call(function_name, function_args, task_id)
# Route code execution sandbox (programmatic tool calling)
elif function_name == "execute_code":
code = function_args.get("code", "")
return execute_code(
code=code,
task_id=task_id,
enabled_tools=_last_resolved_tool_names,
)
# Route text-to-speech tools
elif function_name in ["text_to_speech"]:
return handle_tts_function_call(function_name, function_args)
@ -2367,6 +2414,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
"tools": ["clarify"],
"description": "Clarifying questions: ask the user multiple-choice or open-ended questions",
"requirements": []
},
"code_execution_tools": {
"available": check_sandbox_requirements(),
"tools": ["execute_code"],
"description": "Code execution sandbox: run Python scripts that call tools programmatically",
"requirements": ["Linux or macOS (Unix domain sockets)"]
}
}
@ -2389,7 +2442,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
"browser_tools": check_browser_requirements(),
"cronjob_tools": check_cronjob_requirements(),
"file_tools": check_file_requirements(),
"tts_tools": check_tts_requirements()
"tts_tools": check_tts_requirements(),
"code_execution_tools": check_sandbox_requirements(),
}
if __name__ == "__main__":