terminal tool

2025-07-26 04:31:17 +00:00 · 2025-07-26 04:31:17 +00:00 · a49596cbb2
commit a49596cbb2
parent 122d8788ae
3 changed files with 121 additions and 185 deletions
--- a/model_tools.py
+++ b/model_tools.py
@@ -24,7 +24,7 @@ from typing import Dict, Any, List
 # Import toolsets
 from web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_tavily_api_key
-from terminal_tool import terminal_execute_tool, terminal_session_tool, check_hecate_requirements
+from terminal_tool import terminal_tool, check_hecate_requirements
 def get_web_tool_definitions() -> List[Dict[str, Any]]:
    """
@@ -37,7 +37,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "web_search_tool",
+                "name": "web_search",
                "description": "Search the web for information on any topic. Returns relevant results with titles, URLs, content snippets, and answers. Uses advanced search depth for comprehensive results.",
                "parameters": {
                    "type": "object",
@@ -61,7 +61,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "web_extract_tool",
+                "name": "web_extract",
                "description": "Extract and read the full content from specific web page URLs. Useful for getting detailed information from webpages found through search.",
                "parameters": {
                    "type": "object",
@@ -85,7 +85,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "web_crawl_tool",
+                "name": "web_crawl",
                "description": "Crawl a website with specific instructions to find and extract targeted content. Uses AI to intelligently navigate and extract relevant information from across the site.",
                "parameters": {
                    "type": "object",
@@ -122,8 +122,8 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
        {
            "type": "function",
            "function": {
-                "name": "terminal_execute_tool",
+                "name": "terminal",
-                "description": "Execute a command on a Linux VM and get the output. Automatically manages VM lifecycle - creates VMs on demand, reuses existing VMs, and cleans up after inactivity.",
+                "description": "Execute commands on a Linux VM with optional interactive session support. Automatically manages VM lifecycle - creates VMs on demand, reuses existing VMs, and cleans up after inactivity. Can handle both simple commands and interactive sessions.",
                "parameters": {
                    "type": "object",
                    "properties": {
@@ -131,46 +131,25 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
                            "type": "string",
                            "description": "The command to execute on the VM"
                        },
                        "input_keys": {
                            "type": "string",
                            "description": "Keystrokes to send to the most recent interactive session (e.g., 'hello\\n' for typing hello + Enter). If no active session exists, this will be ignored."
                        },
                        "background": {
                            "type": "boolean",
                            "description": "Whether to run the command in the background (default: false)",
                            "default": False
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Command timeout in seconds (optional)",
                            "minimum": 1
                        }
                    },
                    "required": ["command"]
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "terminal_session_tool",
                "description": "Execute commands in an interactive terminal session. Useful for running interactive programs (vim, python REPL, etc.), maintaining state between commands, or sending keystrokes to running programs.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "command": {
                            "type": "string",
                            "description": "Command to start a new session (optional if continuing existing session)"
                        },
                        "input_keys": {
                            "type": "string",
                            "description": "Keystrokes to send to the session (e.g., 'hello\\n' for typing hello + Enter)"
                        },
                        "session_id": {
                            "type": "string",
                            "description": "ID of existing session to continue (optional)"
                        },
                        "idle_threshold": {
                            "type": "number",
                            "description": "Seconds to wait for output before considering session idle (default: 5.0)",
                            "default": 5.0,
                            "minimum": 0.1
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Command timeout in seconds (optional)",
                            "minimum": 1
                        }
                    },
                    "required": []
@@ -215,21 +194,21 @@ def handle_web_function_call(function_name: str, function_args: Dict[str, Any])
    Returns:
        str: Function result as JSON string
    """
-    if function_name == "web_search_tool":
+    if function_name == "web_search":
        query = function_args.get("query", "")
        limit = function_args.get("limit", 5)
        # Ensure limit is within bounds
        limit = max(1, min(10, limit))
        return web_search_tool(query, limit)
-    elif function_name == "web_extract_tool":
+    elif function_name == "web_extract":
        urls = function_args.get("urls", [])
        # Limit URLs to prevent abuse
        urls = urls[:5] if isinstance(urls, list) else []
        format = function_args.get("format")
        return web_extract_tool(urls, format)
-    elif function_name == "web_crawl_tool":
+    elif function_name == "web_crawl":
        url = function_args.get("url", "")
        instructions = function_args.get("instructions")
        depth = function_args.get("depth", "basic")
@@ -249,18 +228,14 @@ def handle_terminal_function_call(function_name: str, function_args: Dict[str, A
    Returns:
        str: Function result as JSON string
    """
-    if function_name == "terminal_execute_tool":
+    if function_name == "terminal":
        command = function_args.get("command", "")
        background = function_args.get("background", False)
        timeout = function_args.get("timeout")
        return terminal_execute_tool(command, background, timeout)
    elif function_name == "terminal_session_tool":
        command = function_args.get("command")
        input_keys = function_args.get("input_keys")
-        session_id = function_args.get("session_id")
+        background = function_args.get("background", False)
        idle_threshold = function_args.get("idle_threshold", 5.0)
-        return terminal_session_tool(command, input_keys, session_id, idle_threshold)
+        timeout = function_args.get("timeout")
        # Session management is handled internally - don't pass session_id from model
        return terminal_tool(command, input_keys, None, background, idle_threshold, timeout)
    else:
        return json.dumps({"error": f"Unknown terminal function: {function_name}"})
@@ -285,11 +260,11 @@ def handle_function_call(function_name: str, function_args: Dict[str, Any]) -> s
    """
    try:
        # Route web tools
-        if function_name in ["web_search_tool", "web_extract_tool", "web_crawl_tool"]:
+        if function_name in ["web_search", "web_extract", "web_crawl"]:
            return handle_web_function_call(function_name, function_args)
        # Route terminal tools
-        elif function_name in ["terminal_execute_tool", "terminal_session_tool"]:
+        elif function_name in ["terminal"]:
            return handle_terminal_function_call(function_name, function_args)
        # Future toolsets can be routed here:
@@ -324,8 +299,8 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
        },
        "terminal_tools": {
            "available": check_hecate_requirements(),
-            "tools": ["terminal_execute_tool", "terminal_session_tool"],
+            "tools": ["terminal_tool"],
-            "description": "Execute commands and manage interactive sessions on Linux VMs",
+            "description": "Execute commands with optional interactive session support on Linux VMs",
            "requirements": ["MORPH_API_KEY environment variable", "hecate package"]
        }
        # Future toolsets can be added here
--- a/run_agent.py
+++ b/run_agent.py
@@ -25,6 +25,7 @@ import os
 import time
 from typing import List, Dict, Any, Optional
 from openai import OpenAI
 import fire
 # Import our tool system
 from model_tools import get_tool_definitions, handle_function_call, check_toolset_requirements
@@ -276,28 +277,46 @@ class AIAgent:
        return result["final_response"]
-def main():
+def main(
    query: str = None,
    model: str = "claude-opus-4-20250514", 
    api_key: str = None,
    base_url: str = "https://api.anthropic.com/v1/",
    max_turns: int = 10
 ):
    """
    Main function for running the agent directly.
    Args:
        query (str): Natural language query for the agent. Defaults to Python 3.13 example.
        model (str): Model name to use. Defaults to claude-opus-4-20250514.
        api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
        base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
        max_turns (int): Maximum number of API call iterations. Defaults to 10.
    """
    print("🤖 AI Agent with Tool Calling")
    print("=" * 50)
-    # Initialize agent with local SGLang server (modify as needed)
+    # Initialize agent with provided parameters
    try:
        agent = AIAgent(
-            base_url="https://api.anthropic.com/v1/",
+            base_url=base_url,
-            model="claude-opus-4-20250514"
+            model=model,
            api_key=api_key,
            max_iterations=max_turns
        )
    except RuntimeError as e:
        print(f"❌ Failed to initialize agent: {e}")
        return
-    # Example conversation
+    # Use provided query or default to Python 3.13 example
-    user_query = (
+    if query is None:
-        "Tell me about the latest developments in Python 3.12 and what new features "
+        user_query = (
-        "developers should know about. Please search for current information and try it out."
+            "Tell me about the latest developments in Python 3.13 and what new features "
-    )
+            "developers should know about. Please search for current information and try it out."
        )
    else:
        user_query = query
    print(f"\n📝 User Query: {user_query}")
    print("\n" + "=" * 50)
@@ -321,4 +340,4 @@ def main():
 if __name__ == "__main__":
-    main()
+    fire.Fire(main)
--- a/terminal_tool.py
+++ b/terminal_tool.py
@@ -1,67 +1,85 @@
 #!/usr/bin/env python3
 """
-Terminal Tools Module
+Terminal Tool Module
-This module provides terminal/command execution tools using Hecate's VM infrastructure.
+This module provides a single terminal tool using Hecate's VM infrastructure.
 It wraps Hecate's functionality to provide a simple interface for executing commands
 on Morph VMs with automatic lifecycle management.
-Available tools:
+Available tool:
- terminal_execute_tool: Execute a single command and get output
+- terminal_tool: Execute commands with optional interactive session support
 - terminal_session_tool: Execute a command in a persistent session
 Usage:
-    from terminal_tool import terminal_execute_tool, terminal_session_tool
+    from terminal_tool import terminal_tool
    # Execute a single command
-    result = terminal_execute_tool("ls -la")
+    result = terminal_tool("ls -la")
-    # Execute in a session (for interactive commands)
+    # Execute in an interactive session
-    result = terminal_session_tool("python", input_keys="print('hello')\\nexit()\\n")
+    result = terminal_tool("python", input_keys="print('hello')\\nexit()\\n")
 """
 import json
 import os
-from typing import Optional
+from typing import Optional, Dict, Any
 from hecate import run_tool_with_lifecycle_management
 from morphcloud._llm import ToolCall
-def terminal_execute_tool(
+def terminal_tool(
-    command: str,
+    command: Optional[str] = None,
    input_keys: Optional[str] = None,
    session_id: Optional[str] = None,
    background: bool = False,
    idle_threshold: float = 5.0,
    timeout: Optional[int] = None
 ) -> str:
    """
-    Execute a command on a Morph VM and return the output.
+    Execute a command on a Morph VM with optional interactive session support.
    This tool uses Hecate's VM lifecycle management to automatically create
    and manage VMs. VMs are reused within the configured lifetime window
    and automatically cleaned up after inactivity.
    Args:
-        command: The command to execute
+        command: The command to execute (optional if continuing existing session)
-        background: Whether to run the command in the background (default: False)
+        input_keys: Keystrokes to send to interactive session (e.g., "hello\\n")
        session_id: ID of existing session to continue (optional)
        background: Whether to run the command in the background (default: False) 
        idle_threshold: Seconds to wait for output before considering session idle (default: 5.0)
        timeout: Command timeout in seconds (optional)
    Returns:
-        str: JSON string containing the command output, exit code, and any errors
+        str: JSON string containing command output, session info, exit code, and any errors
-    Example:
+    Examples:
-        >>> result = terminal_execute_tool("ls -la /tmp")
+        # Execute a simple command
-        >>> print(json.loads(result))
+        >>> result = terminal_tool(command="ls -la /tmp")
-        {
+        
-            "output": "total 8\\ndrwxrwxrwt 2 root root 4096 ...",
+        # Start an interactive Python session
-            "exit_code": 0,
+        >>> result = terminal_tool(command="python3")
-            "error": null
+        >>> session_data = json.loads(result)
-        }
+        >>> session_id = session_data["session_id"]
        # Send input to the session
        >>> result = terminal_tool(input_keys="print('Hello')\\n", session_id=session_id)
        # Run a background task
        >>> result = terminal_tool(command="sleep 60", background=True)
    """
    try:
-        # Create tool call for Hecate
+        # Build tool input based on provided parameters
-        tool_input = {
+        tool_input = {}
            "command": command,
            "background": background
        }
        if command:
            tool_input["command"] = command
        if input_keys:
            tool_input["input_keys"] = input_keys
        if session_id:
            tool_input["session_id"] = session_id
        if background:
            tool_input["background"] = background
        if idle_threshold != 5.0:
            tool_input["idle_threshold"] = idle_threshold
        if timeout is not None:
            tool_input["timeout"] = timeout
@@ -73,104 +91,25 @@ def terminal_execute_tool(
        # Execute with lifecycle management
        result = run_tool_with_lifecycle_management(tool_call)
-        # Format the result
+        # Format the result with all possible fields
        formatted_result = {
            "output": result.get("output", ""),
            "screen": result.get("screen", ""),
            "session_id": result.get("session_id"),
            "exit_code": result.get("returncode", result.get("exit_code", -1)),
-            "error": result.get("error")
+            "error": result.get("error"),
            "status": "active" if result.get("session_id") else "ended"
        }
        # Add session info if present (for interactive sessions)
        if "session_id" in result:
            formatted_result["session_id"] = result["session_id"]
        if "screen" in result:
            formatted_result["screen"] = result["screen"]
        return json.dumps(formatted_result)
    except Exception as e:
        return json.dumps({
            "output": "",
            "exit_code": -1,
            "error": f"Failed to execute command: {str(e)}"
        })
 def terminal_session_tool(
    command: Optional[str] = None,
    input_keys: Optional[str] = None,
    session_id: Optional[str] = None,
    idle_threshold: float = 5.0
 ) -> str:
    """
    Execute a command in an interactive terminal session.
    This tool is useful for:
    - Running interactive programs (vim, python REPL, etc.)
    - Maintaining state between commands
    - Sending keystrokes to running programs
    Args:
        command: Command to start a new session (optional if continuing existing session)
        input_keys: Keystrokes to send to the session (e.g., "hello\\n" for typing hello + Enter)
        session_id: ID of existing session to continue (optional)
        idle_threshold: Seconds to wait for output before considering session idle (default: 5.0)
    Returns:
        str: JSON string containing session info, screen content, and any errors
    Example:
        # Start a Python REPL session
        >>> result = terminal_session_tool("python")
        >>> session_data = json.loads(result)
        >>> session_id = session_data["session_id"]
        # Send commands to the session
        >>> result = terminal_session_tool(
        ...     input_keys="print('Hello, World!')\\n",
        ...     session_id=session_id
        ... )
    """
    try:
        tool_input = {}
        if command:
            tool_input["command"] = command
        if input_keys:
            tool_input["input_keys"] = input_keys
        if session_id:
            tool_input["session_id"] = session_id
        if idle_threshold != 5.0:
            tool_input["idle_threshold"] = idle_threshold
        tool_call = ToolCall(
            name="run_command",
            input=tool_input
        )
        # Execute with lifecycle management
        result = run_tool_with_lifecycle_management(tool_call)
        # Format the result for session tools
        formatted_result = {
            "session_id": result.get("session_id"),
            "screen": result.get("screen", ""),
            "exit_code": result.get("returncode", result.get("exit_code", 0)),
            "error": result.get("error"),
            "status": "active" if result.get("session_id") else "ended"
        }
        # Include output if present (for non-interactive commands)
        if "output" in result:
            formatted_result["output"] = result["output"]
        return json.dumps(formatted_result)
    except Exception as e:
        return json.dumps({
            "session_id": None,
            "screen": "",
            "session_id": None,
            "exit_code": -1,
-            "error": f"Failed to manage session: {str(e)}",
+            "error": f"Failed to execute terminal command: {str(e)}",
            "status": "error"
        })
@@ -211,7 +150,7 @@ if __name__ == "__main__":
    """
    Simple test/demo when run directly
    """
-    print("Terminal Tools Module")
+    print("Terminal Tool Module")
    print("=" * 40)
    if not _requirements_met:
@@ -219,26 +158,29 @@ if __name__ == "__main__":
        exit(1)
    print("All requirements met!")
-    print("\nAvailable Tools:")
+    print("\nAvailable Tool:")
-    print("  - terminal_execute_tool: Execute single commands")
+    print("  - terminal_tool: Execute commands with optional interactive session support")
    print("  - terminal_session_tool: Interactive terminal sessions")
    print("\nUsage Examples:")
    print("  # Execute a command")
-    print("  result = terminal_execute_tool('ls -la')")
+    print("  result = terminal_tool(command='ls -la')")
    print("  ")
    print("  # Start an interactive session")
-    print("  result = terminal_session_tool('python')")
+    print("  result = terminal_tool(command='python3')")
    print("  session_data = json.loads(result)")
    print("  session_id = session_data['session_id']")
    print("  ")
    print("  # Send input to the session")
-    print("  result = terminal_session_tool(")
+    print("  result = terminal_tool(")
    print("      input_keys='print(\"Hello\")\\\\n',")
    print("      session_id=session_id")
    print("  )")
    print("  ")
    print("  # Run a background task")
    print("  result = terminal_tool(command='sleep 60', background=True)")
    print("\nEnvironment Variables:")
    print(f"  MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}")
    print(f"  OPENAI_API_KEY: {'Set' if os.getenv('OPENAI_API_KEY') else 'Not set (optional)'}")
-    print(f"  HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300)")
+    print(f"  HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300)")
    print(f"  HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_p5294qxt')} (default: snapshot_p5294qxt)")