merge: resolve file_tools.py conflict with origin/main
Combine read/search loop detection with main's redact_sensitive_text and truncation hint features. Add tracker reset to TestSearchHints to prevent cross-test state leakage.
This commit is contained in:
commit
4684aaffdc
104 changed files with 13720 additions and 2489 deletions
|
|
@ -63,7 +63,7 @@ import time
|
|||
import requests
|
||||
from typing import Dict, Any, Optional, List
|
||||
from pathlib import Path
|
||||
from agent.auxiliary_client import get_vision_auxiliary_client
|
||||
from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -80,8 +80,38 @@ DEFAULT_SESSION_TIMEOUT = 300
|
|||
# Max tokens for snapshot content before summarization
|
||||
SNAPSHOT_SUMMARIZE_THRESHOLD = 8000
|
||||
|
||||
# Resolve vision auxiliary client for extraction/vision tasks
|
||||
_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client()
|
||||
# Vision client — for browser_vision (screenshot analysis)
|
||||
# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire
|
||||
# browser_tool module from importing (which would disable all 10 browser tools).
|
||||
try:
|
||||
_aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
|
||||
except Exception as _init_err:
|
||||
logger.debug("Could not initialise vision auxiliary client: %s", _init_err)
|
||||
_aux_vision_client, _DEFAULT_VISION_MODEL = None, None
|
||||
|
||||
# Text client — for page snapshot summarization (same config as web_extract)
|
||||
try:
|
||||
_aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract")
|
||||
except Exception as _init_err:
|
||||
logger.debug("Could not initialise text auxiliary client: %s", _init_err)
|
||||
_aux_text_client, _DEFAULT_TEXT_MODEL = None, None
|
||||
|
||||
# Module-level alias for availability checks
|
||||
EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL
|
||||
|
||||
|
||||
def _get_vision_model() -> str:
    """Return the model name used by browser_vision (screenshot analysis).

    Resolution order:
      1. ``AUXILIARY_VISION_MODEL`` environment variable (whitespace is
         stripped; an empty value is ignored),
      2. the module-level ``_DEFAULT_VISION_MODEL``,
      3. the hard-coded fallback ``"google/gemini-3-flash-preview"``.

    The environment variable is read on every call, so an override set after
    import still takes effect.
    """
    return (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
            or _DEFAULT_VISION_MODEL
            or "google/gemini-3-flash-preview")
|
||||
|
||||
|
||||
def _get_extraction_model() -> str:
    """Return the model name used for page-snapshot text summarization.

    Resolution order:
      1. ``AUXILIARY_WEB_EXTRACT_MODEL`` environment variable (whitespace is
         stripped; an empty value is ignored),
      2. the module-level ``_DEFAULT_TEXT_MODEL``,
      3. the hard-coded fallback ``"google/gemini-3-flash-preview"``.

    The environment variable is read on every call, so an override set after
    import still takes effect.
    """
    return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
            or _DEFAULT_TEXT_MODEL
            or "google/gemini-3-flash-preview")
|
||||
|
||||
|
||||
def _is_local_mode() -> bool:
|
||||
|
|
@ -94,9 +124,27 @@ def _is_local_mode() -> bool:
|
|||
return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID"))
|
||||
|
||||
|
||||
def _socket_safe_tmpdir() -> str:
    """Return a short temp directory path suitable for Unix domain sockets.

    macOS sets ``TMPDIR`` to ``/var/folders/xx/.../T/`` (~51 chars). When we
    append ``agent-browser-hermes_…`` the resulting socket path exceeds the
    104-byte macOS limit for ``AF_UNIX`` addresses, causing agent-browser to
    fail with "Failed to create socket directory" or silent screenshot failures.

    Linux ``tempfile.gettempdir()`` already returns ``/tmp``, so this is a
    no-op there. On macOS we bypass ``TMPDIR`` and use ``/tmp`` directly
    (symlink to ``/private/tmp``, sticky-bit protected, always available).

    Returns:
        ``"/tmp"`` when ``sys.platform`` is ``"darwin"``, otherwise whatever
        ``tempfile.gettempdir()`` reports.
    """
    if sys.platform == "darwin":
        return "/tmp"
    return tempfile.gettempdir()
|
||||
|
||||
|
||||
# Track active sessions per task
|
||||
# Stores: session_name (always), bb_session_id + cdp_url (cloud mode only)
|
||||
_active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, ...}
|
||||
_recording_sessions: set = set() # task_ids with active recordings
|
||||
|
||||
# Flag to track if cleanup has been done
|
||||
_cleanup_done = False
|
||||
|
|
@ -145,7 +193,7 @@ def _emergency_cleanup_all_sessions():
|
|||
try:
|
||||
browser_cmd = _find_agent_browser()
|
||||
task_socket_dir = os.path.join(
|
||||
tempfile.gettempdir(),
|
||||
_socket_safe_tmpdir(),
|
||||
f"agent-browser-{session_name}"
|
||||
)
|
||||
env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir}
|
||||
|
|
@ -431,11 +479,31 @@ BROWSER_TOOL_SCHEMAS = [
|
|||
"question": {
|
||||
"type": "string",
|
||||
"description": "What you want to know about the page visually. Be specific about what you're looking for."
|
||||
},
|
||||
"annotate": {
|
||||
"type": "boolean",
|
||||
"default": False,
|
||||
"description": "If true, overlay numbered [N] labels on interactive elements. Each [N] maps to ref @eN for subsequent browser commands. Useful for QA and spatial reasoning about page layout."
|
||||
}
|
||||
},
|
||||
"required": ["question"]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "browser_console",
|
||||
"description": "Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requires browser_navigate to be called first.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"clear": {
|
||||
"type": "boolean",
|
||||
"default": False,
|
||||
"description": "If true, clear the message buffers after reading"
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
|
|
@ -755,6 +823,7 @@ def _run_browser_command(
|
|||
try:
|
||||
browser_cmd = _find_agent_browser()
|
||||
except FileNotFoundError as e:
|
||||
logger.warning("agent-browser CLI not found: %s", e)
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
from tools.interrupt import is_interrupted
|
||||
|
|
@ -765,6 +834,7 @@ def _run_browser_command(
|
|||
try:
|
||||
session_info = _get_session_info(task_id)
|
||||
except Exception as e:
|
||||
logger.warning("Failed to create browser session for task=%s: %s", task_id, e)
|
||||
return {"success": False, "error": f"Failed to create browser session: {str(e)}"}
|
||||
|
||||
# Build the command with the appropriate backend flag.
|
||||
|
|
@ -790,10 +860,12 @@ def _run_browser_command(
|
|||
# Without this, parallel workers fight over the same default socket path,
|
||||
# causing "Failed to create socket directory: Permission denied" errors.
|
||||
task_socket_dir = os.path.join(
|
||||
tempfile.gettempdir(),
|
||||
_socket_safe_tmpdir(),
|
||||
f"agent-browser-{session_info['session_name']}"
|
||||
)
|
||||
os.makedirs(task_socket_dir, exist_ok=True)
|
||||
os.makedirs(task_socket_dir, mode=0o700, exist_ok=True)
|
||||
logger.debug("browser cmd=%s task=%s socket_dir=%s (%d chars)",
|
||||
command, task_id, task_socket_dir, len(task_socket_dir))
|
||||
|
||||
browser_env = {**os.environ}
|
||||
# Ensure PATH includes standard dirs (systemd services may have minimal PATH)
|
||||
|
|
@ -835,22 +907,29 @@ def _run_browser_command(
|
|||
"returncode=%s", result.returncode)
|
||||
return parsed
|
||||
except json.JSONDecodeError:
|
||||
# If not valid JSON, return as raw output
|
||||
# Non-JSON output indicates agent-browser crash or version mismatch
|
||||
raw = result.stdout.strip()[:500]
|
||||
logger.warning("browser '%s' returned non-JSON output (rc=%s): %s",
|
||||
command, result.returncode, raw[:200])
|
||||
return {
|
||||
"success": True,
|
||||
"data": {"raw": result.stdout.strip()}
|
||||
"data": {"raw": raw}
|
||||
}
|
||||
|
||||
# Check for errors
|
||||
if result.returncode != 0:
|
||||
error_msg = result.stderr.strip() if result.stderr else f"Command failed with code {result.returncode}"
|
||||
logger.warning("browser '%s' failed (rc=%s): %s", command, result.returncode, error_msg[:300])
|
||||
return {"success": False, "error": error_msg}
|
||||
|
||||
return {"success": True, "data": {}}
|
||||
|
||||
except subprocess.TimeoutExpired:
|
||||
logger.warning("browser '%s' timed out after %ds (task=%s, socket_dir=%s)",
|
||||
command, timeout, task_id, task_socket_dir)
|
||||
return {"success": False, "error": f"Command timed out after {timeout} seconds"}
|
||||
except Exception as e:
|
||||
logger.warning("browser '%s' exception: %s", command, e, exc_info=True)
|
||||
return {"success": False, "error": str(e)}
|
||||
|
||||
|
||||
|
|
@ -860,9 +939,9 @@ def _extract_relevant_content(
|
|||
) -> str:
|
||||
"""Use LLM to extract relevant content from a snapshot based on the user's task.
|
||||
|
||||
Falls back to simple truncation when no auxiliary vision model is configured.
|
||||
Falls back to simple truncation when no auxiliary text model is configured.
|
||||
"""
|
||||
if _aux_vision_client is None or EXTRACTION_MODEL is None:
|
||||
if _aux_text_client is None:
|
||||
return _truncate_snapshot(snapshot_text)
|
||||
|
||||
if user_task:
|
||||
|
|
@ -890,8 +969,8 @@ def _extract_relevant_content(
|
|||
|
||||
try:
|
||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
||||
response = _aux_vision_client.chat.completions.create(
|
||||
model=EXTRACTION_MODEL,
|
||||
response = _aux_text_client.chat.completions.create(
|
||||
model=_get_extraction_model(),
|
||||
messages=[{"role": "user", "content": extraction_prompt}],
|
||||
**auxiliary_max_tokens_param(4000),
|
||||
temperature=0.1,
|
||||
|
|
@ -940,9 +1019,10 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
|
|||
session_info = _get_session_info(effective_task_id)
|
||||
is_first_nav = session_info.get("_first_nav", True)
|
||||
|
||||
# Mark that we've done at least one navigation
|
||||
# Auto-start recording if configured and this is first navigation
|
||||
if is_first_nav:
|
||||
session_info["_first_nav"] = False
|
||||
_maybe_start_recording(effective_task_id)
|
||||
|
||||
result = _run_browser_command(effective_task_id, "open", [url], timeout=60)
|
||||
|
||||
|
|
@ -1206,6 +1286,10 @@ def browser_close(task_id: Optional[str] = None) -> str:
|
|||
JSON string with close result
|
||||
"""
|
||||
effective_task_id = task_id or "default"
|
||||
|
||||
# Stop auto-recording before closing
|
||||
_maybe_stop_recording(effective_task_id)
|
||||
|
||||
result = _run_browser_command(effective_task_id, "close", [])
|
||||
|
||||
# Close the backend session (Browserbase API in cloud mode, nothing extra in local mode)
|
||||
|
|
@ -1236,6 +1320,103 @@ def browser_close(task_id: Optional[str] = None) -> str:
|
|||
}, ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
    """Get browser console messages and JavaScript errors.

    Returns both console output (log/warn/error/info from the page's JS)
    and uncaught exceptions (crashes, unhandled promise rejections).

    The function issues two agent-browser commands ("console" and "errors").
    A sub-command that fails contributes an empty list; the returned payload
    always carries ``"success": True``.

    Args:
        clear: If True, clear the message/error buffers after reading
        task_id: Task identifier for session isolation

    Returns:
        JSON string with console messages and JS errors
    """
    effective_task_id = task_id or "default"

    # Both sub-commands accept the same optional flag; build a fresh list for
    # each call so neither call can observe mutations made by the other.
    console_result = _run_browser_command(
        effective_task_id, "console", ["--clear"] if clear else []
    )
    errors_result = _run_browser_command(
        effective_task_id, "errors", ["--clear"] if clear else []
    )

    messages = []
    if console_result.get("success"):
        # ``or {}`` / ``or []`` guard against keys that are present but null.
        raw_messages = (console_result.get("data") or {}).get("messages") or []
        messages = [
            {
                "type": msg.get("type", "log"),
                "text": msg.get("text", ""),
                "source": "console",
            }
            for msg in raw_messages
        ]

    errors = []
    if errors_result.get("success"):
        raw_errors = (errors_result.get("data") or {}).get("errors") or []
        errors = [
            {
                "message": err.get("message", ""),
                "source": "exception",
            }
            for err in raw_errors
        ]

    return json.dumps({
        "success": True,
        "console_messages": messages,
        "js_errors": errors,
        "total_messages": len(messages),
        "total_errors": len(errors),
    }, ensure_ascii=False)
|
||||
|
||||
|
||||
def _maybe_start_recording(task_id: str):
    """Start recording if browser.record_sessions is enabled in config.

    Reads ``config.yaml`` from ``$HERMES_HOME`` (default ``~/.hermes``). When
    ``browser.record_sessions`` is truthy, old recordings are pruned and an
    agent-browser ``record start`` command is issued, writing to
    ``<hermes_home>/browser_recordings/session_<timestamp>_<task_id[:16]>.webm``.

    This is best-effort: any exception is logged at debug level and swallowed
    so that a recording problem never blocks navigation.

    Args:
        task_id: Task identifier whose browser session should be recorded.
    """
    # Already recording for this task — nothing to do.
    if task_id in _recording_sessions:
        return
    try:
        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
        config_path = hermes_home / "config.yaml"
        record_enabled = False
        if config_path.exists():
            import yaml
            with open(config_path, encoding="utf-8") as f:
                cfg = yaml.safe_load(f) or {}
            # ``browser:`` may be present but empty (parsed as None).
            record_enabled = (cfg.get("browser") or {}).get("record_sessions", False)

        if not record_enabled:
            return

        recordings_dir = hermes_home / "browser_recordings"
        recordings_dir.mkdir(parents=True, exist_ok=True)
        _cleanup_old_recordings(max_age_hours=72)

        import time
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        recording_path = recordings_dir / f"session_{timestamp}_{task_id[:16]}.webm"

        result = _run_browser_command(task_id, "record", ["start", str(recording_path)])
        if result.get("success"):
            _recording_sessions.add(task_id)
            logger.info("Auto-recording browser session %s to %s", task_id, recording_path)
        else:
            logger.debug("Could not start auto-recording: %s", result.get("error"))
    except Exception as e:
        # Deliberately broad: recording is optional and must not break browsing.
        logger.debug("Auto-recording setup failed: %s", e)
|
||||
|
||||
|
||||
def _maybe_stop_recording(task_id: str):
    """Stop recording if one is active for this session.

    Issues an agent-browser ``record stop`` command and logs the saved path on
    success. Failures are logged at debug level and swallowed. The task is
    always removed from ``_recording_sessions`` afterwards, whether or not the
    stop command succeeded.

    Args:
        task_id: Task identifier whose recording should be stopped.
    """
    if task_id not in _recording_sessions:
        return
    try:
        result = _run_browser_command(task_id, "record", ["stop"])
        if result.get("success"):
            # ``or {}`` guards against a ``data`` key that is present but null.
            path = (result.get("data") or {}).get("path", "")
            logger.info("Saved browser recording for session %s: %s", task_id, path)
    except Exception as e:
        logger.debug("Could not stop recording for %s: %s", task_id, e)
    finally:
        # Forget the recording even on failure so we never retry a dead session.
        _recording_sessions.discard(task_id)
|
||||
|
||||
|
||||
def browser_get_images(task_id: Optional[str] = None) -> str:
|
||||
"""
|
||||
Get all images on the current page.
|
||||
|
|
@ -1290,7 +1471,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
|
|||
}, ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
||||
def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> str:
|
||||
"""
|
||||
Take a screenshot of the current page and analyze it with vision AI.
|
||||
|
||||
|
|
@ -1304,6 +1485,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
|
||||
Args:
|
||||
question: What you want to know about the page visually
|
||||
annotate: If True, overlay numbered [N] labels on interactive elements
|
||||
task_id: Task identifier for session isolation
|
||||
|
||||
Returns:
|
||||
|
|
@ -1316,7 +1498,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
effective_task_id = task_id or "default"
|
||||
|
||||
# Check auxiliary vision client
|
||||
if _aux_vision_client is None or EXTRACTION_MODEL is None:
|
||||
if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None:
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Browser vision unavailable: no auxiliary vision model configured. "
|
||||
|
|
@ -1335,24 +1517,35 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
_cleanup_old_screenshots(screenshots_dir, max_age_hours=24)
|
||||
|
||||
# Take screenshot using agent-browser
|
||||
screenshot_args = [str(screenshot_path)]
|
||||
if annotate:
|
||||
screenshot_args.insert(0, "--annotate")
|
||||
result = _run_browser_command(
|
||||
effective_task_id,
|
||||
"screenshot",
|
||||
[str(screenshot_path)],
|
||||
screenshot_args,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
if not result.get("success"):
|
||||
error_detail = result.get("error", "Unknown error")
|
||||
mode = "local" if _is_local_mode() else "cloud"
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": f"Failed to take screenshot: {result.get('error', 'Unknown error')}"
|
||||
"error": f"Failed to take screenshot ({mode} mode): {error_detail}"
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Check if screenshot file was created
|
||||
if not screenshot_path.exists():
|
||||
mode = "local" if _is_local_mode() else "cloud"
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Screenshot file was not created"
|
||||
"error": (
|
||||
f"Screenshot file was not created at {screenshot_path} ({mode} mode). "
|
||||
f"This may indicate a socket path issue (macOS /var/folders/), "
|
||||
f"a missing Chromium install ('agent-browser install'), "
|
||||
f"or a stale daemon process."
|
||||
),
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Read and convert to base64
|
||||
|
|
@ -1371,8 +1564,11 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
|
||||
# Use the sync auxiliary vision client directly
|
||||
from agent.auxiliary_client import auxiliary_max_tokens_param
|
||||
vision_model = _get_vision_model()
|
||||
logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s",
|
||||
len(image_data), vision_model)
|
||||
response = _aux_vision_client.chat.completions.create(
|
||||
model=EXTRACTION_MODEL,
|
||||
model=vision_model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
|
|
@ -1387,23 +1583,27 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str:
|
|||
)
|
||||
|
||||
analysis = response.choices[0].message.content
|
||||
return json.dumps({
|
||||
response_data = {
|
||||
"success": True,
|
||||
"analysis": analysis,
|
||||
"screenshot_path": str(screenshot_path),
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
# Include annotation data if annotated screenshot was taken
|
||||
if annotate and result.get("data", {}).get("annotations"):
|
||||
response_data["annotations"] = result["data"]["annotations"]
|
||||
return json.dumps(response_data, ensure_ascii=False)
|
||||
|
||||
except Exception as e:
|
||||
# Clean up screenshot on failure
|
||||
# Keep the screenshot if it was captured successfully — the failure is
|
||||
# in the LLM vision analysis, not the capture. Deleting a valid
|
||||
# screenshot loses evidence the user might need. The 24-hour cleanup
|
||||
# in _cleanup_old_screenshots prevents unbounded disk growth.
|
||||
logger.warning("browser_vision failed: %s", e, exc_info=True)
|
||||
error_info = {"success": False, "error": f"Error during vision analysis: {str(e)}"}
|
||||
if screenshot_path.exists():
|
||||
try:
|
||||
screenshot_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": f"Error during vision analysis: {str(e)}"
|
||||
}, ensure_ascii=False)
|
||||
error_info["screenshot_path"] = str(screenshot_path)
|
||||
error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:<path>."
|
||||
return json.dumps(error_info, ensure_ascii=False)
|
||||
|
||||
|
||||
def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24):
|
||||
|
|
@ -1421,6 +1621,25 @@ def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24):
|
|||
pass # Non-critical — don't fail the screenshot operation
|
||||
|
||||
|
||||
def _cleanup_old_recordings(max_age_hours=72):
    """Remove browser recordings older than max_age_hours to prevent disk bloat.

    Looks in ``<hermes_home>/browser_recordings`` where ``hermes_home`` is
    ``$HERMES_HOME`` or ``~/.hermes``. Only files matching ``session_*.webm``
    are considered; a file is deleted when its modification time is older
    than the cutoff.

    Best-effort: per-file and overall errors are swallowed so that cleanup
    can never fail the caller.

    Args:
        max_age_hours: Age threshold in hours; older recordings are deleted.
    """
    import time
    try:
        hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
        recordings_dir = hermes_home / "browser_recordings"
        if not recordings_dir.exists():
            return
        cutoff = time.time() - (max_age_hours * 3600)
        for f in recordings_dir.glob("session_*.webm"):
            try:
                if f.stat().st_mtime < cutoff:
                    f.unlink()
            except Exception:
                # A file vanishing or being locked must not abort the sweep.
                pass
    except Exception:
        pass  # Non-critical — never fail the caller over housekeeping
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Cleanup and Management Functions
|
||||
# ============================================================================
|
||||
|
|
@ -1492,6 +1711,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
|
|||
bb_session_id = session_info.get("bb_session_id", "unknown")
|
||||
logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id)
|
||||
|
||||
# Stop auto-recording before closing (saves the file)
|
||||
_maybe_stop_recording(task_id)
|
||||
|
||||
# Try to close via agent-browser first (needs session in _active_sessions)
|
||||
try:
|
||||
_run_browser_command(task_id, "close", [], timeout=10)
|
||||
|
|
@ -1517,7 +1739,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None:
|
|||
# Kill the daemon process and clean up socket directory
|
||||
session_name = session_info.get("session_name", "")
|
||||
if session_name:
|
||||
socket_dir = os.path.join(tempfile.gettempdir(), f"agent-browser-{session_name}")
|
||||
socket_dir = os.path.join(_socket_safe_tmpdir(), f"agent-browser-{session_name}")
|
||||
if os.path.exists(socket_dir):
|
||||
# agent-browser writes {session}.pid in the socket dir
|
||||
pid_file = os.path.join(socket_dir, f"{session_name}.pid")
|
||||
|
|
@ -1707,6 +1929,13 @@ registry.register(
|
|||
name="browser_vision",
|
||||
toolset="browser",
|
||||
schema=_BROWSER_SCHEMA_MAP["browser_vision"],
|
||||
handler=lambda args, **kw: browser_vision(question=args.get("question", ""), task_id=kw.get("task_id")),
|
||||
handler=lambda args, **kw: browser_vision(question=args.get("question", ""), annotate=args.get("annotate", False), task_id=kw.get("task_id")),
|
||||
check_fn=check_browser_requirements,
|
||||
)
|
||||
registry.register(
|
||||
name="browser_console",
|
||||
toolset="browser",
|
||||
schema=_BROWSER_SCHEMA_MAP["browser_console"],
|
||||
handler=lambda args, **kw: browser_console(clear=args.get("clear", False), task_id=kw.get("task_id")),
|
||||
check_fn=check_browser_requirements,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -385,7 +385,11 @@ def execute_code(
|
|||
|
||||
# --- Set up temp directory with hermes_tools.py and script.py ---
|
||||
tmpdir = tempfile.mkdtemp(prefix="hermes_sandbox_")
|
||||
sock_path = os.path.join(tempfile.gettempdir(), f"hermes_rpc_{uuid.uuid4().hex}.sock")
|
||||
# Use /tmp on macOS to avoid the long /var/folders/... path that pushes
|
||||
# Unix domain socket paths past the 104-byte macOS AF_UNIX limit.
|
||||
# On Linux, tempfile.gettempdir() already returns /tmp.
|
||||
_sock_tmpdir = "/tmp" if sys.platform == "darwin" else tempfile.gettempdir()
|
||||
sock_path = os.path.join(_sock_tmpdir, f"hermes_rpc_{uuid.uuid4().hex}.sock")
|
||||
|
||||
tool_call_log: list = []
|
||||
tool_call_counter = [0] # mutable so the RPC thread can increment
|
||||
|
|
|
|||
|
|
@ -102,7 +102,9 @@ def schedule_cronjob(
|
|||
- "local": Save to local files only (~/.hermes/cron/output/)
|
||||
- "telegram": Send to Telegram home channel
|
||||
- "discord": Send to Discord home channel
|
||||
- "signal": Send to Signal home channel
|
||||
- "telegram:123456": Send to specific chat ID
|
||||
- "signal:+15551234567": Send to specific Signal number
|
||||
|
||||
Returns:
|
||||
JSON with job_id, next_run time, and confirmation
|
||||
|
|
@ -216,7 +218,7 @@ Use for: reminders, periodic checks, scheduled reports, automated maintenance.""
|
|||
},
|
||||
"deliver": {
|
||||
"type": "string",
|
||||
"description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', or 'platform:chat_id'"
|
||||
"description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', 'signal', or 'platform:chat_id'"
|
||||
}
|
||||
},
|
||||
"required": ["prompt", "schedule"]
|
||||
|
|
|
|||
|
|
@ -819,6 +819,14 @@ class ShellFileOperations(FileOperations):
|
|||
# Expand ~ and other shell paths
|
||||
path = self._expand_path(path)
|
||||
|
||||
# Validate that the path exists before searching
|
||||
check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found")
|
||||
if "not_found" in check.stdout:
|
||||
return SearchResult(
|
||||
error=f"Path not found: {path}. Verify the path exists (use 'terminal' to check).",
|
||||
total_count=0
|
||||
)
|
||||
|
||||
if target == "files":
|
||||
return self._search_files(pattern, path, limit, offset)
|
||||
else:
|
||||
|
|
@ -919,6 +927,11 @@ class ShellFileOperations(FileOperations):
|
|||
cmd = " ".join(cmd_parts)
|
||||
result = self._exec(cmd, timeout=60)
|
||||
|
||||
# rg exit codes: 0=matches found, 1=no matches, 2=error
|
||||
if result.exit_code == 2 and not result.stdout.strip():
|
||||
error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error"
|
||||
return SearchResult(error=f"Search failed: {error_msg}", total_count=0)
|
||||
|
||||
# Parse results based on output mode
|
||||
if output_mode == "files_only":
|
||||
all_files = [f for f in result.stdout.strip().split('\n') if f]
|
||||
|
|
@ -1013,6 +1026,11 @@ class ShellFileOperations(FileOperations):
|
|||
cmd = " ".join(cmd_parts)
|
||||
result = self._exec(cmd, timeout=60)
|
||||
|
||||
# grep exit codes: 0=matches found, 1=no matches, 2=error
|
||||
if result.exit_code == 2 and not result.stdout.strip():
|
||||
error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error"
|
||||
return SearchResult(error=f"Search failed: {error_msg}", total_count=0)
|
||||
|
||||
if output_mode == "files_only":
|
||||
all_files = [f for f in result.stdout.strip().split('\n') if f]
|
||||
total = len(all_files)
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import os
|
|||
import threading
|
||||
from typing import Optional
|
||||
from tools.file_operations import ShellFileOperations
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -133,6 +134,8 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
|||
try:
|
||||
file_ops = _get_file_ops(task_id)
|
||||
result = file_ops.read_file(path, offset, limit)
|
||||
if result.content:
|
||||
result.content = redact_sensitive_text(result.content)
|
||||
result_dict = result.to_dict()
|
||||
|
||||
# Track reads to detect re-read loops (e.g. after context compression)
|
||||
|
|
@ -224,7 +227,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
|||
else:
|
||||
return json.dumps({"error": f"Unknown mode: {mode}"})
|
||||
|
||||
return json.dumps(result.to_dict(), ensure_ascii=False)
|
||||
result_dict = result.to_dict()
|
||||
result_json = json.dumps(result_dict, ensure_ascii=False)
|
||||
# Hint when old_string not found — saves iterations where the agent
|
||||
# retries with stale content instead of re-reading the file.
|
||||
if result_dict.get("error") and "Could not find" in str(result_dict["error"]):
|
||||
result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
|
||||
return result_json
|
||||
except Exception as e:
|
||||
return json.dumps({"error": str(e)}, ensure_ascii=False)
|
||||
|
||||
|
|
@ -258,6 +267,10 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
|
|||
pattern=pattern, path=path, target=target, file_glob=file_glob,
|
||||
limit=limit, offset=offset, output_mode=output_mode, context=context
|
||||
)
|
||||
if hasattr(result, 'matches'):
|
||||
for m in result.matches:
|
||||
if hasattr(m, 'content') and m.content:
|
||||
m.content = redact_sensitive_text(m.content)
|
||||
result_dict = result.to_dict()
|
||||
|
||||
if count > 1:
|
||||
|
|
@ -266,7 +279,13 @@ def search_tool(pattern: str, target: str = "content", path: str = ".",
|
|||
"The results have not changed. Use the information you already have."
|
||||
)
|
||||
|
||||
return json.dumps(result_dict, ensure_ascii=False)
|
||||
result_json = json.dumps(result_dict, ensure_ascii=False)
|
||||
# Hint when results were truncated — explicit next offset is clearer
|
||||
# than relying on the model to infer it from total_count vs match count.
|
||||
if result_dict.get("truncated"):
|
||||
next_offset = offset + limit
|
||||
result_json += f"\n\n[Hint: Results truncated. Use offset={next_offset} to see more, or narrow with a more specific pattern or file_glob.]"
|
||||
return result_json
|
||||
except Exception as e:
|
||||
return json.dumps({"error": str(e)}, ensure_ascii=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ human-friendly channel names to IDs. Works in both CLI and gateway contexts.
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -32,7 +33,7 @@ SEND_MESSAGE_SCHEMA = {
|
|||
},
|
||||
"target": {
|
||||
"type": "string",
|
||||
"description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering'"
|
||||
"description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'"
|
||||
},
|
||||
"message": {
|
||||
"type": "string",
|
||||
|
|
@ -107,6 +108,7 @@ def _handle_send(args):
|
|||
"discord": Platform.DISCORD,
|
||||
"slack": Platform.SLACK,
|
||||
"whatsapp": Platform.WHATSAPP,
|
||||
"signal": Platform.SIGNAL,
|
||||
}
|
||||
platform = platform_map.get(platform_name)
|
||||
if not platform:
|
||||
|
|
@ -160,6 +162,8 @@ async def _send_to_platform(platform, pconfig, chat_id, message):
|
|||
return await _send_discord(pconfig.token, chat_id, message)
|
||||
elif platform == Platform.SLACK:
|
||||
return await _send_slack(pconfig.token, chat_id, message)
|
||||
elif platform == Platform.SIGNAL:
|
||||
return await _send_signal(pconfig.extra, chat_id, message)
|
||||
return {"error": f"Direct sending not yet implemented for {platform.value}"}
|
||||
|
||||
|
||||
|
|
@ -219,6 +223,42 @@ async def _send_slack(token, chat_id, message):
|
|||
return {"error": f"Slack send failed: {e}"}
|
||||
|
||||
|
||||
async def _send_signal(extra, chat_id, message):
    """Send via signal-cli JSON-RPC API.

    Args:
        extra: Mapping of platform settings. Reads ``http_url`` (default
            ``http://127.0.0.1:8080``) and ``account`` (required).
        chat_id: Recipient identifier. A value starting with ``group:`` is
            sent as ``groupId`` with the prefix removed; anything else is
            sent as a single-element ``recipient`` list.
        message: Text to send.

    Returns:
        ``{"success": True, "platform": "signal", "chat_id": chat_id}`` on
        success, otherwise ``{"error": <description>}``. Never raises.
    """
    try:
        import httpx
    except ImportError:
        return {"error": "httpx not installed"}
    try:
        http_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/")
        account = extra.get("account", "")
        if not account:
            return {"error": "Signal account not configured"}

        params = {"account": account, "message": message}
        group_prefix = "group:"
        if chat_id.startswith(group_prefix):
            params["groupId"] = chat_id[len(group_prefix):]
        else:
            params["recipient"] = [chat_id]

        payload = {
            "jsonrpc": "2.0",
            "method": "send",
            "params": params,
            # Millisecond timestamp serves as the JSON-RPC request id.
            "id": f"send_{int(time.time() * 1000)}",
        }

        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(f"{http_url}/api/v1/rpc", json=payload)
            resp.raise_for_status()
            data = resp.json()
            # A 2xx response can still carry a JSON-RPC level error object.
            if "error" in data:
                return {"error": f"Signal RPC error: {data['error']}"}
            return {"success": True, "platform": "signal", "chat_id": chat_id}
    except Exception as e:
        return {"error": f"Signal send failed: {e}"}
|
||||
|
||||
|
||||
def _check_send_message():
|
||||
"""Gate send_message on gateway running (always available on messaging platforms)."""
|
||||
platform = os.getenv("HERMES_SESSION_PLATFORM", "")
|
||||
|
|
|
|||
|
|
@ -946,6 +946,11 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult,
|
|||
client = OpenAI(
|
||||
base_url=OPENROUTER_BASE_URL,
|
||||
api_key=api_key,
|
||||
default_headers={
|
||||
"HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
|
||||
"X-OpenRouter-Title": "Hermes Agent",
|
||||
"X-OpenRouter-Categories": "productivity,cli-agent",
|
||||
},
|
||||
)
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
|
|
|
|||
|
|
@ -69,10 +69,36 @@ def _read_manifest() -> Dict[str, str]:
|
|||
|
||||
|
||||
def _write_manifest(entries: Dict[str, str]):
    """Write the manifest file atomically in v2 format (name:hash).

    Uses a temp file + os.replace() to avoid corruption if the process
    crashes or is interrupted mid-write. The temp file is created in the
    manifest's own directory so the final rename stays on one filesystem.

    Failures are logged at debug level and swallowed; the temp file is
    removed if anything goes wrong before the rename.

    Args:
        entries: Mapping of skill name to hash; written one ``name:hash``
            pair per line, sorted by name.
    """
    import tempfile

    MANIFEST_FILE.parent.mkdir(parents=True, exist_ok=True)
    data = "\n".join(f"{name}:{hash_val}" for name, hash_val in sorted(entries.items())) + "\n"

    try:
        fd, tmp_path = tempfile.mkstemp(
            dir=str(MANIFEST_FILE.parent),
            prefix=".bundled_manifest_",
            suffix=".tmp",
        )
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(data)
                f.flush()
                # Force the bytes to disk before the rename makes them visible.
                os.fsync(f.fileno())
            os.replace(tmp_path, MANIFEST_FILE)
        except BaseException:
            # Includes KeyboardInterrupt: never leave a stray temp file behind.
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
            raise
    except Exception as e:
        logger.debug("Failed to write skills manifest %s: %s", MANIFEST_FILE, e, exc_info=True)
|
||||
|
||||
|
||||
def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]:
|
||||
|
|
|
|||
|
|
@ -468,7 +468,9 @@ def _handle_vision_analyze(args, **kw):
|
|||
image_url = args.get("image_url", "")
|
||||
question = args.get("question", "")
|
||||
full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}"
|
||||
model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview"
|
||||
model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip()
|
||||
or DEFAULT_VISION_MODEL
|
||||
or "google/gemini-3-flash-preview")
|
||||
return vision_analyze_tool(image_url, full_prompt, model)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -85,7 +85,13 @@ DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
|||
|
||||
# Resolve async auxiliary client at module level.
|
||||
# Handles Codex Responses API adapter transparently.
|
||||
_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client()
|
||||
_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract")
|
||||
|
||||
# Allow per-task override via config.yaml auxiliary.web_extract_model
|
||||
DEFAULT_SUMMARIZER_MODEL = (
|
||||
os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip()
|
||||
or _DEFAULT_SUMMARIZER_MODEL
|
||||
)
|
||||
|
||||
_debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue