# Patch summary (reviewer notes). This text sits ahead of the first "diff --git"
# header and is outside every hunk.
#
# browser_env/entrypoint.sh
#   Replaces one comment line above the `rm -f` of the X lock/socket paths.
#   The removed and added lines read the same in this copy, so the change is
#   presumably whitespace- or line-ending-only -- TODO confirm against the
#   original patch.
#
# hermes_code/agent/prompt_builder.py
#   Adds the module-level string constant BROWSER_CAPTCHA_GUIDANCE between
#   SKILLS_GUIDANCE and PLATFORM_HINTS. The text tells the agent to start
#   browser tasks with internet_browser, to call to_captcha when the browser
#   runtime reports status='awaiting_user_captcha', and never to claim it
#   solved a CAPTCHA itself or to attempt bypass methods.
#
# hermes_code/run_agent.py
#   Imports BROWSER_CAPTCHA_GUIDANCE from agent.prompt_builder and, inside
#   class AIAgent, appends it to tool_guidance only when both
#   "internet_browser" and "to_captcha" are in self.valid_tool_names.
#
# NOTE(review): this copy of the patch had its line breaks and indentation
# collapsed. The layout below is rebuilt from the hunk line counts; the
# leading whitespace of context lines is assumed -- verify against the target
# files before applying.
diff --git a/browser_env/entrypoint.sh b/browser_env/entrypoint.sh
index 052ca6c5..bf426a96 100644
--- a/browser_env/entrypoint.sh
+++ b/browser_env/entrypoint.sh
@@ -98,7 +98,7 @@ if [ ! -f /var/lib/dbus/machine-id ]; then
     dbus-uuidgen > /var/lib/dbus/machine-id 2>/dev/null || true
 fi
 
-# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
+# Удаляем stale lock/socket от прошлых падений Xvfb на том же DISPLAY.
 rm -f "/tmp/.X${DISPLAY_NUM}-lock" "/tmp/.X11-unix/X${DISPLAY_NUM}" || true
 
 log "starting X stack on DISPLAY=${DISPLAY}"
diff --git a/hermes_code/agent/prompt_builder.py b/hermes_code/agent/prompt_builder.py
index d6c4c6a6..f592c206 100644
--- a/hermes_code/agent/prompt_builder.py
+++ b/hermes_code/agent/prompt_builder.py
@@ -154,6 +154,15 @@ SKILLS_GUIDANCE = (
     "Skills that aren't maintained become liabilities."
 )
 
+BROWSER_CAPTCHA_GUIDANCE = (
+    "For browser tasks, do not pre-emptively refuse just because CAPTCHA may appear. "
+    "Start the task with internet_browser. "
+    "If the browser runtime reports status='awaiting_user_captcha', immediately use to_captcha "
+    "to hand control to the user for manual verification and then resume. "
+    "Important: you must never claim that you solved CAPTCHA yourself and you must not attempt bypass methods. "
+    "Your role is orchestration: run browser steps, request manual CAPTCHA completion from the user, verify, and continue."
+)
+
 PLATFORM_HINTS = {
     "whatsapp": (
         "You are on a text messaging communication platform, WhatsApp. "
diff --git a/hermes_code/run_agent.py b/hermes_code/run_agent.py
index 3d680d25..21384dcd 100644
--- a/hermes_code/run_agent.py
+++ b/hermes_code/run_agent.py
@@ -73,6 +73,7 @@ from hermes_constants import OPENROUTER_BASE_URL
 from agent.prompt_builder import (
     DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS,
     MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE,
+    BROWSER_CAPTCHA_GUIDANCE,
 )
 from agent.model_metadata import (
     fetch_model_metadata,
@@ -2279,6 +2280,8 @@ class AIAgent:
             tool_guidance.append(SESSION_SEARCH_GUIDANCE)
         if "skill_manage" in self.valid_tool_names:
             tool_guidance.append(SKILLS_GUIDANCE)
+        if "internet_browser" in self.valid_tool_names and "to_captcha" in self.valid_tool_names:
+            tool_guidance.append(BROWSER_CAPTCHA_GUIDANCE)
         if tool_guidance:
             prompt_parts.append(" ".join(tool_guidance))
 