add new tool: to_captcha

This commit is contained in:
VladislavIlin7 2026-04-21 23:32:09 +03:00
parent 50589232d6
commit f1f32d8366
14 changed files with 1008 additions and 130 deletions

View file

@ -400,6 +400,7 @@ class AIAgent:
thinking_callback: callable = None,
reasoning_callback: callable = None,
clarify_callback: callable = None,
captcha_callback: callable = None,
step_callback: callable = None,
stream_delta_callback: callable = None,
tool_gen_callback: callable = None,
@ -447,6 +448,7 @@ class AIAgent:
tool_progress_callback (callable): Callback function(tool_name, args_preview) for progress notifications
clarify_callback (callable): Callback function(question, choices) -> str for interactive user questions.
Provided by the platform layer (CLI or gateway). If None, the clarify tool returns an error.
captcha_callback (callable): Callback function(payload_dict) -> dict for manual CAPTCHA completion flows.
max_tokens (int): Maximum tokens for model responses (optional, uses model default if not set)
reasoning_config (Dict): OpenRouter reasoning configuration override (e.g. {"effort": "none"} to disable thinking).
If None, defaults to {"enabled": True, "effort": "medium"} for OpenRouter. Set to disable/customize reasoning.
@ -529,6 +531,7 @@ class AIAgent:
self.thinking_callback = thinking_callback
self.reasoning_callback = reasoning_callback
self.clarify_callback = clarify_callback
self.captcha_callback = captcha_callback
self.step_callback = step_callback
self.stream_delta_callback = stream_delta_callback
self.status_callback = status_callback
@ -4693,6 +4696,18 @@ class AIAgent:
choices=function_args.get("choices"),
callback=self.clarify_callback,
)
elif function_name == "to_captcha":
from tools.to_captcha_tool import to_captcha_tool as _to_captcha_tool
return _to_captcha_tool(
task_id=function_args.get("task_id", ""),
browser_view_url=function_args.get("browser_view_url"),
captcha_type=function_args.get("captcha_type"),
instructions=function_args.get("instructions"),
detected_at=function_args.get("detected_at"),
verification=function_args.get("verification"),
resume_token=function_args.get("resume_token"),
callback=self.captcha_callback,
)
elif function_name == "delegate_task":
from tools.delegate_tool import delegate_task as _delegate_task
return _delegate_task(
@ -4711,6 +4726,53 @@ class AIAgent:
honcho_session_key=self._honcho_session_key,
)
def _maybe_resolve_captcha(self, function_name: str, function_result: str, effective_task_id: str) -> str:
    """Bridge paused browser tasks into the dedicated CAPTCHA orchestration flow.

    Only results of the ``internet_browser`` tool whose JSON payload is an
    object with ``status == "awaiting_user_captcha"`` are acted on; every
    other result is handed back untouched.

    Args:
        function_name: Name of the tool that produced ``function_result``.
        function_result: Raw tool result, expected to be a JSON string.
        effective_task_id: Fallback task id used when neither the payload nor
            its ``human_intervention`` section carries one; also forwarded to
            ``self._invoke_tool``.

    Returns:
        One of:
        * ``function_result`` unchanged, when the result is not a paused
          browser task or the ``to_captcha`` tool returned a non-string;
        * the ``to_captcha`` result unchanged, when it is a string that does
          not decode to a JSON object;
        * the JSON-encoded ``task_result`` when the CAPTCHA flow reports
          ``status == "resumed"`` with a dict ``task_result``;
        * otherwise a JSON failure envelope with ``success: False`` that
          embeds the original ``human_intervention`` data and the CAPTCHA
          flow payload.
    """
    if function_name != "internet_browser":
        return function_result

    try:
        payload = json.loads(function_result)
    except (json.JSONDecodeError, TypeError):
        return function_result
    if not isinstance(payload, dict) or payload.get("status") != "awaiting_user_captcha":
        return function_result

    # A truthy non-dict value (e.g. a plain string) would make the .get()
    # lookups below raise AttributeError, so anything but a dict is treated
    # as "no details supplied".
    human = payload.get("human_intervention")
    if not isinstance(human, dict):
        human = {}

    captcha_args = {
        "task_id": payload.get("task_id") or human.get("task_id") or effective_task_id,
    }
    for field in (
        "browser_view_url",
        "captcha_type",
        "instructions",
        "detected_at",
        "verification",
        "resume_token",
    ):
        captcha_args[field] = human.get(field)

    captcha_result = self._invoke_tool("to_captcha", captcha_args, effective_task_id)

    # Callers slice and len() the returned value, so never leak a non-string
    # (e.g. None); the paused browser payload is still the best answer.
    if not isinstance(captcha_result, str):
        return function_result

    try:
        captcha_payload = json.loads(captcha_result)
    except json.JSONDecodeError:
        return captcha_result
    if not isinstance(captcha_payload, dict):
        return captcha_result

    if captcha_payload.get("status") == "resumed":
        task_result = captcha_payload.get("task_result")
        if isinstance(task_result, dict):
            return json.dumps(task_result, ensure_ascii=False)

    # NOTE(review): a "resumed" status without a dict task_result also lands
    # here and is reported with success=False while keeping status "resumed";
    # confirm whether downstream consumers expect that combination.
    return json.dumps(
        {
            "success": False,
            "status": captcha_payload.get("status", "still_blocked"),
            "task_id": captcha_args["task_id"],
            "human_intervention": human,
            "captcha_flow": captcha_payload,
        },
        ensure_ascii=False,
    )
def _execute_tool_calls_concurrent(self, assistant_message, messages: list, effective_task_id: str, api_call_count: int = 0) -> None:
"""Execute multiple tool calls concurrently using a thread pool.
@ -4843,6 +4905,8 @@ class AIAgent:
tool_duration = 0.0
else:
function_name, function_args, function_result, tool_duration, is_error = r
function_result = self._maybe_resolve_captcha(function_name, function_result, effective_task_id)
is_error, _ = _detect_tool_failure(function_name, function_result)
if is_error:
result_preview = function_result[:200] if len(function_result) > 200 else function_result
@ -5029,6 +5093,21 @@ class AIAgent:
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}")
elif function_name == "to_captcha":
from tools.to_captcha_tool import to_captcha_tool as _to_captcha_tool
function_result = _to_captcha_tool(
task_id=function_args.get("task_id", ""),
browser_view_url=function_args.get("browser_view_url"),
captcha_type=function_args.get("captcha_type"),
instructions=function_args.get("instructions"),
detected_at=function_args.get("detected_at"),
verification=function_args.get("verification"),
resume_token=function_args.get("resume_token"),
callback=self.captcha_callback,
)
tool_duration = time.time() - tool_start_time
if self.quiet_mode:
self._vprint(f" {_get_cute_tool_message_impl('to_captcha', function_args, tool_duration, result=function_result)}")
elif function_name == "delegate_task":
from tools.delegate_tool import delegate_task as _delegate_task
tasks_arg = function_args.get("tasks")
@ -5099,6 +5178,7 @@ class AIAgent:
logger.error("handle_function_call raised for %s: %s", function_name, tool_error, exc_info=True)
tool_duration = time.time() - tool_start_time
function_result = self._maybe_resolve_captcha(function_name, function_result, effective_task_id)
result_preview = function_result if self.verbose_logging else (
function_result[:200] if len(function_result) > 200 else function_result
)