fix: thread safety for concurrent subagent delegation (#1672)
* fix: thread safety for concurrent subagent delegation

  Five thread-safety fixes that prevent crashes and data races when running multiple subagents concurrently via delegate_task:

  1. Remove redirect_stdout/stderr from delegate_tool — mutating global sys.stdout races with the spinner thread when multiple children start concurrently, causing segfaults. Children already run with quiet_mode=True so the redirect was redundant.
  2. Split _run_single_child into _build_child_agent (main thread) + _run_single_child (worker thread). AIAgent construction creates httpx/SSL clients which are not thread-safe to initialize concurrently.
  3. Add threading.Lock to SessionDB — subagents share the parent's SessionDB and call create_session/append_message from worker threads with no synchronization.
  4. Add _active_children_lock to AIAgent — interrupt() iterates _active_children while worker threads append/remove children.
  5. Add _client_cache_lock to auxiliary_client — multiple subagent threads may resolve clients concurrently via call_llm().

  Based on PR #1471 by peteromallet.

* feat: Honcho base_url override via config.yaml + quick command alias type

  Two features salvaged from PR #1576:

  1. Honcho base_url override: allows pointing Hermes at a remote self-hosted Honcho deployment via config.yaml:

       honcho:
         base_url: "http://192.168.x.x:8000"

     When set, this overrides the Honcho SDK's environment mapping (production/local), enabling LAN/VPN Honcho deployments without requiring the server to live on localhost. Uses config.yaml instead of env var (HONCHO_URL) per project convention.
  2. Quick command alias type: adds a new 'alias' quick command type that rewrites to another slash command before normal dispatch:

       quick_commands:
         sc:
           type: alias
           target: /context

     Supports both CLI and gateway. Arguments are forwarded to the target command.

  Based on PR #1576 by redhelix.
--------- Co-authored-by: peteromallet <peteromallet@users.noreply.github.com> Co-authored-by: redhelix <redhelix@users.noreply.github.com>
This commit is contained in:
parent
fd61ae13e5
commit
1d5a39e002
14 changed files with 397 additions and 272 deletions
|
|
@ -24,6 +24,7 @@ def main() -> int:
|
|||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent.quiet_mode = True
|
||||
parent.model = "test/model"
|
||||
parent.base_url = "http://localhost:1"
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ class TestCLISubagentInterrupt(unittest.TestCase):
|
|||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent.quiet_mode = True
|
||||
parent.model = "test/model"
|
||||
parent.base_url = "http://localhost:1"
|
||||
|
|
@ -112,21 +113,21 @@ class TestCLISubagentInterrupt(unittest.TestCase):
|
|||
mock_instance._interrupt_requested = False
|
||||
mock_instance._interrupt_message = None
|
||||
mock_instance._active_children = []
|
||||
mock_instance._active_children_lock = threading.Lock()
|
||||
mock_instance.quiet_mode = True
|
||||
mock_instance.run_conversation = mock_child_run_conversation
|
||||
mock_instance.interrupt = lambda msg=None: setattr(mock_instance, '_interrupt_requested', True) or setattr(mock_instance, '_interrupt_message', msg)
|
||||
mock_instance.tools = []
|
||||
MockAgent.return_value = mock_instance
|
||||
|
||||
|
||||
# Register child manually (normally done by _build_child_agent)
|
||||
parent._active_children.append(mock_instance)
|
||||
|
||||
result = _run_single_child(
|
||||
task_index=0,
|
||||
goal="Do something slow",
|
||||
context=None,
|
||||
toolsets=["terminal"],
|
||||
model=None,
|
||||
max_iterations=50,
|
||||
child=mock_instance,
|
||||
parent_agent=parent,
|
||||
task_count=1,
|
||||
)
|
||||
delegate_result[0] = result
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ def main() -> int:
|
|||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent.quiet_mode = True
|
||||
parent.model = "test/model"
|
||||
parent.base_url = "http://localhost:1"
|
||||
|
|
|
|||
|
|
@ -30,12 +30,14 @@ class TestInterruptPropagationToChild(unittest.TestCase):
|
|||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent.quiet_mode = True
|
||||
|
||||
child = AIAgent.__new__(AIAgent)
|
||||
child._interrupt_requested = False
|
||||
child._interrupt_message = None
|
||||
child._active_children = []
|
||||
child._active_children_lock = threading.Lock()
|
||||
child.quiet_mode = True
|
||||
|
||||
parent._active_children.append(child)
|
||||
|
|
@ -60,6 +62,7 @@ class TestInterruptPropagationToChild(unittest.TestCase):
|
|||
child._interrupt_message = "msg"
|
||||
child.quiet_mode = True
|
||||
child._active_children = []
|
||||
child._active_children_lock = threading.Lock()
|
||||
|
||||
# Global is set
|
||||
set_interrupt(True)
|
||||
|
|
@ -78,6 +81,7 @@ class TestInterruptPropagationToChild(unittest.TestCase):
|
|||
child._interrupt_requested = False
|
||||
child._interrupt_message = None
|
||||
child._active_children = []
|
||||
child._active_children_lock = threading.Lock()
|
||||
child.quiet_mode = True
|
||||
child.api_mode = "chat_completions"
|
||||
child.log_prefix = ""
|
||||
|
|
@ -119,12 +123,14 @@ class TestInterruptPropagationToChild(unittest.TestCase):
|
|||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent.quiet_mode = True
|
||||
|
||||
child = AIAgent.__new__(AIAgent)
|
||||
child._interrupt_requested = False
|
||||
child._interrupt_message = None
|
||||
child._active_children = []
|
||||
child._active_children_lock = threading.Lock()
|
||||
child.quiet_mode = True
|
||||
|
||||
# Register child (simulating what _run_single_child does)
|
||||
|
|
|
|||
|
|
@ -47,6 +47,28 @@ class TestCLIQuickCommands:
|
|||
args = cli.console.print.call_args[0][0]
|
||||
assert "no output" in args.lower()
|
||||
|
||||
def test_alias_command_routes_to_target(self):
|
||||
"""Alias quick commands rewrite to the target command."""
|
||||
cli = self._make_cli({"shortcut": {"type": "alias", "target": "/help"}})
|
||||
with patch.object(cli, "process_command", wraps=cli.process_command) as spy:
|
||||
cli.process_command("/shortcut")
|
||||
# Should recursively call process_command with /help
|
||||
spy.assert_any_call("/help")
|
||||
|
||||
def test_alias_command_passes_args(self):
|
||||
"""Alias quick commands forward user arguments to the target."""
|
||||
cli = self._make_cli({"sc": {"type": "alias", "target": "/context"}})
|
||||
with patch.object(cli, "process_command", wraps=cli.process_command) as spy:
|
||||
cli.process_command("/sc some args")
|
||||
spy.assert_any_call("/context some args")
|
||||
|
||||
def test_alias_no_target_shows_error(self):
|
||||
cli = self._make_cli({"broken": {"type": "alias", "target": ""}})
|
||||
cli.process_command("/broken")
|
||||
cli.console.print.assert_called_once()
|
||||
args = cli.console.print.call_args[0][0]
|
||||
assert "no target defined" in args.lower()
|
||||
|
||||
def test_unsupported_type_shows_error(self):
|
||||
cli = self._make_cli({"bad": {"type": "prompt", "command": "echo hi"}})
|
||||
cli.process_command("/bad")
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ class TestRealSubagentInterrupt(unittest.TestCase):
|
|||
parent._interrupt_requested = False
|
||||
parent._interrupt_message = None
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent.quiet_mode = True
|
||||
parent.model = "test/model"
|
||||
parent.base_url = "http://localhost:1"
|
||||
|
|
@ -103,19 +104,28 @@ class TestRealSubagentInterrupt(unittest.TestCase):
|
|||
return original_run(self_agent, *args, **kwargs)
|
||||
|
||||
with patch.object(AIAgent, 'run_conversation', patched_run):
|
||||
# Build a real child agent (AIAgent is NOT patched here,
|
||||
# only run_conversation and _build_system_prompt are)
|
||||
child = AIAgent(
|
||||
base_url="http://localhost:1",
|
||||
api_key="test-key",
|
||||
model="test/model",
|
||||
provider="test",
|
||||
api_mode="chat_completions",
|
||||
max_iterations=5,
|
||||
enabled_toolsets=["terminal"],
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
platform="cli",
|
||||
)
|
||||
child._delegate_depth = 1
|
||||
parent._active_children.append(child)
|
||||
result = _run_single_child(
|
||||
task_index=0,
|
||||
goal="Test task",
|
||||
context=None,
|
||||
toolsets=["terminal"],
|
||||
model="test/model",
|
||||
max_iterations=5,
|
||||
child=child,
|
||||
parent_agent=parent,
|
||||
task_count=1,
|
||||
override_provider="test",
|
||||
override_base_url="http://localhost:1",
|
||||
override_api_key="test",
|
||||
override_api_mode="chat_completions",
|
||||
)
|
||||
result_holder[0] = result
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ Run with: python -m pytest tests/test_delegate.py -v
|
|||
import json
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
import unittest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
|
@ -44,6 +45,7 @@ def _make_mock_parent(depth=0):
|
|||
parent._session_db = None
|
||||
parent._delegate_depth = depth
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
return parent
|
||||
|
||||
|
||||
|
|
@ -722,7 +724,12 @@ class TestDelegationProviderIntegration(unittest.TestCase):
|
|||
}
|
||||
parent = _make_mock_parent(depth=0)
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
# Patch _build_child_agent since credentials are now passed there
|
||||
# (agents are built in the main thread before being handed to workers)
|
||||
with patch("tools.delegate_tool._build_child_agent") as mock_build, \
|
||||
patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_child = MagicMock()
|
||||
mock_build.return_value = mock_child
|
||||
mock_run.return_value = {
|
||||
"task_index": 0, "status": "completed",
|
||||
"summary": "Done", "api_calls": 1, "duration_seconds": 1.0
|
||||
|
|
@ -731,7 +738,8 @@ class TestDelegationProviderIntegration(unittest.TestCase):
|
|||
tasks = [{"goal": "Task A"}, {"goal": "Task B"}]
|
||||
delegate_task(tasks=tasks, parent_agent=parent)
|
||||
|
||||
for call in mock_run.call_args_list:
|
||||
self.assertEqual(mock_build.call_count, 2)
|
||||
for call in mock_build.call_args_list:
|
||||
self.assertEqual(call.kwargs.get("model"), "meta-llama/llama-4-scout")
|
||||
self.assertEqual(call.kwargs.get("override_provider"), "openrouter")
|
||||
self.assertEqual(call.kwargs.get("override_base_url"), "https://openrouter.ai/api/v1")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue