add sandbox runtime control endpoints

This commit is contained in:
Азамат Нураев 2026-04-28 21:53:26 +03:00
parent 0ca0bac9bf
commit 1b38bcfeab
17 changed files with 1408 additions and 119 deletions

View file

@ -6,14 +6,23 @@ from uuid import UUID
import pytest
from adapter.observability.noop import NoopMetrics, NoopTracer
from domain.error import SandboxConflictError
from domain.sandbox import SandboxSession, SandboxStatus
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import Attrs, AttrValue
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand
from usecase.sandbox import (
CleanupExpiredSandboxes,
CreateSandbox,
CreateSandboxCommand,
DeleteSandbox,
DeleteSandboxCommand,
)
CHAT_ID = UUID('11111111-1111-1111-1111-111111111111')
NON_CANONICAL_CHAT_ID = '11111111111111111111111111111111'
AGENT_ID = 'agent-alpha'
VOLUME_HOST_PATH = '/srv/sandbox/request-volume'
EXPIRED_CHAT_ID = UUID('22222222-2222-2222-2222-222222222222')
BOUNDARY_CHAT_ID = UUID('33333333-3333-3333-3333-333333333333')
ACTIVE_CHAT_ID = UUID('44444444-4444-4444-4444-444444444444')
@ -30,6 +39,19 @@ SESSION_CLEAN_ID = UUID('00000000-0000-0000-0000-000000000008')
SESSION_REPLACEMENT_ID = UUID('00000000-0000-0000-0000-000000000009')
def _create_command(
    chat_id: UUID = CHAT_ID,
    *,
    agent_id: str = AGENT_ID,
    volume_host_path: str = VOLUME_HOST_PATH,
) -> CreateSandboxCommand:
    """Build a ``CreateSandboxCommand`` for tests.

    Every argument falls back to the module-level test constant, so a
    test only spells out the fields it wants to vary.

    Args:
        chat_id: Chat the sandbox is requested for.
        agent_id: Agent identifier forwarded in the command.
        volume_host_path: Host path of the volume forwarded in the command.

    Returns:
        The command carrying the three values unchanged.
    """
    command_fields = {
        'chat_id': chat_id,
        'agent_id': agent_id,
        'volume_host_path': volume_host_path,
    }
    return CreateSandboxCommand(**command_fields)
class FakeClock:
def __init__(self, now: datetime) -> None:
self._now = now
@ -238,6 +260,7 @@ class BlockingCreateRuntime:
def __init__(self) -> None:
self.create_calls: list[dict[str, object]] = []
self.stop_calls: list[str] = []
self.delete_calls: list[str] = []
self.create_started = threading.Event()
self.allow_create = threading.Event()
@ -246,6 +269,8 @@ class BlockingCreateRuntime:
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -253,6 +278,8 @@ class BlockingCreateRuntime:
{
'session_id': session_id,
'chat_id': chat_id,
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'created_at': created_at,
'expires_at': expires_at,
}
@ -266,11 +293,16 @@ class BlockingCreateRuntime:
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=volume_host_path,
)
def stop(self, container_id: str) -> None:
    """Record ``container_id`` in ``stop_calls`` so tests can assert on it."""
    recorded_stops = self.stop_calls
    recorded_stops.append(container_id)
def delete(self, container_id: str) -> None:
    """Record ``container_id`` in ``delete_calls`` so tests can assert on it."""
    recorded_deletes = self.delete_calls
    recorded_deletes.append(container_id)
class StaleSnapshotRepository(InMemorySandboxSessionRepository):
def __init__(self, snapshot: SandboxSession) -> None:
@ -301,12 +333,15 @@ class FakeRuntime:
def __init__(self) -> None:
self.create_calls: list[dict[str, object]] = []
self.stop_calls: list[str] = []
self.delete_calls: list[str] = []
def create(
self,
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -314,6 +349,8 @@ class FakeRuntime:
{
'session_id': session_id,
'chat_id': chat_id,
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'created_at': created_at,
'expires_at': expires_at,
}
@ -325,11 +362,16 @@ class FakeRuntime:
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=volume_host_path,
)
def stop(self, container_id: str) -> None:
    """Append ``container_id`` to ``stop_calls``; no real container is touched."""
    stop_log = self.stop_calls
    stop_log.append(container_id)
def delete(self, container_id: str) -> None:
    """Append ``container_id`` to ``delete_calls``; no real container is touched."""
    delete_log = self.delete_calls
    delete_log.append(container_id)
class FailingStopRuntime(FakeRuntime):
def __init__(self, failing_container_id: str) -> None:
@ -352,6 +394,8 @@ class FailingCreateRuntime(FakeRuntime):
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -359,6 +403,8 @@ class FailingCreateRuntime(FakeRuntime):
{
'session_id': session_id,
'chat_id': chat_id,
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'created_at': created_at,
'expires_at': expires_at,
}
@ -375,6 +421,8 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=1),
expires_at=now + timedelta(minutes=4),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
repository = InMemorySandboxSessionRepository()
repository.save(session)
@ -392,7 +440,7 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert result == session
assert runtime.create_calls == []
@ -421,6 +469,8 @@ def test_create_sandbox_reuse_records_observability() -> None:
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=1),
expires_at=now + timedelta(minutes=4),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
repository = InMemorySandboxSessionRepository()
repository.save(session)
@ -437,7 +487,7 @@ def test_create_sandbox_reuse_records_observability() -> None:
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert result == session
_assert_increment_metric_present(
@ -486,7 +536,7 @@ def test_create_sandbox_replace_records_observability_and_final_active_count(
)
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert result.session_id == SESSION_NEW_ID
assert repository.count_active() == 1
@ -543,13 +593,15 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one(
)
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert runtime.stop_calls == ['container-old']
assert runtime.create_calls == [
{
'session_id': SESSION_NEW_ID,
'chat_id': CHAT_ID,
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
'created_at': now,
'expires_at': now + timedelta(minutes=5),
}
@ -561,6 +613,8 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one(
status=SandboxStatus.RUNNING,
created_at=now,
expires_at=now + timedelta(minutes=5),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
assert repository.get_active_by_chat_id(CHAT_ID) == result
assert locker.chat_ids == [CHAT_ID]
@ -603,7 +657,7 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=UUID(NON_CANONICAL_CHAT_ID)))
result = usecase.execute(_create_command(UUID(NON_CANONICAL_CHAT_ID)))
assert result.chat_id == CHAT_ID
assert result.container_id == f'container-{result.session_id}'
@ -614,6 +668,8 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
assert runtime.create_calls[0] == {
'session_id': result.session_id,
'chat_id': CHAT_ID,
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
'created_at': now,
'expires_at': now + timedelta(minutes=5),
}
@ -633,6 +689,105 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
]
def test_create_sandbox_passes_agent_and_volume_params_to_runtime() -> None:
    """Creating a sandbox forwards ``agent_id`` and ``volume_host_path``.

    With an empty repository the use case must call the runtime exactly
    once, hand it the command's agent and volume values, and return (and
    persist) a session carrying those same values.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    session_store = InMemorySandboxSessionRepository()
    fake_runtime = FakeRuntime()
    create_sandbox = CreateSandbox(
        repository=session_store,
        locker=FakeLocker(),
        runtime=fake_runtime,
        clock=FakeClock(frozen_now),
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
        ttl=timedelta(minutes=5),
    )

    created = create_sandbox.execute(_create_command())

    # Exactly one runtime call, carrying the command's parameters.
    assert len(fake_runtime.create_calls) == 1
    recorded_call = fake_runtime.create_calls[0]
    assert recorded_call['agent_id'] == AGENT_ID
    assert recorded_call['volume_host_path'] == VOLUME_HOST_PATH

    # The returned session mirrors the parameters and is the stored one.
    assert created.agent_id == AGENT_ID
    assert created.volume_host_path == VOLUME_HOST_PATH
    assert session_store.get_active_by_chat_id(CHAT_ID) == created
def test_create_sandbox_reuses_active_session_when_params_match() -> None:
    """An unexpired session with matching parameters is returned as-is.

    The stored session uses the same agent and volume as the default
    command, so the use case must return it without issuing any create,
    stop, or delete call to the runtime.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    started_at = frozen_now - timedelta(minutes=1)
    valid_until = frozen_now + timedelta(minutes=4)
    existing = SandboxSession(
        session_id=SESSION_REUSED_ID,
        chat_id=CHAT_ID,
        container_id='container-1',
        status=SandboxStatus.RUNNING,
        created_at=started_at,
        expires_at=valid_until,
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
    )
    session_store = InMemorySandboxSessionRepository()
    session_store.save(existing)
    fake_runtime = FakeRuntime()
    create_sandbox = CreateSandbox(
        repository=session_store,
        locker=FakeLocker(),
        runtime=fake_runtime,
        clock=FakeClock(frozen_now),
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
        ttl=timedelta(minutes=5),
    )

    reused = create_sandbox.execute(_create_command())

    assert reused == existing
    # Reuse must not touch the runtime at all.
    assert fake_runtime.create_calls == []
    assert fake_runtime.stop_calls == []
    assert fake_runtime.delete_calls == []
def test_create_sandbox_reuse_mismatch_raises_conflict_without_runtime_calls() -> None:
    """A request with different agent/volume values raises a conflict.

    An unexpired session exists for the chat, but the command asks for a
    different agent and volume. The use case must raise
    ``SandboxConflictError``, make no runtime calls, and leave the stored
    session unchanged.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    started_at = frozen_now - timedelta(minutes=1)
    valid_until = frozen_now + timedelta(minutes=4)
    existing = SandboxSession(
        session_id=SESSION_REUSED_ID,
        chat_id=CHAT_ID,
        container_id='container-1',
        status=SandboxStatus.RUNNING,
        created_at=started_at,
        expires_at=valid_until,
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
    )
    session_store = InMemorySandboxSessionRepository()
    session_store.save(existing)
    fake_runtime = FakeRuntime()
    create_sandbox = CreateSandbox(
        repository=session_store,
        locker=FakeLocker(),
        runtime=fake_runtime,
        clock=FakeClock(frozen_now),
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
        ttl=timedelta(minutes=5),
    )

    with pytest.raises(SandboxConflictError):
        mismatched_command = _create_command(
            agent_id='agent-beta',
            volume_host_path='/srv/sandbox/other-volume',
        )
        create_sandbox.execute(mismatched_command)

    # The conflict is detected before any runtime interaction.
    assert fake_runtime.create_calls == []
    assert fake_runtime.stop_calls == []
    assert fake_runtime.delete_calls == []
    # The original session is still the active one for the chat.
    assert session_store.get_active_by_chat_id(CHAT_ID) == existing
def test_create_sandbox_error_records_observability(monkeypatch) -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
metrics = RecordingMetrics()
@ -650,7 +805,7 @@ def test_create_sandbox_error_records_observability(monkeypatch) -> None:
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='create_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
_assert_increment_metric_present(
metrics,
@ -688,7 +843,7 @@ def test_create_sandbox_save_failure_stops_untracked_container(monkeypatch) -> N
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='save_failed'):
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
assert len(runtime.create_calls) == 1
assert runtime.stop_calls == [f'container-{SESSION_NEW_ID}']
@ -731,7 +886,7 @@ def test_create_sandbox_replace_stop_failure_preserves_separate_identities(
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='stop_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
_assert_increment_metric_present(
metrics,
@ -786,7 +941,7 @@ def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids(
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='save_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
assert runtime.stop_calls == ['container-old', f'container-{SESSION_NEW_ID}']
assert len(runtime.create_calls) == 1
@ -840,7 +995,7 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
def run_create(index: int) -> None:
try:
results[index] = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
results[index] = usecase.execute(_create_command())
except Exception as exc:
errors.append(exc)
@ -868,6 +1023,8 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
status=SandboxStatus.RUNNING,
created_at=now,
expires_at=now + timedelta(minutes=5),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
assert len(runtime.create_calls) == 1
assert runtime.stop_calls == []
@ -895,6 +1052,67 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
]
def test_delete_sandbox_deletes_session_and_removes_registry_entry() -> None:
    """Deleting a chat's active sandbox removes its container and its record.

    The result must report ``'deleted'`` with the session and container
    ids, the runtime must see one ``delete`` call and no ``stop`` call,
    the repository must no longer hold an active session for the chat,
    and the locker must have been used for that chat id.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    started_at = frozen_now - timedelta(minutes=1)
    valid_until = frozen_now + timedelta(minutes=4)
    active_session = SandboxSession(
        session_id=SESSION_ACTIVE_ID,
        chat_id=CHAT_ID,
        container_id='container-active',
        status=SandboxStatus.RUNNING,
        created_at=started_at,
        expires_at=valid_until,
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
    )
    session_store = InMemorySandboxSessionRepository()
    session_store.save(active_session)
    fake_runtime = FakeRuntime()
    chat_locker = FakeLocker()
    delete_sandbox = DeleteSandbox(
        repository=session_store,
        locker=chat_locker,
        runtime=fake_runtime,
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
    )

    outcome = delete_sandbox.execute(DeleteSandboxCommand(chat_id=CHAT_ID))

    # Result payload identifies what was removed.
    assert outcome.chat_id == CHAT_ID
    assert outcome.result == 'deleted'
    assert outcome.session_id == SESSION_ACTIVE_ID
    assert outcome.container_id == 'container-active'

    # Runtime saw a delete (not a stop) for the session's container.
    assert fake_runtime.delete_calls == ['container-active']
    assert fake_runtime.stop_calls == []

    # Registry entry is gone and the chat lock was taken once.
    assert session_store.get_active_by_chat_id(CHAT_ID) is None
    assert chat_locker.chat_ids == [CHAT_ID]
def test_delete_sandbox_returns_idempotent_not_found_without_runtime_calls() -> None:
    """Deleting when no session exists reports ``'not_found'`` and is a no-op.

    With an empty repository the result must carry no session or
    container id, the runtime must receive no calls, and the locker must
    still have been used for the chat id.
    """
    fake_runtime = FakeRuntime()
    chat_locker = FakeLocker()
    delete_sandbox = DeleteSandbox(
        repository=InMemorySandboxSessionRepository(),
        locker=chat_locker,
        runtime=fake_runtime,
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
    )

    outcome = delete_sandbox.execute(DeleteSandboxCommand(chat_id=CHAT_ID))

    # Result payload reports that nothing was there to delete.
    assert outcome.chat_id == CHAT_ID
    assert outcome.result == 'not_found'
    assert outcome.session_id is None
    assert outcome.container_id is None

    # No runtime interaction of any kind.
    assert fake_runtime.create_calls == []
    assert fake_runtime.stop_calls == []
    assert fake_runtime.delete_calls == []

    assert chat_locker.chat_ids == [CHAT_ID]
def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expired_session = SandboxSession(