add sandbox runtime control endpoints

This commit is contained in:
Азамат Нураев 2026-04-28 21:53:26 +03:00
parent 0ca0bac9bf
commit 1b38bcfeab
17 changed files with 1408 additions and 119 deletions

View file

@ -27,16 +27,26 @@ from adapter.http.fastapi import app as app_module
from adapter.observability.noop import NoopMetrics, NoopTracer
from adapter.observability.runtime import ObservabilityRuntime
from adapter.sandbox.reconciliation import SandboxSessionReconciler
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from domain.error import SandboxConflictError, SandboxError, SandboxStartError
from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import Attrs
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand
from usecase.sandbox import (
CleanupExpiredSandboxes,
CreateSandbox,
CreateSandboxCommand,
DeleteSandbox,
DeleteSandboxCommand,
DeleteSandboxResult,
)
CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
SESSION_ID = UUID('00000000-0000-0000-0000-000000000011')
AGENT_ID = 'agent-alpha'
VOLUME_HOST_PATH = '/srv/sandbox/request-volume'
ENDPOINT = SandboxEndpoint(ip='172.20.0.8', port=8000)
class FakeLogger:
@ -82,6 +92,18 @@ class FakeCleanupExpiredSandboxes(CleanupExpiredSandboxes):
return []
class FakeDeleteSandboxUsecase(DeleteSandbox):
    """Test double for ``DeleteSandbox``.

    Records every command passed to ``execute`` in ``commands`` and answers
    with the canned result supplied at construction time. When no canned
    result was supplied, it answers with a ``'not_found'`` result for the
    command's ``chat_id``.
    """

    def __init__(self, result: DeleteSandboxResult | None = None) -> None:
        self.commands: list[DeleteSandboxCommand] = []
        self._result = result

    def execute(self, command: DeleteSandboxCommand) -> DeleteSandboxResult:
        """Record ``command`` and return the canned (or default) result."""
        self.commands.append(command)
        canned = self._result
        if canned is not None:
            return canned
        return DeleteSandboxResult(chat_id=command.chat_id, result='not_found')
class FakeDockerClient(DockerClient):
def __init__(self, base_url: str | None = None) -> None:
self.base_url = base_url
@ -197,10 +219,18 @@ class FakeLifecycleRuntime:
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
self.create_calls.append(CreateSandboxCommand(chat_id=chat_id))
self.create_calls.append(
CreateSandboxCommand(
chat_id=chat_id,
agent_id=agent_id,
volume_host_path=volume_host_path,
)
)
session = SandboxSession(
session_id=session_id,
chat_id=chat_id,
@ -208,6 +238,9 @@ class FakeLifecycleRuntime:
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=volume_host_path,
endpoint=ENDPOINT,
)
self._sessions = [
existing for existing in self._sessions if existing.chat_id != chat_id
@ -218,6 +251,9 @@ class FakeLifecycleRuntime:
def stop(self, container_id: str) -> None:
self.stop_calls.append(container_id)
def delete(self, container_id: str) -> None:
# Records the deleted container id in ``stop_calls`` -- the same list that
# ``stop`` appends to -- so assertions on this fake cannot tell a stop from
# a delete.
# NOTE(review): the fakes in the use-case tests track deletes in a separate
# ``delete_calls`` list; confirm whether sharing ``stop_calls`` here is
# intentional before relying on it.
self.stop_calls.append(container_id)
class FixedSandboxState:
def __init__(self, sessions: list[SandboxSession]) -> None:
@ -287,6 +323,8 @@ def build_config() -> AppConfig:
docker=DockerConfig(base_url='unix:///var/run/docker.sock'),
sandbox=SandboxConfig(
image='sandbox:latest',
network_name='sandbox',
agent_service_port=8000,
ttl_seconds=300,
cleanup_interval_seconds=60,
chats_root='/tmp/chats',
@ -295,6 +333,7 @@ def build_config() -> AppConfig:
chat_mount_path='/workspace/chat',
dependencies_mount_path='/workspace/dependencies',
lambda_tools_mount_path='/workspace/lambda-tools',
volume_mount_path='/workspace/volume',
),
security=SecurityConfig(
token_header='Authorization',
@ -310,6 +349,7 @@ def build_container(
cleanup_usecase: CleanupExpiredSandboxes,
logger: FakeLogger,
docker_client: FakeDockerClient,
delete_sandbox_usecase: DeleteSandbox | None = None,
sandbox_reconciler: SandboxSessionReconciler | None = None,
) -> AppContainer:
observability = ObservabilityRuntime(
@ -330,6 +370,7 @@ def build_container(
usecases = AppUsecases(
create_sandbox=create_sandbox_usecase,
cleanup_expired_sandboxes=cleanup_usecase,
delete_sandbox=delete_sandbox_usecase or FakeDeleteSandboxUsecase(),
)
return AppContainer(
config=config,
@ -419,6 +460,10 @@ async def get_json(app: FastAPI, path: str) -> tuple[int, dict[str, object]]:
return await request_json(app, 'GET', path)
async def delete_json(app: FastAPI, path: str) -> tuple[int, dict[str, object]]:
    """Send a DELETE request to ``path`` via ``request_json`` and return its result."""
    outcome = await request_json(app, 'DELETE', path)
    return outcome
async def exercise_create_request(
app: FastAPI,
payload: dict[str, str],
@ -445,6 +490,19 @@ async def exercise_get_request(
await app.router.shutdown()
async def exercise_delete_request(
    app: FastAPI,
    path: str,
) -> tuple[int, dict[str, object]]:
    """Run one DELETE request between the app router's startup and shutdown.

    The router is always shut down, even when the request raises.
    Returns the ``(status, response)`` pair produced by ``delete_json``.
    """
    await app.router.startup()
    try:
        status_code, body = await delete_json(app, path)
        # Yield to the event loop once before the router is shut down.
        await asyncio.sleep(0)
    finally:
        await app.router.shutdown()
    return status_code, body
def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None:
config = build_config()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
@ -455,6 +513,9 @@ def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None
status=SandboxStatus.RUNNING,
created_at=expires_at - timedelta(minutes=5),
expires_at=expires_at,
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
endpoint=ENDPOINT,
)
logger = FakeLogger()
create_usecase = FakeCreateSandboxUsecase(session=session)
@ -475,19 +536,31 @@ def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': NON_CANONICAL_CHAT_ID})
exercise_create_request(
app,
{
'chat_id': NON_CANONICAL_CHAT_ID,
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
},
)
)
assert status_code == 200
assert response == {
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
'container_id': 'container-123',
'endpoint': {'ip': '172.20.0.8', 'port': 8000},
'status': 'running',
'expires_at': '2026-04-02T12:05:00Z',
}
assert len(create_usecase.commands) == 1
assert create_usecase.commands[0].chat_id == CHAT_ID
assert create_usecase.commands[0].agent_id == AGENT_ID
assert create_usecase.commands[0].volume_host_path == VOLUME_HOST_PATH
assert cleanup_usecase.calls >= 1
assert any(
message == 'http_request'
@ -498,6 +571,55 @@ def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None
assert docker_client.close_calls == 1
def test_post_create_canonicalizes_volume_path_before_usecase(monkeypatch) -> None:
    """A ``volume_host_path`` containing ``..`` reaches the use case in normalized form."""
    config = build_config()
    deadline = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
    create_usecase = FakeCreateSandboxUsecase(
        session=SandboxSession(
            session_id=SESSION_ID,
            chat_id=CHAT_ID,
            container_id='container-123',
            status=SandboxStatus.RUNNING,
            created_at=deadline - timedelta(minutes=5),
            expires_at=deadline,
            agent_id=AGENT_ID,
            volume_host_path=VOLUME_HOST_PATH,
            endpoint=ENDPOINT,
        )
    )
    docker_client = FakeDockerClient()
    container = build_container(
        config,
        create_usecase,
        FakeCleanupExpiredSandboxes(),
        FakeLogger(),
        docker_client,
    )

    def _prebuilt_container(**kwargs):
        return container

    def _skip_instrumentation(*args, **kwargs):
        return None

    # Hand the app the prepared container and disable FastAPI instrumentation.
    monkeypatch.setattr(app_module, 'build_container', _prebuilt_container)
    monkeypatch.setattr(
        app_module.FastAPIInstrumentor, 'instrument_app', _skip_instrumentation
    )

    app = app_module.create_app(config=config)
    request_body = {
        'chat_id': str(CHAT_ID),
        'agent_id': AGENT_ID,
        'volume_host_path': '/srv/sandbox/a/../request-volume',
    }
    status_code, response = asyncio.run(exercise_create_request(app, request_body))

    assert status_code == 200
    assert response['volume_host_path'] == VOLUME_HOST_PATH
    recorded = create_usecase.commands
    assert len(recorded) == 1
    assert recorded[0].volume_host_path == VOLUME_HOST_PATH
    assert docker_client.close_calls == 1
def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None:
config = build_config()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
@ -528,7 +650,14 @@ def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None:
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': 'x/../y'})
exercise_create_request(
app,
{
'chat_id': 'x/../y',
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
},
)
)
assert status_code == 422
@ -537,6 +666,94 @@ def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None:
assert docker_client.close_calls == 1
@pytest.mark.parametrize(
    'payload',
    [
        {'chat_id': str(CHAT_ID), 'volume_host_path': VOLUME_HOST_PATH},
        {'chat_id': str(CHAT_ID), 'agent_id': AGENT_ID, 'volume_host_path': 'relative'},
    ],
)
def test_post_create_rejects_missing_or_invalid_runtime_params(
    monkeypatch,
    payload: dict[str, str],
) -> None:
    """Payloads without ``agent_id`` or with a relative volume path get a 422.

    The create use case must not be invoked for such payloads.
    """
    config = build_config()
    window_start = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    window_end = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
    stub_session = SandboxSession(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        container_id='container-123',
        status=SandboxStatus.RUNNING,
        created_at=window_start,
        expires_at=window_end,
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
        endpoint=ENDPOINT,
    )
    create_usecase = FakeCreateSandboxUsecase(session=stub_session)
    docker_client = FakeDockerClient()
    container = build_container(
        config,
        create_usecase,
        FakeCleanupExpiredSandboxes(),
        FakeLogger(),
        docker_client,
    )

    def _prebuilt_container(**kwargs):
        return container

    def _skip_instrumentation(*args, **kwargs):
        return None

    monkeypatch.setattr(app_module, 'build_container', _prebuilt_container)
    monkeypatch.setattr(
        app_module.FastAPIInstrumentor, 'instrument_app', _skip_instrumentation
    )

    app = app_module.create_app(config=config)
    status_code, response = asyncio.run(exercise_create_request(app, payload))

    assert status_code == 422
    assert 'detail' in response
    assert create_usecase.commands == []
    assert docker_client.close_calls == 1
def test_post_create_maps_conflict_to_conflict_response(monkeypatch) -> None:
    """A ``SandboxConflictError`` from the create use case yields a 409 response."""
    config = build_config()
    conflict = SandboxConflictError(str(CHAT_ID))
    docker_client = FakeDockerClient()
    container = build_container(
        config,
        FakeCreateSandboxUsecase(error=conflict),
        FakeCleanupExpiredSandboxes(),
        FakeLogger(),
        docker_client,
    )

    def _prebuilt_container(**kwargs):
        return container

    def _skip_instrumentation(*args, **kwargs):
        return None

    monkeypatch.setattr(app_module, 'build_container', _prebuilt_container)
    monkeypatch.setattr(
        app_module.FastAPIInstrumentor, 'instrument_app', _skip_instrumentation
    )

    app = app_module.create_app(config=config)
    request_body = {
        'chat_id': str(CHAT_ID),
        'agent_id': AGENT_ID,
        'volume_host_path': VOLUME_HOST_PATH,
    }
    status_code, response = asyncio.run(exercise_create_request(app, request_body))

    assert status_code == 409
    assert response == {'detail': 'sandbox_conflict'}
    assert docker_client.close_calls == 1
def test_post_create_maps_start_errors_to_service_unavailable(monkeypatch) -> None:
config = build_config()
logger = FakeLogger()
@ -558,7 +775,14 @@ def test_post_create_maps_start_errors_to_service_unavailable(monkeypatch) -> No
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': str(CHAT_ID)})
exercise_create_request(
app,
{
'chat_id': str(CHAT_ID),
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
},
)
)
assert status_code == 503
@ -587,7 +811,14 @@ def test_post_create_maps_generic_sandbox_errors_to_internal_error(monkeypatch)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': str(CHAT_ID)})
exercise_create_request(
app,
{
'chat_id': str(CHAT_ID),
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
},
)
)
assert status_code == 500
@ -595,6 +826,89 @@ def test_post_create_maps_generic_sandbox_errors_to_internal_error(monkeypatch)
assert docker_client.close_calls == 1
def test_delete_sandbox_endpoint_returns_deleted(monkeypatch) -> None:
    """DELETE on the sandbox route returns the use case's ``'deleted'`` result as JSON."""
    config = build_config()
    canned_result = DeleteSandboxResult(
        chat_id=CHAT_ID,
        result='deleted',
        session_id=SESSION_ID,
        container_id='container-123',
    )
    delete_usecase = FakeDeleteSandboxUsecase(canned_result)
    docker_client = FakeDockerClient()
    container = build_container(
        config,
        # The create use case fails loudly if this test ever reaches it.
        FakeCreateSandboxUsecase(error=AssertionError('unused')),
        FakeCleanupExpiredSandboxes(),
        FakeLogger(),
        docker_client,
        delete_sandbox_usecase=delete_usecase,
    )

    def _prebuilt_container(**kwargs):
        return container

    def _skip_instrumentation(*args, **kwargs):
        return None

    monkeypatch.setattr(app_module, 'build_container', _prebuilt_container)
    monkeypatch.setattr(
        app_module.FastAPIInstrumentor, 'instrument_app', _skip_instrumentation
    )

    app = app_module.create_app(config=config)
    status_code, response = asyncio.run(
        exercise_delete_request(app, f'/api/v1/sandboxes/{CHAT_ID}')
    )

    expected_body = {
        'chat_id': str(CHAT_ID),
        'result': 'deleted',
        'session_id': str(SESSION_ID),
        'container_id': 'container-123',
    }
    assert status_code == 200
    assert response == expected_body
    assert delete_usecase.commands == [DeleteSandboxCommand(chat_id=CHAT_ID)]
    assert docker_client.close_calls == 1
def test_delete_sandbox_endpoint_returns_not_found(monkeypatch) -> None:
    """A ``'not_found'`` delete result is still a 200, with null session and container ids."""
    config = build_config()
    canned_result = DeleteSandboxResult(chat_id=CHAT_ID, result='not_found')
    delete_usecase = FakeDeleteSandboxUsecase(canned_result)
    docker_client = FakeDockerClient()
    container = build_container(
        config,
        # The create use case fails loudly if this test ever reaches it.
        FakeCreateSandboxUsecase(error=AssertionError('unused')),
        FakeCleanupExpiredSandboxes(),
        FakeLogger(),
        docker_client,
        delete_sandbox_usecase=delete_usecase,
    )

    def _prebuilt_container(**kwargs):
        return container

    def _skip_instrumentation(*args, **kwargs):
        return None

    monkeypatch.setattr(app_module, 'build_container', _prebuilt_container)
    monkeypatch.setattr(
        app_module.FastAPIInstrumentor, 'instrument_app', _skip_instrumentation
    )

    app = app_module.create_app(config=config)
    status_code, response = asyncio.run(
        exercise_delete_request(app, f'/api/v1/sandboxes/{CHAT_ID}')
    )

    expected_body = {
        'chat_id': str(CHAT_ID),
        'result': 'not_found',
        'session_id': None,
        'container_id': None,
    }
    assert status_code == 200
    assert response == expected_body
    assert delete_usecase.commands == [DeleteSandboxCommand(chat_id=CHAT_ID)]
    assert docker_client.close_calls == 1
def test_startup_reconciliation_reuses_existing_container_after_restart(
monkeypatch,
) -> None:
@ -607,6 +921,9 @@ def test_startup_reconciliation_reuses_existing_container_after_restart(
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=created_at + timedelta(minutes=5),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
endpoint=ENDPOINT,
)
logger = FakeLogger()
docker_client = FakeDockerClient()
@ -618,6 +935,7 @@ def test_startup_reconciliation_reuses_existing_container_after_restart(
tracer=NoopTracer(),
)
repositories = AppRepositories(sandbox_session=repository)
locker = ProcessLocalSandboxLifecycleLocker()
reconciler = SandboxSessionReconciler(
state_source=runtime,
registry=repository,
@ -628,7 +946,7 @@ def test_startup_reconciliation_reuses_existing_container_after_restart(
usecases = AppUsecases(
create_sandbox=CreateSandbox(
repository=repository,
locker=ProcessLocalSandboxLifecycleLocker(),
locker=locker,
runtime=runtime,
clock=FakeClock(created_at),
logger=logger,
@ -638,13 +956,21 @@ def test_startup_reconciliation_reuses_existing_container_after_restart(
),
cleanup_expired_sandboxes=CleanupExpiredSandboxes(
repository=repository,
locker=ProcessLocalSandboxLifecycleLocker(),
locker=locker,
runtime=runtime,
clock=FakeClock(created_at),
logger=logger,
metrics=NoopMetrics(),
tracer=NoopTracer(),
),
delete_sandbox=DeleteSandbox(
repository=repository,
locker=locker,
runtime=runtime,
logger=logger,
metrics=NoopMetrics(),
tracer=NoopTracer(),
),
)
container = AppContainer(
config=config,
@ -662,14 +988,24 @@ def test_startup_reconciliation_reuses_existing_container_after_restart(
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': str(CHAT_ID)})
exercise_create_request(
app,
{
'chat_id': str(CHAT_ID),
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
},
)
)
assert status_code == 200
assert response == {
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
'container_id': 'container-123',
'endpoint': {'ip': '172.20.0.8', 'port': 8000},
'status': 'running',
'expires_at': '2026-04-02T12:05:00Z',
}

View file

@ -1,3 +1,4 @@
from dataclasses import replace
from datetime import UTC, datetime, timedelta
from pathlib import Path
from types import TracebackType
@ -13,22 +14,51 @@ from adapter.config.model import SandboxConfig
from adapter.docker.runtime import DockerSandboxRuntime
from adapter.observability.noop import NoopMetrics, NoopTracer
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus
from usecase.interface import Attrs, AttrValue
CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
SESSION_ID = UUID('00000000-0000-0000-0000-000000000010')
AGENT_ID = 'agent-alpha'
def _network_attrs(network_name: str = 'sandbox', ip: str = '172.20.0.8') -> dict[str, object]:
return {
'NetworkSettings': {
'Networks': {
network_name: {
'IPAddress': ip,
}
}
}
}
class FakeContainer:
def __init__(self, container_id: str) -> None:
def __init__(
self,
container_id: str,
*,
network_name: str = 'sandbox',
ip: str = '172.20.0.8',
) -> None:
self.id = container_id
self.stop_calls = 0
self.remove_calls: list[dict[str, bool]] = []
self.reload_calls = 0
self.attrs = _network_attrs(network_name, ip)
self.labels: dict[str, str] = {}
def stop(self) -> None:
self.stop_calls += 1
def reload(self) -> None:
self.reload_calls += 1
def remove(self, *, force: bool) -> None:
self.remove_calls.append({'force': force})
class FakeListedContainer(FakeContainer):
def __init__(
@ -37,10 +67,12 @@ class FakeListedContainer(FakeContainer):
*,
labels: dict[str, str],
created_at: str,
network_name: str = 'sandbox',
ip: str = '172.20.0.8',
) -> None:
super().__init__(container_id)
super().__init__(container_id, network_name=network_name, ip=ip)
self.labels = labels
self.attrs = {'Created': created_at}
self.attrs['Created'] = created_at
class FailingStopContainer(FakeListedContainer):
@ -66,8 +98,10 @@ class FailingStopContainer(FakeListedContainer):
class RunKwargs(TypedDict):
detach: bool
environment: dict[str, str]
labels: dict[str, str]
mounts: list[Mount]
network: str
class RunCall(TypedDict):
@ -90,16 +124,20 @@ class FakeContainers:
image: str,
*,
detach: bool,
environment: dict[str, str],
labels: dict[str, str],
mounts: list[Mount],
network: str,
) -> FakeContainer:
self.run_calls.append(
{
'args': (image,),
'kwargs': {
'detach': detach,
'environment': environment,
'labels': labels,
'mounts': mounts,
'network': network,
},
}
)
@ -266,6 +304,8 @@ def _find_record_call(
def build_config(tmp_path: Path) -> SandboxConfig:
return SandboxConfig(
image='sandbox:latest',
network_name='sandbox',
agent_service_port=8000,
ttl_seconds=300,
cleanup_interval_seconds=60,
chats_root=str(tmp_path / 'chats'),
@ -274,6 +314,7 @@ def build_config(tmp_path: Path) -> SandboxConfig:
chat_mount_path='/workspace/chat',
dependencies_mount_path='/workspace/dependencies',
lambda_tools_mount_path='/workspace/lambda-tools',
volume_mount_path='/workspace/volume',
)
@ -303,6 +344,8 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
session = runtime.create(
session_id=SESSION_ID,
chat_id=UUID(NON_CANONICAL_CHAT_ID),
agent_id=AGENT_ID,
volume_host_path=str(tmp_path / 'request-volume'),
created_at=created_at,
expires_at=expires_at,
)
@ -313,15 +356,25 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
assert session.status is SandboxStatus.RUNNING
assert session.created_at == created_at
assert session.expires_at == expires_at
assert session.agent_id == AGENT_ID
assert session.volume_host_path == str(
(tmp_path / 'request-volume').resolve(strict=False)
)
assert session.endpoint == SandboxEndpoint(ip='172.20.0.8', port=8000)
assert (tmp_path / 'chats' / str(CHAT_ID)).is_dir()
call = containers.run_calls[0]
assert call['args'] == ('sandbox:latest',)
assert call['kwargs']['detach'] is True
assert call['kwargs']['environment'] == {'AGENT_ID': AGENT_ID}
assert call['kwargs']['network'] == 'sandbox'
assert call['kwargs']['labels'] == {
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
'agent_id': AGENT_ID,
'volume_host_path': str((tmp_path / 'request-volume').resolve(strict=False)),
'endpoint_port': '8000',
}
mounts = call['kwargs']['mounts']
@ -344,9 +397,103 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
'Type': 'bind',
'ReadOnly': True,
},
{
'Target': '/workspace/volume',
'Source': str((tmp_path / 'request-volume').resolve(strict=False)),
'Type': 'bind',
'ReadOnly': False,
},
]
def test_runtime_create_uses_configured_network_for_endpoint(tmp_path: Path) -> None:
    """The endpoint IP comes from the configured network and the port from the config."""
    config = replace(
        build_config(tmp_path),
        network_name='agent-net',
        agent_service_port=9000,
    )
    for directory in ('dependencies', 'lambda-tools'):
        (tmp_path / directory).mkdir()
    started_container = FakeContainer(
        'container-456',
        network_name='agent-net',
        ip='10.42.0.7',
    )
    containers = FakeContainers(run_result=started_container)
    runtime = build_runtime(config, containers)
    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)

    session = runtime.create(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        agent_id=AGENT_ID,
        volume_host_path=str(tmp_path / 'request-volume'),
        created_at=created_at,
        expires_at=created_at + timedelta(minutes=5),
    )

    run_kwargs = containers.run_calls[0]['kwargs']
    assert run_kwargs['network'] == 'agent-net'
    assert session.endpoint == SandboxEndpoint(ip='10.42.0.7', port=9000)
def test_runtime_create_removes_container_when_endpoint_extraction_fails(
    tmp_path: Path,
) -> None:
    """A container attached to an unexpected network is force-removed and create fails."""
    config = build_config(tmp_path)
    for directory in ('dependencies', 'lambda-tools'):
        (tmp_path / directory).mkdir()
    # The fake container reports a network other than the configured one.
    created_container = FakeContainer('container-789', network_name='unexpected-net')
    containers = FakeContainers(run_result=created_container)
    runtime = build_runtime(config, containers)
    create_kwargs = {
        'session_id': SESSION_ID,
        'chat_id': CHAT_ID,
        'agent_id': AGENT_ID,
        'volume_host_path': str(tmp_path / 'request-volume'),
        'created_at': datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
        'expires_at': datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
    }

    with pytest.raises(SandboxStartError) as excinfo:
        runtime.create(**create_kwargs)

    assert str(excinfo.value) == 'sandbox_start_failed'
    assert containers.run_calls
    assert created_container.remove_calls == [{'force': True}]
def test_runtime_create_applies_request_volume_bind_as_rw(tmp_path: Path) -> None:
    """The requested volume is bind-mounted at ``/workspace/volume`` with ``ReadOnly`` false."""
    config = build_config(tmp_path)
    for directory in ('dependencies', 'lambda-tools'):
        (tmp_path / directory).mkdir()
    containers = FakeContainers()
    runtime = build_runtime(config, containers)
    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    request_volume = tmp_path / 'request-volume'

    runtime.create(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        agent_id=AGENT_ID,
        volume_host_path=str(request_volume),
        created_at=created_at,
        expires_at=created_at + timedelta(minutes=5),
    )

    expected_mount = {
        'Target': '/workspace/volume',
        'Source': str(request_volume.resolve(strict=False)),
        'Type': 'bind',
        'ReadOnly': False,
    }
    recorded_mounts = containers.run_calls[0]['kwargs']['mounts']
    assert expected_mount in [dict(mount) for mount in recorded_mounts]
def test_runtime_create_records_observability(tmp_path: Path) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
@ -366,6 +513,8 @@ def test_runtime_create_records_observability(tmp_path: Path) -> None:
session = runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
agent_id=AGENT_ID,
volume_host_path=str(tmp_path / 'request-volume'),
created_at=created_at,
expires_at=expires_at,
)
@ -402,6 +551,8 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing(
runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
agent_id=AGENT_ID,
volume_host_path=str(tmp_path / 'request-volume'),
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
)
@ -430,6 +581,8 @@ def test_runtime_create_error_records_observability_when_container_id_missing(
runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
agent_id=AGENT_ID,
volume_host_path=str(tmp_path / 'request-volume'),
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
)
@ -438,7 +591,7 @@ def test_runtime_create_error_records_observability_when_container_id_missing(
_find_increment_call(
metrics,
'sandbox.runtime.error.total',
attrs={'operation': 'create', 'error.type': 'SandboxStartError'},
attrs={'operation': 'create', 'error.type': 'ValueError'},
)
duration_call = _find_record_call(
metrics,
@ -598,6 +751,38 @@ def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
assert stop_error_calls == []
def test_runtime_delete_removes_container_with_force(tmp_path: Path) -> None:
    """``runtime.delete`` looks the container up by id and removes it with ``force=True``."""
    session_labels = {
        'session_id': str(SESSION_ID),
        'chat_id': str(CHAT_ID),
        'expires_at': '2026-04-02T12:05:00+00:00',
    }
    container = FakeListedContainer(
        'container-123',
        labels=session_labels,
        created_at='2026-04-02T12:00:00Z',
    )
    containers = FakeContainers()
    containers.get_result = container
    runtime = build_runtime(build_config(tmp_path), containers)

    runtime.delete('container-123')

    assert containers.get_calls == ['container-123']
    assert container.remove_calls == [{'force': True}]
def test_runtime_delete_ignores_missing_container(tmp_path: Path) -> None:
    """``runtime.delete`` does not raise when the container lookup yields ``NotFound``."""
    containers = FakeContainers()
    containers.get_result = NotFound('missing')
    runtime = build_runtime(build_config(tmp_path), containers)

    runtime.delete('container-123')

    assert containers.get_calls == ['container-123']
def test_runtime_list_active_sessions_reads_valid_labeled_containers(
tmp_path: Path,
) -> None:
@ -611,6 +796,9 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers(
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
'agent_id': AGENT_ID,
'volume_host_path': str(tmp_path / 'request-volume'),
'endpoint_port': '8000',
},
created_at='2026-04-02T12:00:00Z',
),
@ -635,10 +823,24 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers(
status=SandboxStatus.RUNNING,
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=expires_at,
agent_id=AGENT_ID,
volume_host_path=str(tmp_path / 'request-volume'),
endpoint=SandboxEndpoint(ip='172.20.0.8', port=8000),
)
]
assert containers.list_calls == [
{'filters': {'label': ['session_id', 'chat_id', 'expires_at']}}
{
'filters': {
'label': [
'session_id',
'chat_id',
'expires_at',
'agent_id',
'volume_host_path',
'endpoint_port',
]
}
}
]
@ -653,6 +855,9 @@ def test_runtime_list_active_records_observability(tmp_path: Path) -> None:
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
'agent_id': AGENT_ID,
'volume_host_path': str(tmp_path / 'request-volume'),
'endpoint_port': '8000',
},
created_at='2026-04-02T12:00:00Z',
),

View file

@ -6,14 +6,23 @@ from uuid import UUID
import pytest
from adapter.observability.noop import NoopMetrics, NoopTracer
from domain.error import SandboxConflictError
from domain.sandbox import SandboxSession, SandboxStatus
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import Attrs, AttrValue
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand
from usecase.sandbox import (
CleanupExpiredSandboxes,
CreateSandbox,
CreateSandboxCommand,
DeleteSandbox,
DeleteSandboxCommand,
)
CHAT_ID = UUID('11111111-1111-1111-1111-111111111111')
NON_CANONICAL_CHAT_ID = '11111111111111111111111111111111'
AGENT_ID = 'agent-alpha'
VOLUME_HOST_PATH = '/srv/sandbox/request-volume'
EXPIRED_CHAT_ID = UUID('22222222-2222-2222-2222-222222222222')
BOUNDARY_CHAT_ID = UUID('33333333-3333-3333-3333-333333333333')
ACTIVE_CHAT_ID = UUID('44444444-4444-4444-4444-444444444444')
@ -30,6 +39,19 @@ SESSION_CLEAN_ID = UUID('00000000-0000-0000-0000-000000000008')
SESSION_REPLACEMENT_ID = UUID('00000000-0000-0000-0000-000000000009')
def _create_command(
    chat_id: UUID = CHAT_ID,
    *,
    agent_id: str = AGENT_ID,
    volume_host_path: str = VOLUME_HOST_PATH,
) -> CreateSandboxCommand:
    """Build a ``CreateSandboxCommand``, defaulting every field to the module test constants."""
    fields = {
        'chat_id': chat_id,
        'agent_id': agent_id,
        'volume_host_path': volume_host_path,
    }
    return CreateSandboxCommand(**fields)
class FakeClock:
def __init__(self, now: datetime) -> None:
self._now = now
@ -238,6 +260,7 @@ class BlockingCreateRuntime:
def __init__(self) -> None:
self.create_calls: list[dict[str, object]] = []
self.stop_calls: list[str] = []
self.delete_calls: list[str] = []
self.create_started = threading.Event()
self.allow_create = threading.Event()
@ -246,6 +269,8 @@ class BlockingCreateRuntime:
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -253,6 +278,8 @@ class BlockingCreateRuntime:
{
'session_id': session_id,
'chat_id': chat_id,
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'created_at': created_at,
'expires_at': expires_at,
}
@ -266,11 +293,16 @@ class BlockingCreateRuntime:
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=volume_host_path,
)
def stop(self, container_id: str) -> None:
self.stop_calls.append(container_id)
def delete(self, container_id: str) -> None:
# Records the container id in ``delete_calls`` (kept separate from
# ``stop_calls``) so tests can assert on deletions independently of stops.
self.delete_calls.append(container_id)
class StaleSnapshotRepository(InMemorySandboxSessionRepository):
def __init__(self, snapshot: SandboxSession) -> None:
@ -301,12 +333,15 @@ class FakeRuntime:
def __init__(self) -> None:
self.create_calls: list[dict[str, object]] = []
self.stop_calls: list[str] = []
self.delete_calls: list[str] = []
def create(
self,
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -314,6 +349,8 @@ class FakeRuntime:
{
'session_id': session_id,
'chat_id': chat_id,
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'created_at': created_at,
'expires_at': expires_at,
}
@ -325,11 +362,16 @@ class FakeRuntime:
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=volume_host_path,
)
def stop(self, container_id: str) -> None:
self.stop_calls.append(container_id)
def delete(self, container_id: str) -> None:
# Records the container id in ``delete_calls`` (kept separate from
# ``stop_calls``) so tests can assert on deletions independently of stops.
self.delete_calls.append(container_id)
class FailingStopRuntime(FakeRuntime):
def __init__(self, failing_container_id: str) -> None:
@ -352,6 +394,8 @@ class FailingCreateRuntime(FakeRuntime):
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -359,6 +403,8 @@ class FailingCreateRuntime(FakeRuntime):
{
'session_id': session_id,
'chat_id': chat_id,
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'created_at': created_at,
'expires_at': expires_at,
}
@ -375,6 +421,8 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=1),
expires_at=now + timedelta(minutes=4),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
repository = InMemorySandboxSessionRepository()
repository.save(session)
@ -392,7 +440,7 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert result == session
assert runtime.create_calls == []
@ -421,6 +469,8 @@ def test_create_sandbox_reuse_records_observability() -> None:
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=1),
expires_at=now + timedelta(minutes=4),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
repository = InMemorySandboxSessionRepository()
repository.save(session)
@ -437,7 +487,7 @@ def test_create_sandbox_reuse_records_observability() -> None:
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert result == session
_assert_increment_metric_present(
@ -486,7 +536,7 @@ def test_create_sandbox_replace_records_observability_and_final_active_count(
)
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert result.session_id == SESSION_NEW_ID
assert repository.count_active() == 1
@ -543,13 +593,15 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one(
)
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
result = usecase.execute(_create_command())
assert runtime.stop_calls == ['container-old']
assert runtime.create_calls == [
{
'session_id': SESSION_NEW_ID,
'chat_id': CHAT_ID,
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
'created_at': now,
'expires_at': now + timedelta(minutes=5),
}
@ -561,6 +613,8 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one(
status=SandboxStatus.RUNNING,
created_at=now,
expires_at=now + timedelta(minutes=5),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
assert repository.get_active_by_chat_id(CHAT_ID) == result
assert locker.chat_ids == [CHAT_ID]
@ -603,7 +657,7 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=UUID(NON_CANONICAL_CHAT_ID)))
result = usecase.execute(_create_command(UUID(NON_CANONICAL_CHAT_ID)))
assert result.chat_id == CHAT_ID
assert result.container_id == f'container-{result.session_id}'
@ -614,6 +668,8 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
assert runtime.create_calls[0] == {
'session_id': result.session_id,
'chat_id': CHAT_ID,
'agent_id': AGENT_ID,
'volume_host_path': VOLUME_HOST_PATH,
'created_at': now,
'expires_at': now + timedelta(minutes=5),
}
@ -633,6 +689,105 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
]
def test_create_sandbox_passes_agent_and_volume_params_to_runtime() -> None:
    """Creating a sandbox forwards the agent id and volume path to the runtime.

    Starting from an empty repository, the test expects exactly one runtime
    create call whose ``agent_id`` and ``volume_host_path`` equal ``AGENT_ID``
    and ``VOLUME_HOST_PATH``, a returned session exposing the same values, and
    that session being the active one stored for ``CHAT_ID``.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    sessions = InMemorySandboxSessionRepository()
    fake_runtime = FakeRuntime()

    created = CreateSandbox(
        repository=sessions,
        locker=FakeLocker(),
        runtime=fake_runtime,
        clock=FakeClock(frozen_now),
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
        ttl=timedelta(minutes=5),
    ).execute(_create_command())

    # The runtime received a single create request with the expected params.
    assert len(fake_runtime.create_calls) == 1
    recorded_call = fake_runtime.create_calls[0]
    assert recorded_call['agent_id'] == AGENT_ID
    assert recorded_call['volume_host_path'] == VOLUME_HOST_PATH

    # The returned session carries the same params and is what got persisted.
    assert created.agent_id == AGENT_ID
    assert created.volume_host_path == VOLUME_HOST_PATH
    assert sessions.get_active_by_chat_id(CHAT_ID) == created
def test_create_sandbox_reuses_active_session_when_params_match() -> None:
    """An unexpired session with matching params is returned as-is.

    A running session for ``CHAT_ID`` with ``AGENT_ID`` / ``VOLUME_HOST_PATH``
    is saved up front with four minutes left before ``expires_at``. Executing
    the create command must return that same session and leave the fake
    runtime with no recorded create, stop, or delete calls.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    existing = SandboxSession(
        session_id=SESSION_REUSED_ID,
        chat_id=CHAT_ID,
        container_id='container-1',
        status=SandboxStatus.RUNNING,
        created_at=frozen_now - timedelta(minutes=1),
        expires_at=frozen_now + timedelta(minutes=4),
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
    )
    sessions = InMemorySandboxSessionRepository()
    sessions.save(existing)
    fake_runtime = FakeRuntime()

    reused = CreateSandbox(
        repository=sessions,
        locker=FakeLocker(),
        runtime=fake_runtime,
        clock=FakeClock(frozen_now),
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
        ttl=timedelta(minutes=5),
    ).execute(_create_command())

    assert reused == existing
    # Reuse must not touch the runtime at all.
    assert fake_runtime.create_calls == []
    assert fake_runtime.stop_calls == []
    assert fake_runtime.delete_calls == []
def test_create_sandbox_reuse_mismatch_raises_conflict_without_runtime_calls() -> None:
    """A create request with different params than the active session conflicts.

    An unexpired running session for ``CHAT_ID`` is saved with ``AGENT_ID`` and
    ``VOLUME_HOST_PATH``. Requesting a sandbox with a different agent id and
    volume path must raise ``SandboxConflictError``, record no runtime calls,
    and leave the saved session as the active one.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    existing = SandboxSession(
        session_id=SESSION_REUSED_ID,
        chat_id=CHAT_ID,
        container_id='container-1',
        status=SandboxStatus.RUNNING,
        created_at=frozen_now - timedelta(minutes=1),
        expires_at=frozen_now + timedelta(minutes=4),
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
    )
    sessions = InMemorySandboxSessionRepository()
    sessions.save(existing)
    fake_runtime = FakeRuntime()
    create_sandbox = CreateSandbox(
        repository=sessions,
        locker=FakeLocker(),
        runtime=fake_runtime,
        clock=FakeClock(frozen_now),
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
        ttl=timedelta(minutes=5),
    )

    with pytest.raises(SandboxConflictError):
        # Both the agent and the volume differ from the stored session.
        mismatched_command = _create_command(
            agent_id='agent-beta',
            volume_host_path='/srv/sandbox/other-volume',
        )
        create_sandbox.execute(mismatched_command)

    # The conflict is reported without any runtime side effects...
    assert fake_runtime.create_calls == []
    assert fake_runtime.stop_calls == []
    assert fake_runtime.delete_calls == []
    # ...and the original session is still the active one.
    assert sessions.get_active_by_chat_id(CHAT_ID) == existing
def test_create_sandbox_error_records_observability(monkeypatch) -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
metrics = RecordingMetrics()
@ -650,7 +805,7 @@ def test_create_sandbox_error_records_observability(monkeypatch) -> None:
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='create_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
_assert_increment_metric_present(
metrics,
@ -688,7 +843,7 @@ def test_create_sandbox_save_failure_stops_untracked_container(monkeypatch) -> N
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='save_failed'):
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
assert len(runtime.create_calls) == 1
assert runtime.stop_calls == [f'container-{SESSION_NEW_ID}']
@ -731,7 +886,7 @@ def test_create_sandbox_replace_stop_failure_preserves_separate_identities(
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='stop_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
_assert_increment_metric_present(
metrics,
@ -786,7 +941,7 @@ def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids(
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='save_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
usecase.execute(_create_command())
assert runtime.stop_calls == ['container-old', f'container-{SESSION_NEW_ID}']
assert len(runtime.create_calls) == 1
@ -840,7 +995,7 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
def run_create(index: int) -> None:
try:
results[index] = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
results[index] = usecase.execute(_create_command())
except Exception as exc:
errors.append(exc)
@ -868,6 +1023,8 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
status=SandboxStatus.RUNNING,
created_at=now,
expires_at=now + timedelta(minutes=5),
agent_id=AGENT_ID,
volume_host_path=VOLUME_HOST_PATH,
)
assert len(runtime.create_calls) == 1
assert runtime.stop_calls == []
@ -895,6 +1052,67 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
]
def test_delete_sandbox_deletes_session_and_removes_registry_entry() -> None:
    """Deleting an active sandbox removes its container and registry entry.

    With a running session saved for ``CHAT_ID``, the delete use case must
    report ``'deleted'`` together with the session and container ids, record
    one runtime delete call (and no stop call) for that container, leave no
    active session for the chat, and have the locker record ``CHAT_ID`` once.
    """
    frozen_now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    active = SandboxSession(
        session_id=SESSION_ACTIVE_ID,
        chat_id=CHAT_ID,
        container_id='container-active',
        status=SandboxStatus.RUNNING,
        created_at=frozen_now - timedelta(minutes=1),
        expires_at=frozen_now + timedelta(minutes=4),
        agent_id=AGENT_ID,
        volume_host_path=VOLUME_HOST_PATH,
    )
    sessions = InMemorySandboxSessionRepository()
    sessions.save(active)
    fake_runtime = FakeRuntime()
    fake_locker = FakeLocker()

    outcome = DeleteSandbox(
        repository=sessions,
        locker=fake_locker,
        runtime=fake_runtime,
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
    ).execute(DeleteSandboxCommand(chat_id=CHAT_ID))

    # The result identifies what was deleted.
    assert outcome.chat_id == CHAT_ID
    assert outcome.result == 'deleted'
    assert outcome.session_id == SESSION_ACTIVE_ID
    assert outcome.container_id == 'container-active'

    # The container is deleted directly; no separate stop call is recorded.
    assert fake_runtime.delete_calls == ['container-active']
    assert fake_runtime.stop_calls == []

    assert sessions.get_active_by_chat_id(CHAT_ID) is None
    assert fake_locker.chat_ids == [CHAT_ID]
def test_delete_sandbox_returns_idempotent_not_found_without_runtime_calls() -> None:
    """Deleting when no session exists yields ``'not_found'`` with no side effects.

    The repository is empty, so the result must carry ``CHAT_ID`` with
    ``None`` session and container ids, the fake runtime must record no
    create, stop, or delete calls, and the locker must still record
    ``CHAT_ID`` once.
    """
    fake_runtime = FakeRuntime()
    fake_locker = FakeLocker()

    outcome = DeleteSandbox(
        repository=InMemorySandboxSessionRepository(),
        locker=fake_locker,
        runtime=fake_runtime,
        logger=FakeLogger(),
        metrics=NoopMetrics(),
        tracer=NoopTracer(),
    ).execute(DeleteSandboxCommand(chat_id=CHAT_ID))

    # Nothing to delete: the result names the chat but no session/container.
    assert outcome.chat_id == CHAT_ID
    assert outcome.result == 'not_found'
    assert outcome.session_id is None
    assert outcome.container_id is None

    # The runtime is never invoked on the not-found path.
    assert fake_runtime.create_calls == []
    assert fake_runtime.stop_calls == []
    assert fake_runtime.delete_calls == []

    assert fake_locker.chat_ids == [CHAT_ID]
def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expired_session = SandboxSession(