"""Tests for the Docker sandbox runtime adapter (``DockerSandboxRuntime``)."""
from dataclasses import replace
|
|
from datetime import UTC, datetime, timedelta
|
|
from pathlib import Path
|
|
from types import TracebackType
|
|
from typing import Any, TypedDict
|
|
from uuid import UUID
|
|
|
|
import pytest
|
|
from docker import DockerClient
|
|
from docker.errors import DockerException, NotFound
|
|
from docker.types import Mount
|
|
|
|
from adapter.config.model import SandboxConfig
|
|
from adapter.docker.runtime import DockerSandboxRuntime
|
|
from adapter.observability.noop import NoopMetrics, NoopTracer
|
|
from domain.error import SandboxError, SandboxStartError
|
|
from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus
|
|
from usecase.interface import Attrs, AttrValue
|
|
|
|
CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
|
|
NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
|
|
SESSION_ID = UUID('00000000-0000-0000-0000-000000000010')
|
|
AGENT_ID = 'agent-alpha'
|
|
|
|
|
|
def _network_attrs(network_name: str = 'sandbox', ip: str = '172.20.0.8') -> dict[str, object]:
|
|
return {
|
|
'NetworkSettings': {
|
|
'Networks': {
|
|
network_name: {
|
|
'IPAddress': ip,
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
class FakeContainer:
    """In-memory stand-in for a docker container that records lifecycle calls."""

    def __init__(
        self,
        container_id: str,
        *,
        network_name: str = 'sandbox',
        ip: str = '172.20.0.8',
    ) -> None:
        # Call recorders inspected by the tests.
        self.stop_calls = 0
        self.reload_calls = 0
        self.remove_calls: list[dict[str, bool]] = []
        # Container identity and inspect-style attributes.
        self.id = container_id
        self.attrs = _network_attrs(network_name, ip)
        self.labels: dict[str, str] = {}

    def stop(self) -> None:
        """Record a stop request; performs no other work."""
        self.stop_calls += 1

    def reload(self) -> None:
        """Record an attrs-refresh request; performs no other work."""
        self.reload_calls += 1

    def remove(self, *, force: bool) -> None:
        """Record a removal request together with its force flag."""
        self.remove_calls.append({'force': force})
|
|
|
|
|
|
class FakeListedContainer(FakeContainer):
    """FakeContainer variant that also carries labels and a creation time."""

    def __init__(
        self,
        container_id: str,
        *,
        labels: dict[str, str],
        created_at: str,
        network_name: str = 'sandbox',
        ip: str = '172.20.0.8',
    ) -> None:
        super().__init__(container_id, network_name=network_name, ip=ip)
        # Expose the label map and a docker-inspect 'Created' timestamp.
        self.attrs['Created'] = created_at
        self.labels = labels
|
|
|
|
|
|
class FailingStopContainer(FakeListedContainer):
    """Listed container whose stop() raises a pre-configured exception."""

    def __init__(
        self,
        container_id: str,
        *,
        labels: dict[str, str],
        created_at: str,
        error: Exception,
    ) -> None:
        super().__init__(
            container_id,
            labels=labels,
            created_at=created_at,
        )
        self._error = error

    def stop(self) -> None:
        """Record the stop attempt, then raise the scripted error."""
        self.stop_calls += 1
        raise self._error
|
|
|
|
|
|
class RunKwargs(TypedDict):
    """Keyword arguments captured from a ``containers.run`` invocation."""

    detach: bool
    environment: dict[str, str]
    labels: dict[str, str]
    mounts: list[Mount]
    network: str
|
|
|
|
|
|
class RunCall(TypedDict):
    """One recorded ``containers.run`` call: positional args plus keywords."""

    args: tuple[str]
    kwargs: RunKwargs
|
|
|
|
|
|
class FakeContainers:
    """Fake docker ``containers`` collection with scripted results.

    Every call is recorded so tests can assert on arguments; outcomes are
    driven by ``run_result`` / ``get_result`` / ``list_result`` /
    ``list_error``.
    """

    def __init__(self, run_result: FakeContainer | None = None) -> None:
        # Recorded invocations, inspected by the tests.
        self.run_calls: list[RunCall] = []
        self.get_calls: list[str] = []
        self.list_calls: list[dict[str, object]] = []
        # Scripted outcomes.
        self.run_result = run_result or FakeContainer('container-123')
        self.get_result: FakeContainer | Exception | None = None
        self.list_result: list[object] = []
        self.list_error: Exception | None = None

    def run(
        self,
        image: str,
        *,
        detach: bool,
        environment: dict[str, str],
        labels: dict[str, str],
        mounts: list[Mount],
        network: str,
    ) -> FakeContainer:
        """Record the run invocation and return the scripted container."""
        kwargs: RunKwargs = {
            'detach': detach,
            'environment': environment,
            'labels': labels,
            'mounts': mounts,
            'network': network,
        }
        self.run_calls.append({'args': (image,), 'kwargs': kwargs})
        return self.run_result

    def get(self, container_id: str) -> FakeContainer:
        """Return the scripted get result, raising it if it is an exception."""
        self.get_calls.append(container_id)
        result = self.get_result
        if isinstance(result, Exception):
            raise result
        if result is None:
            raise AssertionError('missing get result')
        return result

    def list(self, *, filters: dict[str, list[str]]) -> list[object]:
        """Record the list filters, then raise or return the scripted result."""
        self.list_calls.append({'filters': filters})
        if self.list_error is not None:
            raise self.list_error
        return self.list_result
|
|
|
|
|
|
class FakeDockerClient(DockerClient):
    """DockerClient double backed by a :class:`FakeContainers` collection.

    Deliberately skips ``DockerClient.__init__`` so no connection to a
    Docker daemon is attempted; only the ``containers`` property is usable.
    """

    def __init__(self, containers: FakeContainers) -> None:
        self._containers = containers

    @property
    def containers(self) -> Any:
        # Shadows the base-class property to serve the injected fake.
        return self._containers
|
|
|
|
|
|
class RecordingMetrics:
    """Metrics double that appends every call as a tuple for later lookup."""

    def __init__(self) -> None:
        self.increment_calls: list[tuple[str, int, Attrs | None]] = []
        self.record_calls: list[tuple[str, float, Attrs | None]] = []
        self.set_calls: list[tuple[str, int | float, Attrs | None]] = []

    def increment(
        self,
        name: str,
        value: int = 1,
        attrs: Attrs | None = None,
    ) -> None:
        """Capture a counter increment."""
        call = (name, value, attrs)
        self.increment_calls.append(call)

    def record(
        self,
        name: str,
        value: float,
        attrs: Attrs | None = None,
    ) -> None:
        """Capture a histogram/distribution recording."""
        call = (name, value, attrs)
        self.record_calls.append(call)

    def set(
        self,
        name: str,
        value: int | float,
        attrs: Attrs | None = None,
    ) -> None:
        """Capture a gauge set."""
        call = (name, value, attrs)
        self.set_calls.append(call)
|
|
|
|
|
|
class RecordingSpan:
    """Span double capturing set attributes and recorded errors."""

    def __init__(self) -> None:
        self.attrs: dict[str, AttrValue] = {}
        self.errors: list[Exception] = []

    def set_attribute(self, name: str, value: AttrValue) -> None:
        """Store the attribute, overwriting any previous value."""
        self.attrs[name] = value

    def record_error(self, error: Exception) -> None:
        """Append the error for later inspection."""
        self.errors.append(error)
|
|
|
|
|
|
class RecordingSpanContext:
    """Context manager that yields a RecordingSpan and never swallows errors."""

    def __init__(self, span: RecordingSpan) -> None:
        self._span = span

    def __enter__(self) -> RecordingSpan:
        return self._span

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        traceback: TracebackType | None,
    ) -> bool | None:
        # Returning None propagates any in-flight exception unchanged.
        return None
|
|
|
|
|
|
class RecordingTracer:
    """Tracer double that records every started span with its attrs."""

    def __init__(self) -> None:
        self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []

    def start_span(
        self,
        name: str,
        attrs: Attrs | None = None,
    ) -> RecordingSpanContext:
        """Record a new span and hand back a context manager over it."""
        span = RecordingSpan()
        entry = (name, attrs, span)
        self.spans.append(entry)
        return RecordingSpanContext(span)
|
|
|
|
|
|
def _attrs_include(
    actual: Attrs | dict[str, AttrValue] | None,
    expected: dict[str, AttrValue],
) -> bool:
    """Return True iff every expected key/value pair appears in *actual*."""
    if actual is None:
        return False

    for key, want in expected.items():
        if actual.get(key) != want:
            return False
    return True
|
|
|
|
|
|
def _find_span(
    tracer: RecordingTracer,
    name: str,
    attrs: dict[str, AttrValue] | None = None,
    span_attrs: dict[str, AttrValue] | None = None,
) -> RecordingSpan:
    """Return the first recorded span matching name and optional attr subsets.

    Raises AssertionError when no span matches, failing the calling test.
    """
    for recorded_name, recorded_attrs, span in tracer.spans:
        name_ok = recorded_name == name
        attrs_ok = attrs is None or _attrs_include(recorded_attrs, attrs)
        span_ok = span_attrs is None or _attrs_include(span.attrs, span_attrs)
        if name_ok and attrs_ok and span_ok:
            return span

    raise AssertionError(f'missing span {name}')
|
|
|
|
|
|
def _find_increment_call(
    metrics: RecordingMetrics,
    name: str,
    *,
    value: int = 1,
    attrs: dict[str, AttrValue] | None = None,
) -> tuple[str, int, Attrs | None]:
    """Return the first increment call matching name/value (and attrs subset).

    Raises AssertionError when no matching call was recorded.
    """
    for call in metrics.increment_calls:
        recorded_name, recorded_value, recorded_attrs = call
        if recorded_name != name or recorded_value != value:
            continue
        if attrs is None or _attrs_include(recorded_attrs, attrs):
            return call

    raise AssertionError(f'missing increment metric {name}')
|
|
|
|
|
|
def _find_record_call(
    metrics: RecordingMetrics,
    name: str,
    *,
    attrs: dict[str, AttrValue] | None = None,
) -> tuple[str, float, Attrs | None]:
    """Return the first record() call matching name (and attrs subset).

    Raises AssertionError when no matching call was recorded.
    """
    for call in metrics.record_calls:
        recorded_name, _, recorded_attrs = call
        if recorded_name != name:
            continue
        if attrs is None or _attrs_include(recorded_attrs, attrs):
            return call

    raise AssertionError(f'missing record metric {name}')
|
|
|
|
|
|
def build_config(tmp_path: Path) -> SandboxConfig:
    """Return a SandboxConfig with fixed test defaults rooted at *tmp_path*."""

    def host_path(name: str) -> str:
        # Host-side directories live under the per-test temp root.
        return str(tmp_path / name)

    return SandboxConfig(
        image='sandbox:latest',
        network_name='sandbox',
        agent_service_port=8000,
        ttl_seconds=300,
        cleanup_interval_seconds=60,
        chats_root=host_path('chats'),
        dependencies_host_path=host_path('dependencies'),
        lambda_tools_host_path=host_path('lambda-tools'),
        chat_mount_path='/workspace/chat',
        dependencies_mount_path='/workspace/dependencies',
        lambda_tools_mount_path='/workspace/lambda-tools',
        volume_mount_path='/workspace/volume',
        extra_env={},
    )
|
|
|
|
|
|
def build_runtime(
    config: SandboxConfig,
    containers: FakeContainers,
) -> DockerSandboxRuntime:
    """Build a runtime over the fake client with no-op observability."""
    client = FakeDockerClient(containers)
    return DockerSandboxRuntime(config, client, NoopMetrics(), NoopTracer())
|
|
|
|
|
|
def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
    tmp_path: Path,
) -> None:
    """create() canonicalizes the chat id and applies env, labels, and the
    full mount policy (chat rw, dependencies ro, lambda-tools ro, volume rw)."""
    config = build_config(tmp_path)
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    containers = FakeContainers()
    runtime = build_runtime(config, containers)
    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    expires_at = created_at + timedelta(minutes=5)

    # Pass a non-canonical (uppercase, no dashes) chat id on purpose.
    session = runtime.create(
        session_id=SESSION_ID,
        chat_id=UUID(NON_CANONICAL_CHAT_ID),
        agent_id=AGENT_ID,
        volume_host_path=str(tmp_path / 'request-volume'),
        created_at=created_at,
        expires_at=expires_at,
    )

    assert session.session_id == SESSION_ID
    assert session.chat_id == CHAT_ID
    assert session.container_id == 'container-123'
    assert session.status is SandboxStatus.RUNNING
    assert session.created_at == created_at
    assert session.expires_at == expires_at
    assert session.agent_id == AGENT_ID
    assert session.volume_host_path == str(
        (tmp_path / 'request-volume').resolve(strict=False)
    )
    assert session.endpoint == SandboxEndpoint(ip='172.20.0.8', port=8000)
    assert (tmp_path / 'chats' / str(CHAT_ID)).is_dir()

    call = containers.run_calls[0]
    assert call['args'] == ('sandbox:latest',)
    assert call['kwargs']['detach'] is True
    assert call['kwargs']['environment'] == {'AGENT_ID': AGENT_ID}
    assert call['kwargs']['network'] == 'sandbox'
    assert call['kwargs']['labels'] == {
        'session_id': str(SESSION_ID),
        'chat_id': str(CHAT_ID),
        'expires_at': expires_at.isoformat(),
        'agent_id': AGENT_ID,
        'volume_host_path': str((tmp_path / 'request-volume').resolve(strict=False)),
        'endpoint_port': '8000',
    }

    mounts = call['kwargs']['mounts']
    assert [dict(mount) for mount in mounts] == [
        {
            'Target': '/workspace/chat',
            'Source': str((tmp_path / 'chats' / str(CHAT_ID)).resolve(strict=False)),
            'Type': 'bind',
            'ReadOnly': False,
        },
        {
            'Target': '/workspace/dependencies',
            'Source': str((tmp_path / 'dependencies').resolve(strict=False)),
            'Type': 'bind',
            'ReadOnly': True,
        },
        {
            'Target': '/workspace/lambda-tools',
            'Source': str((tmp_path / 'lambda-tools').resolve(strict=False)),
            'Type': 'bind',
            'ReadOnly': True,
        },
        {
            'Target': '/workspace/volume',
            'Source': str((tmp_path / 'request-volume').resolve(strict=False)),
            'Type': 'bind',
            'ReadOnly': False,
        },
    ]
|
|
|
|
|
|
def test_runtime_create_uses_configured_network_for_endpoint(tmp_path: Path) -> None:
    """create() derives the session endpoint from the configured network
    name and agent service port rather than hard-coded defaults."""
    config = replace(
        build_config(tmp_path),
        network_name='agent-net',
        agent_service_port=9000,
    )
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    containers = FakeContainers(
        run_result=FakeContainer(
            'container-456',
            network_name='agent-net',
            ip='10.42.0.7',
        )
    )
    runtime = build_runtime(config, containers)
    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    expires_at = created_at + timedelta(minutes=5)

    session = runtime.create(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        agent_id=AGENT_ID,
        volume_host_path=str(tmp_path / 'request-volume'),
        created_at=created_at,
        expires_at=expires_at,
    )

    assert containers.run_calls[0]['kwargs']['network'] == 'agent-net'
    assert session.endpoint == SandboxEndpoint(ip='10.42.0.7', port=9000)
|
|
|
|
|
|
def test_runtime_create_removes_container_when_endpoint_extraction_fails(
    tmp_path: Path,
) -> None:
    """create() force-removes the already-started container when the
    configured network is absent from its attrs (no endpoint IP found)."""
    config = build_config(tmp_path)
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    # Container joined a network other than the configured 'sandbox' one.
    created_container = FakeContainer(
        'container-789',
        network_name='unexpected-net',
    )
    containers = FakeContainers(run_result=created_container)
    runtime = build_runtime(config, containers)

    with pytest.raises(SandboxStartError) as excinfo:
        runtime.create(
            session_id=SESSION_ID,
            chat_id=CHAT_ID,
            agent_id=AGENT_ID,
            volume_host_path=str(tmp_path / 'request-volume'),
            created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
            expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
        )

    assert str(excinfo.value) == 'sandbox_start_failed'
    assert containers.run_calls
    assert created_container.remove_calls == [{'force': True}]
|
|
|
|
|
|
def test_runtime_create_applies_request_volume_bind_as_rw(tmp_path: Path) -> None:
    """create() bind-mounts the per-request volume read-write."""
    config = build_config(tmp_path)
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    containers = FakeContainers()
    runtime = build_runtime(config, containers)
    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    expires_at = created_at + timedelta(minutes=5)
    volume_host_path = str(tmp_path / 'request-volume')

    runtime.create(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        agent_id=AGENT_ID,
        volume_host_path=volume_host_path,
        created_at=created_at,
        expires_at=expires_at,
    )

    mounts = [dict(mount) for mount in containers.run_calls[0]['kwargs']['mounts']]
    assert {
        'Target': '/workspace/volume',
        'Source': str((tmp_path / 'request-volume').resolve(strict=False)),
        'Type': 'bind',
        'ReadOnly': False,
    } in mounts
|
|
|
|
|
|
def test_runtime_create_records_observability(tmp_path: Path) -> None:
    """Successful create() records a duration metric and a span with
    container id and 'created' result, and no span errors."""
    config = build_config(tmp_path)
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    containers = FakeContainers()
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )
    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    expires_at = created_at + timedelta(minutes=5)

    session = runtime.create(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        agent_id=AGENT_ID,
        volume_host_path=str(tmp_path / 'request-volume'),
        created_at=created_at,
        expires_at=expires_at,
    )

    assert session.container_id == 'container-123'
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.create.duration_ms',
        attrs={'operation': 'create', 'result': 'created'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.create_sandbox',
        {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
        {
            'container.id': 'container-123',
            'sandbox.result': 'created',
        },
    )
    assert not span.errors
|
|
|
|
|
|
def test_runtime_create_raises_start_error_when_container_id_is_missing(
    tmp_path: Path,
) -> None:
    """create() raises SandboxStartError (carrying the chat id) when the
    docker client returns a container with an empty id."""
    config = build_config(tmp_path)
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    containers = FakeContainers(run_result=FakeContainer(''))
    runtime = build_runtime(config, containers)

    with pytest.raises(SandboxStartError) as excinfo:
        runtime.create(
            session_id=SESSION_ID,
            chat_id=CHAT_ID,
            agent_id=AGENT_ID,
            volume_host_path=str(tmp_path / 'request-volume'),
            created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
            expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
        )

    assert str(excinfo.value) == 'sandbox_start_failed'
    assert excinfo.value.chat_id == str(CHAT_ID)
|
|
|
|
|
|
def test_runtime_create_error_records_observability_when_container_id_missing(
    tmp_path: Path,
) -> None:
    """Failed create() (empty container id) records an error counter, an
    error-result duration metric, and attaches the exception to the span."""
    config = build_config(tmp_path)
    (tmp_path / 'dependencies').mkdir()
    (tmp_path / 'lambda-tools').mkdir()
    containers = FakeContainers(run_result=FakeContainer(''))
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    with pytest.raises(SandboxStartError) as excinfo:
        runtime.create(
            session_id=SESSION_ID,
            chat_id=CHAT_ID,
            agent_id=AGENT_ID,
            volume_host_path=str(tmp_path / 'request-volume'),
            created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
            expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
        )

    assert str(excinfo.value) == 'sandbox_start_failed'
    _find_increment_call(
        metrics,
        'sandbox.runtime.error.total',
        attrs={'operation': 'create', 'error.type': 'ValueError'},
    )
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.create.duration_ms',
        attrs={'operation': 'create', 'result': 'error'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.create_sandbox',
        {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
        {'sandbox.result': 'error'},
    )
    assert excinfo.value in span.errors
|
|
|
|
|
|
def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
    """stop() treats NotFound as success ('not_found' result) and records
    no stop-operation error metric."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    containers.get_result = NotFound('missing')
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    runtime.stop('container-123')

    assert containers.get_calls == ['container-123']
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'not_found'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        {'sandbox.result': 'not_found'},
    )
    assert not span.errors
    stop_error_calls = [
        call
        for call in metrics.increment_calls
        if call[0] == 'sandbox.runtime.error.total'
        and call[2] is not None
        and call[2].get('operation') == 'stop'
    ]
    assert stop_error_calls == []
|
|
|
|
|
|
def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
    """stop() wraps DockerException in SandboxError (chained as __cause__)
    and records error metric, error duration, and the span error."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    containers.get_result = FailingStopContainer(
        'container-123',
        labels={
            'session_id': str(SESSION_ID),
            'chat_id': str(CHAT_ID),
            'expires_at': '2026-04-02T12:05:00+00:00',
        },
        created_at='2026-04-02T12:00:00Z',
        error=DockerException('boom'),
    )
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    with pytest.raises(SandboxError) as excinfo:
        runtime.stop('container-123')

    assert str(excinfo.value) == 'sandbox_stop_failed'
    _find_increment_call(
        metrics,
        'sandbox.runtime.error.total',
        attrs={'operation': 'stop', 'error.type': 'DockerException'},
    )
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'error'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        {
            'session.id': str(SESSION_ID),
            'chat.id': str(CHAT_ID),
            'sandbox.result': 'error',
        },
    )
    cause = excinfo.value.__cause__
    assert isinstance(cause, DockerException)
    assert cause in span.errors
|
|
|
|
|
|
def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
    """Successful stop() records a 'stopped' duration metric and span
    (with session/chat ids from labels) and no stop-operation errors."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    container = FakeListedContainer(
        'container-123',
        labels={
            'session_id': str(SESSION_ID),
            'chat_id': str(CHAT_ID),
            'expires_at': '2026-04-02T12:05:00+00:00',
        },
        created_at='2026-04-02T12:00:00Z',
    )
    containers.get_result = container
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    runtime.stop('container-123')

    assert container.stop_calls == 1
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'stopped'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        {
            'session.id': str(SESSION_ID),
            'chat.id': str(CHAT_ID),
            'sandbox.result': 'stopped',
        },
    )
    assert not span.errors
    stop_error_calls = [
        call
        for call in metrics.increment_calls
        if call[0] == 'sandbox.runtime.error.total'
        and call[2] is not None
        and call[2].get('operation') == 'stop'
    ]
    assert stop_error_calls == []
|
|
|
|
|
|
def test_runtime_delete_removes_container_with_force(tmp_path: Path) -> None:
    """delete() looks up the container and removes it with force=True."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    container = FakeListedContainer(
        'container-123',
        labels={
            'session_id': str(SESSION_ID),
            'chat_id': str(CHAT_ID),
            'expires_at': '2026-04-02T12:05:00+00:00',
        },
        created_at='2026-04-02T12:00:00Z',
    )
    containers.get_result = container
    runtime = build_runtime(config, containers)

    runtime.delete('container-123')

    assert containers.get_calls == ['container-123']
    assert container.remove_calls == [{'force': True}]
|
|
|
|
|
|
def test_runtime_delete_ignores_missing_container(tmp_path: Path) -> None:
    """delete() swallows NotFound: deleting an absent container is a no-op."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    containers.get_result = NotFound('missing')
    runtime = build_runtime(config, containers)

    runtime.delete('container-123')

    assert containers.get_calls == ['container-123']
|
|
|
|
|
|
def test_runtime_list_active_sessions_reads_valid_labeled_containers(
    tmp_path: Path,
) -> None:
    """list_active_sessions() rebuilds sessions from fully-labeled
    containers, filters by the required labels, and skips containers with
    incomplete labels."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
    containers.list_result = [
        FakeListedContainer(
            'container-123',
            labels={
                'session_id': str(SESSION_ID),
                'chat_id': str(CHAT_ID),
                'expires_at': expires_at.isoformat(),
                'agent_id': AGENT_ID,
                'volume_host_path': str(tmp_path / 'request-volume'),
                'endpoint_port': '8000',
            },
            created_at='2026-04-02T12:00:00Z',
        ),
        # Missing session_id (and other labels) — must be skipped.
        FakeListedContainer(
            'container-bad',
            labels={
                'chat_id': str(CHAT_ID),
                'expires_at': expires_at.isoformat(),
            },
            created_at='2026-04-02T12:01:00Z',
        ),
    ]
    runtime = build_runtime(config, containers)

    sessions = runtime.list_active_sessions()

    assert sessions == [
        SandboxSession(
            session_id=SESSION_ID,
            chat_id=CHAT_ID,
            container_id='container-123',
            status=SandboxStatus.RUNNING,
            created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
            expires_at=expires_at,
            agent_id=AGENT_ID,
            volume_host_path=str(tmp_path / 'request-volume'),
            endpoint=SandboxEndpoint(ip='172.20.0.8', port=8000),
        )
    ]
    assert containers.list_calls == [
        {
            'filters': {
                'label': [
                    'session_id',
                    'chat_id',
                    'expires_at',
                    'agent_id',
                    'volume_host_path',
                    'endpoint_port',
                ]
            }
        }
    ]
|
|
|
|
|
|
def test_runtime_list_active_records_observability(tmp_path: Path) -> None:
    """list_active_sessions() records a 'listed' duration metric and a span
    carrying both the total container count and the valid-session count."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
    containers.list_result = [
        FakeListedContainer(
            'container-123',
            labels={
                'session_id': str(SESSION_ID),
                'chat_id': str(CHAT_ID),
                'expires_at': expires_at.isoformat(),
                'agent_id': AGENT_ID,
                'volume_host_path': str(tmp_path / 'request-volume'),
                'endpoint_port': '8000',
            },
            created_at='2026-04-02T12:00:00Z',
        ),
        # Incomplete labels: counted as a container but not an active session.
        FakeListedContainer(
            'container-bad',
            labels={
                'chat_id': str(CHAT_ID),
                'expires_at': expires_at.isoformat(),
            },
            created_at='2026-04-02T12:01:00Z',
        ),
    ]
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    sessions = runtime.list_active_sessions()

    assert len(sessions) == 1
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.list_active.duration_ms',
        attrs={'operation': 'list_active', 'result': 'listed'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.list_active_sandboxes',
        span_attrs={
            'sandbox.container_count': 2,
            'sandbox.active_count': 1,
            'sandbox.result': 'listed',
        },
    )
    assert not span.errors
|
|
|
|
|
|
def test_runtime_list_active_error_records_observability(tmp_path: Path) -> None:
    """A DockerException during listing is wrapped in SandboxError (chained
    as __cause__) with error metric, error duration, and span error recorded."""
    config = build_config(tmp_path)
    containers = FakeContainers()
    containers.list_error = DockerException('boom')
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    with pytest.raises(SandboxError) as excinfo:
        runtime.list_active_sessions()

    assert str(excinfo.value) == 'sandbox_list_failed'
    _find_increment_call(
        metrics,
        'sandbox.runtime.error.total',
        attrs={'operation': 'list_active', 'error.type': 'DockerException'},
    )
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.list_active.duration_ms',
        attrs={'operation': 'list_active', 'result': 'error'},
    )
    assert duration_call[1] >= 0
    span = _find_span(
        tracer,
        'adapter.docker.list_active_sandboxes',
        span_attrs={'sandbox.result': 'error'},
    )
    assert isinstance(excinfo.value.__cause__, DockerException)
    assert excinfo.value in span.errors
|