add sandbox observability failure tests

This commit is contained in:
Azamat 2026-04-03 02:04:51 +03:00
parent 02770bce7d
commit b4a2a9ceea
3 changed files with 268 additions and 6 deletions

View file

@ -43,6 +43,27 @@ class FakeListedContainer(FakeContainer):
self.attrs = {'Created': created_at}
class FailingStopContainer(FakeListedContainer):
    """Listed-container fake whose ``stop`` always raises a preset error.

    Each ``stop`` call is still counted in ``stop_calls`` before the stored
    exception is raised, so tests can assert that a stop was attempted.
    """

    def __init__(
        self,
        container_id: str,
        *,
        labels: dict[str, str],
        created_at: str,
        error: Exception,
    ) -> None:
        """Initialise the base fake and remember ``error`` for ``stop``."""
        super().__init__(container_id, labels=labels, created_at=created_at)
        self._error = error

    def stop(self) -> None:
        """Record the stop attempt, then raise the stored exception."""
        self.stop_calls += 1
        raise self._error
class RunKwargs(TypedDict):
detach: bool
labels: dict[str, str]
@ -435,23 +456,143 @@ def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
containers.get_result = NotFound('missing')
runtime = build_runtime(config, containers)
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
runtime.stop('container-123')
assert containers.get_calls == ['container-123']
duration_call = _find_record_call(
metrics,
'sandbox.runtime.stop.duration_ms',
attrs={'operation': 'stop', 'result': 'not_found'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.stop_sandbox',
{'container.id': 'container-123'},
{'sandbox.result': 'not_found'},
)
assert not span.errors
stop_error_calls = [
call
for call in metrics.increment_calls
if call[0] == 'sandbox.runtime.error.total'
and call[2] is not None
and call[2].get('operation') == 'stop'
]
assert stop_error_calls == []
def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
    """A Docker failure during ``stop`` surfaces as ``SandboxError``.

    The container lookup succeeds, but the container's ``stop`` raises
    ``DockerException``. The test checks that the runtime:

    * raises ``SandboxError('sandbox_stop_failed')`` chained to the
      original ``DockerException``;
    * increments ``sandbox.runtime.error.total`` for the stop operation;
    * records ``sandbox.runtime.stop.duration_ms`` with ``result='error'``;
    * reports the error on the ``adapter.docker.stop_sandbox`` span.
    """
    config = build_config(tmp_path)
    containers = FakeContainers()
    # The lookup returns a container whose stop() raises, so the failure
    # happens in the stop call itself rather than in the lookup.
    containers.get_result = FailingStopContainer(
        'container-123',
        labels={
            'session_id': str(SESSION_ID),
            'chat_id': str(CHAT_ID),
            'expires_at': '2026-04-02T12:05:00+00:00',
        },
        created_at='2026-04-02T12:00:00Z',
        error=DockerException('boom'),
    )
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    with pytest.raises(SandboxError) as excinfo:
        runtime.stop('container-123')

    assert str(excinfo.value) == 'sandbox_stop_failed'

    # The helper is called only for the lookup; its return value is unused.
    _find_increment_call(
        metrics,
        'sandbox.runtime.error.total',
        attrs={'operation': 'stop', 'error.type': 'DockerException'},
    )
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'error'},
    )
    assert duration_call[1] >= 0

    span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        {
            'session.id': str(SESSION_ID),
            'chat.id': str(CHAT_ID),
            'sandbox.result': 'error',
        },
    )
    # The wrapped exception must be both the SandboxError's cause and one of
    # the errors recorded on the span.
    cause = excinfo.value.__cause__
    assert isinstance(cause, DockerException)
    assert cause in span.errors
def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
    """A successful ``stop`` emits a duration metric and a clean span.

    Checks that the container is stopped exactly once, the duration metric
    is recorded with ``result='stopped'``, the span has no errors, and no
    stop-related error counter is incremented.
    """
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    config = build_config(tmp_path)

    container_labels = {
        'session_id': str(SESSION_ID),
        'chat_id': str(CHAT_ID),
        'expires_at': '2026-04-02T12:05:00+00:00',
    }
    listed = FakeListedContainer(
        'container-123',
        labels=container_labels,
        created_at='2026-04-02T12:00:00Z',
    )
    containers = FakeContainers()
    containers.get_result = listed

    docker_client = FakeDockerClient(containers)
    runtime = DockerSandboxRuntime(config, docker_client, metrics, tracer)

    runtime.stop('container-123')

    assert listed.stop_calls == 1

    duration_record = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'stopped'},
    )
    assert duration_record[1] >= 0

    expected_span_attrs = {
        'session.id': str(SESSION_ID),
        'chat.id': str(CHAT_ID),
        'sandbox.result': 'stopped',
    }
    stop_span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        expected_span_attrs,
    )
    assert not stop_span.errors

    # No error counter tagged with operation='stop' may have been emitted.
    unexpected_errors = [
        entry
        for entry in metrics.increment_calls
        if entry[0] == 'sandbox.runtime.error.total'
        and entry[2] is not None
        and entry[2].get('operation') == 'stop'
    ]
    assert unexpected_errors == []
def test_runtime_list_active_sessions_reads_valid_labeled_containers(