add sandbox observability failure tests
This commit is contained in:
parent
02770bce7d
commit
b4a2a9ceea
3 changed files with 268 additions and 6 deletions
|
|
@ -43,6 +43,27 @@ class FakeListedContainer(FakeContainer):
|
|||
self.attrs = {'Created': created_at}
|
||||
|
||||
|
||||
class FailingStopContainer(FakeListedContainer):
    """Listed-container fake whose ``stop()`` always raises a preset error.

    Construction forwards ``container_id``, ``labels`` and ``created_at`` to
    ``FakeListedContainer`` unchanged; ``error`` is stored and raised on every
    ``stop()`` call so tests can drive the runtime's stop-failure path.
    """

    def __init__(
        self,
        container_id: str,
        *,
        labels: dict[str, str],
        created_at: str,
        error: Exception,
    ) -> None:
        super().__init__(
            container_id,
            labels=labels,
            created_at=created_at,
        )
        # Exception instance re-raised by every stop() call.
        self._error = error

    def stop(self) -> None:
        """Record the stop attempt, then raise the configured error."""
        # Count the attempt before failing so the call is still observable.
        # NOTE(review): `stop_calls` is presumably initialised by a base
        # class; it is not set in this class -- confirm.
        self.stop_calls += 1
        raise self._error
|
||||
|
||||
|
||||
class RunKwargs(TypedDict):
|
||||
detach: bool
|
||||
labels: dict[str, str]
|
||||
|
|
@ -435,23 +456,143 @@ def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
|
|||
config = build_config(tmp_path)
|
||||
containers = FakeContainers()
|
||||
containers.get_result = NotFound('missing')
|
||||
runtime = build_runtime(config, containers)
|
||||
metrics = RecordingMetrics()
|
||||
tracer = RecordingTracer()
|
||||
runtime = DockerSandboxRuntime(
|
||||
config,
|
||||
FakeDockerClient(containers),
|
||||
metrics,
|
||||
tracer,
|
||||
)
|
||||
|
||||
runtime.stop('container-123')
|
||||
|
||||
assert containers.get_calls == ['container-123']
|
||||
duration_call = _find_record_call(
|
||||
metrics,
|
||||
'sandbox.runtime.stop.duration_ms',
|
||||
attrs={'operation': 'stop', 'result': 'not_found'},
|
||||
)
|
||||
assert duration_call[1] >= 0
|
||||
span = _find_span(
|
||||
tracer,
|
||||
'adapter.docker.stop_sandbox',
|
||||
{'container.id': 'container-123'},
|
||||
{'sandbox.result': 'not_found'},
|
||||
)
|
||||
assert not span.errors
|
||||
stop_error_calls = [
|
||||
call
|
||||
for call in metrics.increment_calls
|
||||
if call[0] == 'sandbox.runtime.error.total'
|
||||
and call[2] is not None
|
||||
and call[2].get('operation') == 'stop'
|
||||
]
|
||||
assert stop_error_calls == []
|
||||
|
||||
|
||||
def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
    """A failing container stop surfaces as ``SandboxError`` with telemetry.

    The fake container raises ``DockerException('boom')`` from ``stop()``.
    The test asserts that ``runtime.stop`` raises
    ``SandboxError('sandbox_stop_failed')`` chained to that exception, that an
    error counter and an ``error`` duration sample are recorded for the stop
    operation, and that the stop span carries the session/chat attributes and
    the original exception.
    """
    config = build_config(tmp_path)
    containers = FakeContainers()
    # The lookup succeeds; the failure is injected at container.stop().
    containers.get_result = FailingStopContainer(
        'container-123',
        labels={
            'session_id': str(SESSION_ID),
            'chat_id': str(CHAT_ID),
            'expires_at': '2026-04-02T12:05:00+00:00',
        },
        created_at='2026-04-02T12:00:00Z',
        error=DockerException('boom'),
    )
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    with pytest.raises(SandboxError) as excinfo:
        runtime.stop('container-123')

    assert str(excinfo.value) == 'sandbox_stop_failed'

    # Metrics: one error increment plus a duration sample tagged as error.
    _find_increment_call(
        metrics,
        'sandbox.runtime.error.total',
        attrs={'operation': 'stop', 'error.type': 'DockerException'},
    )
    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'error'},
    )
    assert duration_call[1] >= 0

    # Tracing: the stop span is tagged with the container's identity labels
    # and records the underlying Docker exception.
    span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        {
            'session.id': str(SESSION_ID),
            'chat.id': str(CHAT_ID),
            'sandbox.result': 'error',
        },
    )
    cause = excinfo.value.__cause__
    assert isinstance(cause, DockerException)
    assert cause in span.errors
|
||||
|
||||
|
||||
def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
    """A successful stop records a ``stopped`` duration and a clean span.

    The test asserts that the container's ``stop()`` is called exactly once,
    that a duration sample tagged ``result='stopped'`` is recorded, that the
    stop span carries the session/chat attributes with no errors, and that no
    stop-related error counter is incremented.
    """
    config = build_config(tmp_path)
    containers = FakeContainers()
    container = FakeListedContainer(
        'container-123',
        labels={
            'session_id': str(SESSION_ID),
            'chat_id': str(CHAT_ID),
            'expires_at': '2026-04-02T12:05:00+00:00',
        },
        created_at='2026-04-02T12:00:00Z',
    )
    containers.get_result = container
    metrics = RecordingMetrics()
    tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        config,
        FakeDockerClient(containers),
        metrics,
        tracer,
    )

    runtime.stop('container-123')

    assert container.stop_calls == 1

    duration_call = _find_record_call(
        metrics,
        'sandbox.runtime.stop.duration_ms',
        attrs={'operation': 'stop', 'result': 'stopped'},
    )
    assert duration_call[1] >= 0

    span = _find_span(
        tracer,
        'adapter.docker.stop_sandbox',
        {'container.id': 'container-123'},
        {
            'session.id': str(SESSION_ID),
            'chat.id': str(CHAT_ID),
            'sandbox.result': 'stopped',
        },
    )
    assert not span.errors

    # The happy path must not touch the error counter for the stop operation.
    stop_error_calls = [
        call
        for call in metrics.increment_calls
        if call[0] == 'sandbox.runtime.error.total'
        and call[2] is not None
        and call[2].get('operation') == 'stop'
    ]
    assert stop_error_calls == []
|
||||
|
||||
|
||||
def test_runtime_list_active_sessions_reads_valid_labeled_containers(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue