diff --git a/tasks.md b/tasks.md index 77dd415..861a726 100644 --- a/tasks.md +++ b/tasks.md @@ -341,7 +341,7 @@ ### M28. Регрессии на rollback и startup failure observability - Субагент: `test-engineer` -- Статус: pending +- Статус: completed - Зависимости: `M27` - Commit required: yes - Commit message: `add sandbox rollback regression tests` diff --git a/test/test_create_http.py b/test/test_create_http.py index e8686c4..ae302c2 100644 --- a/test/test_create_http.py +++ b/test/test_create_http.py @@ -3,6 +3,7 @@ import json from datetime import UTC, datetime, timedelta from uuid import UUID +import pytest from docker import DockerClient from fastapi import FastAPI from starlette.types import Message, Scope @@ -226,6 +227,16 @@ class FixedSandboxState: return list(self._sessions) +class FailingSandboxState: + def __init__(self, error: Exception) -> None: + self._error = error + self.calls = 0 + + def list_active_sessions(self) -> list[SandboxSession]: + self.calls += 1 + raise self._error + + class CountingRegistry: def __init__(self, count_active_result: int) -> None: self._count_active_result = count_active_result @@ -238,6 +249,25 @@ class CountingRegistry: return self._count_active_result +class FailingRegistry: + def __init__(self, error: Exception, *, fail_on: str = 'replace_all') -> None: + self._error = error + self._fail_on = fail_on + self.replaced_sessions: list[SandboxSession] = [] + self.count_calls = 0 + + def replace_all(self, sessions: list[SandboxSession]) -> None: + self.replaced_sessions = list(sessions) + if self._fail_on == 'replace_all': + raise self._error + + def count_active(self) -> int: + self.count_calls += 1 + if self._fail_on == 'count_active': + raise self._error + return 0 + + def build_config() -> AppConfig: return AppConfig( app=AppSectionConfig(name='master', env='test'), @@ -719,6 +749,124 @@ def test_reconciliation_uses_registry_backed_active_count_metric() -> None: assert tracer.spans[0][2].attrs['sandbox.active_count'] == 7 +def test_reconciliation_records_error_when_state_source_fails() -> None: + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + state_error = RuntimeError('state_failed') + state_source = FailingSandboxState(state_error) + reconciler = SandboxSessionReconciler( + state_source=state_source, + registry=CountingRegistry(count_active_result=7), + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + with pytest.raises(RuntimeError, match='state_failed') as excinfo: + reconciler.execute() + + assert state_source.calls == 1 + assert metrics.set_calls == [] + spans = [ + span + for name, _, span in tracer.spans + if name == 'adapter.sandbox.reconcile_sessions' + ] + assert spans + span = spans[0] + assert span.attrs['sandbox.result'] == 'error' + assert 'sandbox.discovered_count' not in span.attrs + assert 'sandbox.active_count' not in span.attrs + assert excinfo.value in span.errors + + +def test_reconciliation_records_error_without_active_count_metric_on_registry_failure() -> ( + None +): + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( + session_id=SESSION_ID, + chat_id=CHAT_ID, + container_id='container-123', + status=SandboxStatus.RUNNING, + created_at=created_at, + expires_at=created_at + timedelta(minutes=5), + ) + registry_error = RuntimeError('replace_failed') + registry = FailingRegistry(registry_error) + reconciler = SandboxSessionReconciler( + state_source=FixedSandboxState([session]), + registry=registry, + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + with pytest.raises(RuntimeError, match='replace_failed') as excinfo: + reconciler.execute() + + assert registry.replaced_sessions == [session] + assert registry.count_calls == 0 + assert metrics.set_calls == [] + spans = [ + span + for name, _, span in tracer.spans + if name == 'adapter.sandbox.reconcile_sessions' + ] + assert spans + span = spans[0] + assert span.attrs['sandbox.discovered_count'] == 1 + assert span.attrs['sandbox.result'] == 'error' + assert 'sandbox.active_count' not in span.attrs + assert excinfo.value in span.errors + + +def test_reconciliation_records_error_when_registry_count_active_fails() -> None: + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( + session_id=SESSION_ID, + chat_id=CHAT_ID, + container_id='container-123', + status=SandboxStatus.RUNNING, + created_at=created_at, + expires_at=created_at + timedelta(minutes=5), + ) + registry_error = RuntimeError('count_failed') + registry = FailingRegistry(registry_error, fail_on='count_active') + reconciler = SandboxSessionReconciler( + state_source=FixedSandboxState([session]), + registry=registry, + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + with pytest.raises(RuntimeError, match='count_failed') as excinfo: + reconciler.execute() + + assert registry.replaced_sessions == [session] + assert registry.count_calls == 1 + assert metrics.set_calls == [] + spans = [ + span + for name, _, span in tracer.spans + if name == 'adapter.sandbox.reconcile_sessions' + ] + assert spans + span = spans[0] + assert span.attrs['sandbox.discovered_count'] == 1 + assert 'sandbox.active_count' not in span.attrs + assert span.attrs['sandbox.result'] == 'error' + assert excinfo.value in span.errors + + def test_build_container_wires_observability_into_runtime_and_reconciler( monkeypatch, ) -> None: diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index 352adad..267d177 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -83,6 +83,7 @@ class FakeContainers: self.run_result = run_result or FakeContainer('container-123') self.get_result: FakeContainer | Exception | None = None self.list_result: list[object] = [] + self.list_error: Exception | None = None def run( self, @@ -114,6 +115,8 @@ class FakeContainers: def list(self, *, filters: dict[str, list[str]]) -> list[object]: self.list_calls.append({'filters': filters}) + if self.list_error is not None: + raise self.list_error return self.list_result @@ -690,3 +693,40 @@ def test_runtime_list_active_records_observability(tmp_path: Path) -> None: }, ) assert not span.errors + + +def test_runtime_list_active_error_records_observability(tmp_path: Path) -> None: + config = build_config(tmp_path) + containers = FakeContainers() + containers.list_error = DockerException('boom') + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + with pytest.raises(SandboxError) as excinfo: + runtime.list_active_sessions() + + assert str(excinfo.value) == 'sandbox_list_failed' + _find_increment_call( + metrics, + 'sandbox.runtime.error.total', + attrs={'operation': 'list_active', 'error.type': 'DockerException'}, + ) + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.list_active.duration_ms', + attrs={'operation': 'list_active', 'result': 'error'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.list_active_sandboxes', + span_attrs={'sandbox.result': 'error'}, + ) + assert isinstance(excinfo.value.__cause__, DockerException) + assert excinfo.value in span.errors