add sandbox rollback regression tests

This commit is contained in:
Azamat 2026-04-03 02:29:18 +03:00
parent 9b6c7908ad
commit c5b6a84a4b
3 changed files with 189 additions and 1 deletions

View file

@ -341,7 +341,7 @@
### M28. Регрессии на rollback и startup failure observability ### M28. Регрессии на rollback и startup failure observability
- Субагент: `test-engineer` - Субагент: `test-engineer`
- Статус: pending - Статус: completed
- Зависимости: `M27` - Зависимости: `M27`
- Commit required: yes - Commit required: yes
- Commit message: `add sandbox rollback regression tests` - Commit message: `add sandbox rollback regression tests`

View file

@ -3,6 +3,7 @@ import json
from datetime import UTC, datetime, timedelta from datetime import UTC, datetime, timedelta
from uuid import UUID from uuid import UUID
import pytest
from docker import DockerClient from docker import DockerClient
from fastapi import FastAPI from fastapi import FastAPI
from starlette.types import Message, Scope from starlette.types import Message, Scope
@ -226,6 +227,16 @@ class FixedSandboxState:
return list(self._sessions) return list(self._sessions)
class FailingSandboxState:
    """State-source test double whose session listing always fails.

    Every call to ``list_active_sessions`` is counted in ``calls`` and then
    raises the exception instance supplied at construction time.
    """

    def __init__(self, error: Exception) -> None:
        # Number of times list_active_sessions has been invoked so far.
        self.calls = 0
        self._error = error

    def list_active_sessions(self) -> list[SandboxSession]:
        """Count the call, then raise the configured error (never returns)."""
        self.calls = self.calls + 1
        raise self._error
class CountingRegistry: class CountingRegistry:
def __init__(self, count_active_result: int) -> None: def __init__(self, count_active_result: int) -> None:
self._count_active_result = count_active_result self._count_active_result = count_active_result
@ -238,6 +249,25 @@ class CountingRegistry:
return self._count_active_result return self._count_active_result
class FailingRegistry:
    """Registry test double that raises a configured error from one method.

    ``fail_on`` names the method that should raise: ``'replace_all'`` (the
    default) or ``'count_active'``. With any other value neither method
    raises. Calls are recorded before the error is raised so tests can
    inspect what the registry received.
    """

    def __init__(self, error: Exception, *, fail_on: str = 'replace_all') -> None:
        self._error = error
        self._fail_on = fail_on
        # Copy of the sessions passed to the most recent replace_all call.
        self.replaced_sessions: list[SandboxSession] = []
        # Number of count_active invocations, including the failing one.
        self.count_calls = 0

    def _raise_if_configured(self, operation: str) -> None:
        """Raise the stored error when ``operation`` is the one set to fail."""
        if operation == self._fail_on:
            raise self._error

    def replace_all(self, sessions: list[SandboxSession]) -> None:
        """Record a copy of ``sessions``, then fail if configured to."""
        self.replaced_sessions = [*sessions]
        self._raise_if_configured('replace_all')

    def count_active(self) -> int:
        """Count the call, fail if configured to, otherwise return 0."""
        self.count_calls += 1
        self._raise_if_configured('count_active')
        return 0
def build_config() -> AppConfig: def build_config() -> AppConfig:
return AppConfig( return AppConfig(
app=AppSectionConfig(name='master', env='test'), app=AppSectionConfig(name='master', env='test'),
@ -719,6 +749,124 @@ def test_reconciliation_uses_registry_backed_active_count_metric() -> None:
assert tracer.spans[0][2].attrs['sandbox.active_count'] == 7 assert tracer.spans[0][2].attrs['sandbox.active_count'] == 7
def test_reconciliation_records_error_when_state_source_fails() -> None:
    """A failing state source propagates and marks the reconcile span as error.

    Checks that ``execute()`` re-raises the state-source error after a single
    listing attempt, that no metric ``set`` call is recorded, and that the
    reconcile span carries ``sandbox.result == 'error'`` plus the exception,
    without any discovered/active count attributes.
    """
    fake_logger = FakeLogger()
    recorded_metrics = RecordingMetrics()
    recording_tracer = RecordingTracer()
    failing_state = FailingSandboxState(RuntimeError('state_failed'))
    reconciler = SandboxSessionReconciler(
        state_source=failing_state,
        registry=CountingRegistry(count_active_result=7),
        logger=fake_logger,
        metrics=recorded_metrics,
        tracer=recording_tracer,
    )

    with pytest.raises(RuntimeError, match='state_failed') as excinfo:
        reconciler.execute()

    assert failing_state.calls == 1
    assert recorded_metrics.set_calls == []

    # Keep only the spans emitted for the reconcile operation itself.
    reconcile_spans = []
    for span_name, _, recorded in recording_tracer.spans:
        if span_name == 'adapter.sandbox.reconcile_sessions':
            reconcile_spans.append(recorded)
    assert reconcile_spans

    reconcile_span = reconcile_spans[0]
    span_attrs = reconcile_span.attrs
    assert span_attrs['sandbox.result'] == 'error'
    # The failure happens before anything is discovered or counted.
    assert 'sandbox.discovered_count' not in span_attrs
    assert 'sandbox.active_count' not in span_attrs
    assert excinfo.value in reconcile_span.errors
def test_reconciliation_records_error_without_active_count_metric_on_registry_failure() -> (
    None
):
    """A ``replace_all`` failure is recorded as an error with no active count.

    The registry receives the discovered session and then raises. The test
    checks that ``count_active`` is never reached, that no metric ``set`` call
    is recorded, and that the reconcile span has the discovered count, an
    ``error`` result and the exception, but no ``sandbox.active_count``.
    """
    fake_logger = FakeLogger()
    recorded_metrics = RecordingMetrics()
    recording_tracer = RecordingTracer()

    started_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    running_session = SandboxSession(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        container_id='container-123',
        status=SandboxStatus.RUNNING,
        created_at=started_at,
        expires_at=started_at + timedelta(minutes=5),
    )
    # Default fail_on makes replace_all the failing call.
    failing_registry = FailingRegistry(RuntimeError('replace_failed'))
    reconciler = SandboxSessionReconciler(
        state_source=FixedSandboxState([running_session]),
        registry=failing_registry,
        logger=fake_logger,
        metrics=recorded_metrics,
        tracer=recording_tracer,
    )

    with pytest.raises(RuntimeError, match='replace_failed') as excinfo:
        reconciler.execute()

    assert failing_registry.replaced_sessions == [running_session]
    assert failing_registry.count_calls == 0
    assert recorded_metrics.set_calls == []

    # Keep only the spans emitted for the reconcile operation itself.
    reconcile_spans = []
    for span_name, _, recorded in recording_tracer.spans:
        if span_name == 'adapter.sandbox.reconcile_sessions':
            reconcile_spans.append(recorded)
    assert reconcile_spans

    reconcile_span = reconcile_spans[0]
    span_attrs = reconcile_span.attrs
    assert span_attrs['sandbox.discovered_count'] == 1
    assert span_attrs['sandbox.result'] == 'error'
    assert 'sandbox.active_count' not in span_attrs
    assert excinfo.value in reconcile_span.errors
def test_reconciliation_records_error_when_registry_count_active_fails() -> None:
    """A ``count_active`` failure is recorded as an error with no active count.

    ``replace_all`` succeeds with the discovered session, then
    ``count_active`` raises on its first call. The test checks that no metric
    ``set`` call is recorded and that the reconcile span has the discovered
    count, no ``sandbox.active_count``, an ``error`` result and the exception.
    """
    fake_logger = FakeLogger()
    recorded_metrics = RecordingMetrics()
    recording_tracer = RecordingTracer()

    started_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    running_session = SandboxSession(
        session_id=SESSION_ID,
        chat_id=CHAT_ID,
        container_id='container-123',
        status=SandboxStatus.RUNNING,
        created_at=started_at,
        expires_at=started_at + timedelta(minutes=5),
    )
    failing_registry = FailingRegistry(
        RuntimeError('count_failed'),
        fail_on='count_active',
    )
    reconciler = SandboxSessionReconciler(
        state_source=FixedSandboxState([running_session]),
        registry=failing_registry,
        logger=fake_logger,
        metrics=recorded_metrics,
        tracer=recording_tracer,
    )

    with pytest.raises(RuntimeError, match='count_failed') as excinfo:
        reconciler.execute()

    assert failing_registry.replaced_sessions == [running_session]
    assert failing_registry.count_calls == 1
    assert recorded_metrics.set_calls == []

    # Keep only the spans emitted for the reconcile operation itself.
    reconcile_spans = []
    for span_name, _, recorded in recording_tracer.spans:
        if span_name == 'adapter.sandbox.reconcile_sessions':
            reconcile_spans.append(recorded)
    assert reconcile_spans

    reconcile_span = reconcile_spans[0]
    span_attrs = reconcile_span.attrs
    assert span_attrs['sandbox.discovered_count'] == 1
    assert 'sandbox.active_count' not in span_attrs
    assert span_attrs['sandbox.result'] == 'error'
    assert excinfo.value in reconcile_span.errors
def test_build_container_wires_observability_into_runtime_and_reconciler( def test_build_container_wires_observability_into_runtime_and_reconciler(
monkeypatch, monkeypatch,
) -> None: ) -> None:

View file

@ -83,6 +83,7 @@ class FakeContainers:
self.run_result = run_result or FakeContainer('container-123') self.run_result = run_result or FakeContainer('container-123')
self.get_result: FakeContainer | Exception | None = None self.get_result: FakeContainer | Exception | None = None
self.list_result: list[object] = [] self.list_result: list[object] = []
self.list_error: Exception | None = None
def run( def run(
self, self,
@ -114,6 +115,8 @@ class FakeContainers:
def list(self, *, filters: dict[str, list[str]]) -> list[object]: def list(self, *, filters: dict[str, list[str]]) -> list[object]:
self.list_calls.append({'filters': filters}) self.list_calls.append({'filters': filters})
if self.list_error is not None:
raise self.list_error
return self.list_result return self.list_result
@ -690,3 +693,40 @@ def test_runtime_list_active_records_observability(tmp_path: Path) -> None:
}, },
) )
assert not span.errors assert not span.errors
def test_runtime_list_active_error_records_observability(tmp_path: Path) -> None:
    """A Docker listing failure surfaces as ``SandboxError`` with telemetry.

    The fake containers collection raises ``DockerException`` from ``list``.
    The test checks that ``list_active_sessions`` raises
    ``SandboxError('sandbox_list_failed')`` chained from the Docker error, and
    looks up the error counter, the error-tagged duration record and the
    error-tagged span through the file's ``_find_*`` helpers.
    """
    fake_containers = FakeContainers()
    fake_containers.list_error = DockerException('boom')
    recorded_metrics = RecordingMetrics()
    recording_tracer = RecordingTracer()
    runtime = DockerSandboxRuntime(
        build_config(tmp_path),
        FakeDockerClient(fake_containers),
        recorded_metrics,
        recording_tracer,
    )

    with pytest.raises(SandboxError) as excinfo:
        runtime.list_active_sessions()

    raised = excinfo.value
    assert str(raised) == 'sandbox_list_failed'

    # NOTE(review): the _find_* helpers presumably fail the test themselves
    # when nothing matches - confirm against their definitions.
    error_counter_attrs = {'operation': 'list_active', 'error.type': 'DockerException'}
    _find_increment_call(
        recorded_metrics,
        'sandbox.runtime.error.total',
        attrs=error_counter_attrs,
    )

    duration_attrs = {'operation': 'list_active', 'result': 'error'}
    duration_record = _find_record_call(
        recorded_metrics,
        'sandbox.runtime.list_active.duration_ms',
        attrs=duration_attrs,
    )
    assert duration_record[1] >= 0

    error_span = _find_span(
        recording_tracer,
        'adapter.docker.list_active_sandboxes',
        span_attrs={'sandbox.result': 'error'},
    )
    # The original Docker failure must stay reachable through exception chaining.
    assert isinstance(raised.__cause__, DockerException)
    assert raised in error_span.errors