add sandbox observability failure tests
This commit is contained in:
parent
02770bce7d
commit
b4a2a9ceea
3 changed files with 268 additions and 6 deletions
2
tasks.md
2
tasks.md
|
|
@ -305,7 +305,7 @@
|
||||||
### M25. Добрать failure-path observability regression tests
|
### M25. Добрать failure-path observability regression tests
|
||||||
|
|
||||||
- Субагент: `test-engineer`
|
- Субагент: `test-engineer`
|
||||||
- Статус: pending
|
- Статус: completed
|
||||||
- Зависимости: `M24`
|
- Зависимости: `M24`
|
||||||
- Commit required: yes
|
- Commit required: yes
|
||||||
- Commit message: `add sandbox observability failure tests`
|
- Commit message: `add sandbox observability failure tests`
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,27 @@ class FakeListedContainer(FakeContainer):
|
||||||
self.attrs = {'Created': created_at}
|
self.attrs = {'Created': created_at}
|
||||||
|
|
||||||
|
|
||||||
|
class FailingStopContainer(FakeListedContainer):
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
container_id: str,
|
||||||
|
*,
|
||||||
|
labels: dict[str, str],
|
||||||
|
created_at: str,
|
||||||
|
error: Exception,
|
||||||
|
) -> None:
|
||||||
|
super().__init__(
|
||||||
|
container_id,
|
||||||
|
labels=labels,
|
||||||
|
created_at=created_at,
|
||||||
|
)
|
||||||
|
self._error = error
|
||||||
|
|
||||||
|
def stop(self) -> None:
|
||||||
|
self.stop_calls += 1
|
||||||
|
raise self._error
|
||||||
|
|
||||||
|
|
||||||
class RunKwargs(TypedDict):
|
class RunKwargs(TypedDict):
|
||||||
detach: bool
|
detach: bool
|
||||||
labels: dict[str, str]
|
labels: dict[str, str]
|
||||||
|
|
@ -435,23 +456,143 @@ def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
|
||||||
config = build_config(tmp_path)
|
config = build_config(tmp_path)
|
||||||
containers = FakeContainers()
|
containers = FakeContainers()
|
||||||
containers.get_result = NotFound('missing')
|
containers.get_result = NotFound('missing')
|
||||||
runtime = build_runtime(config, containers)
|
metrics = RecordingMetrics()
|
||||||
|
tracer = RecordingTracer()
|
||||||
|
runtime = DockerSandboxRuntime(
|
||||||
|
config,
|
||||||
|
FakeDockerClient(containers),
|
||||||
|
metrics,
|
||||||
|
tracer,
|
||||||
|
)
|
||||||
|
|
||||||
runtime.stop('container-123')
|
runtime.stop('container-123')
|
||||||
|
|
||||||
assert containers.get_calls == ['container-123']
|
assert containers.get_calls == ['container-123']
|
||||||
|
duration_call = _find_record_call(
|
||||||
|
metrics,
|
||||||
|
'sandbox.runtime.stop.duration_ms',
|
||||||
|
attrs={'operation': 'stop', 'result': 'not_found'},
|
||||||
|
)
|
||||||
|
assert duration_call[1] >= 0
|
||||||
|
span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'adapter.docker.stop_sandbox',
|
||||||
|
{'container.id': 'container-123'},
|
||||||
|
{'sandbox.result': 'not_found'},
|
||||||
|
)
|
||||||
|
assert not span.errors
|
||||||
|
stop_error_calls = [
|
||||||
|
call
|
||||||
|
for call in metrics.increment_calls
|
||||||
|
if call[0] == 'sandbox.runtime.error.total'
|
||||||
|
and call[2] is not None
|
||||||
|
and call[2].get('operation') == 'stop'
|
||||||
|
]
|
||||||
|
assert stop_error_calls == []
|
||||||
|
|
||||||
|
|
||||||
def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
|
def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
|
||||||
config = build_config(tmp_path)
|
config = build_config(tmp_path)
|
||||||
containers = FakeContainers()
|
containers = FakeContainers()
|
||||||
containers.get_result = DockerException('boom')
|
containers.get_result = FailingStopContainer(
|
||||||
runtime = build_runtime(config, containers)
|
'container-123',
|
||||||
|
labels={
|
||||||
|
'session_id': str(SESSION_ID),
|
||||||
|
'chat_id': str(CHAT_ID),
|
||||||
|
'expires_at': '2026-04-02T12:05:00+00:00',
|
||||||
|
},
|
||||||
|
created_at='2026-04-02T12:00:00Z',
|
||||||
|
error=DockerException('boom'),
|
||||||
|
)
|
||||||
|
metrics = RecordingMetrics()
|
||||||
|
tracer = RecordingTracer()
|
||||||
|
runtime = DockerSandboxRuntime(
|
||||||
|
config,
|
||||||
|
FakeDockerClient(containers),
|
||||||
|
metrics,
|
||||||
|
tracer,
|
||||||
|
)
|
||||||
|
|
||||||
with pytest.raises(SandboxError) as excinfo:
|
with pytest.raises(SandboxError) as excinfo:
|
||||||
runtime.stop('container-123')
|
runtime.stop('container-123')
|
||||||
|
|
||||||
assert str(excinfo.value) == 'sandbox_stop_failed'
|
assert str(excinfo.value) == 'sandbox_stop_failed'
|
||||||
|
_find_increment_call(
|
||||||
|
metrics,
|
||||||
|
'sandbox.runtime.error.total',
|
||||||
|
attrs={'operation': 'stop', 'error.type': 'DockerException'},
|
||||||
|
)
|
||||||
|
duration_call = _find_record_call(
|
||||||
|
metrics,
|
||||||
|
'sandbox.runtime.stop.duration_ms',
|
||||||
|
attrs={'operation': 'stop', 'result': 'error'},
|
||||||
|
)
|
||||||
|
assert duration_call[1] >= 0
|
||||||
|
span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'adapter.docker.stop_sandbox',
|
||||||
|
{'container.id': 'container-123'},
|
||||||
|
{
|
||||||
|
'session.id': str(SESSION_ID),
|
||||||
|
'chat.id': str(CHAT_ID),
|
||||||
|
'sandbox.result': 'error',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
cause = excinfo.value.__cause__
|
||||||
|
assert isinstance(cause, DockerException)
|
||||||
|
assert cause in span.errors
|
||||||
|
|
||||||
|
|
||||||
|
def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
|
||||||
|
config = build_config(tmp_path)
|
||||||
|
containers = FakeContainers()
|
||||||
|
container = FakeListedContainer(
|
||||||
|
'container-123',
|
||||||
|
labels={
|
||||||
|
'session_id': str(SESSION_ID),
|
||||||
|
'chat_id': str(CHAT_ID),
|
||||||
|
'expires_at': '2026-04-02T12:05:00+00:00',
|
||||||
|
},
|
||||||
|
created_at='2026-04-02T12:00:00Z',
|
||||||
|
)
|
||||||
|
containers.get_result = container
|
||||||
|
metrics = RecordingMetrics()
|
||||||
|
tracer = RecordingTracer()
|
||||||
|
runtime = DockerSandboxRuntime(
|
||||||
|
config,
|
||||||
|
FakeDockerClient(containers),
|
||||||
|
metrics,
|
||||||
|
tracer,
|
||||||
|
)
|
||||||
|
|
||||||
|
runtime.stop('container-123')
|
||||||
|
|
||||||
|
assert container.stop_calls == 1
|
||||||
|
duration_call = _find_record_call(
|
||||||
|
metrics,
|
||||||
|
'sandbox.runtime.stop.duration_ms',
|
||||||
|
attrs={'operation': 'stop', 'result': 'stopped'},
|
||||||
|
)
|
||||||
|
assert duration_call[1] >= 0
|
||||||
|
span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'adapter.docker.stop_sandbox',
|
||||||
|
{'container.id': 'container-123'},
|
||||||
|
{
|
||||||
|
'session.id': str(SESSION_ID),
|
||||||
|
'chat.id': str(CHAT_ID),
|
||||||
|
'sandbox.result': 'stopped',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert not span.errors
|
||||||
|
stop_error_calls = [
|
||||||
|
call
|
||||||
|
for call in metrics.increment_calls
|
||||||
|
if call[0] == 'sandbox.runtime.error.total'
|
||||||
|
and call[2] is not None
|
||||||
|
and call[2].get('operation') == 'stop'
|
||||||
|
]
|
||||||
|
assert stop_error_calls == []
|
||||||
|
|
||||||
|
|
||||||
def test_runtime_list_active_sessions_reads_valid_labeled_containers(
|
def test_runtime_list_active_sessions_reads_valid_labeled_containers(
|
||||||
|
|
|
||||||
|
|
@ -281,6 +281,22 @@ class StaleSnapshotRepository(InMemorySandboxSessionRepository):
|
||||||
return [self._snapshot]
|
return [self._snapshot]
|
||||||
|
|
||||||
|
|
||||||
|
class FailingSaveRepository(InMemorySandboxSessionRepository):
|
||||||
|
def __init__(self, error: Exception) -> None:
|
||||||
|
super().__init__()
|
||||||
|
self._error = error
|
||||||
|
self._fail_next_save = False
|
||||||
|
|
||||||
|
def fail_next_save(self) -> None:
|
||||||
|
self._fail_next_save = True
|
||||||
|
|
||||||
|
def save(self, session: SandboxSession) -> None:
|
||||||
|
if self._fail_next_save:
|
||||||
|
self._fail_next_save = False
|
||||||
|
raise self._error
|
||||||
|
super().save(session)
|
||||||
|
|
||||||
|
|
||||||
class FakeRuntime:
|
class FakeRuntime:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
self.create_calls: list[dict[str, object]] = []
|
self.create_calls: list[dict[str, object]] = []
|
||||||
|
|
@ -706,6 +722,64 @@ def test_create_sandbox_replace_stop_failure_preserves_separate_identities(
|
||||||
assert excinfo.value in span.errors
|
assert excinfo.value in span.errors
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids(
|
||||||
|
monkeypatch,
|
||||||
|
) -> None:
|
||||||
|
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
|
||||||
|
expired_session = SandboxSession(
|
||||||
|
session_id=SESSION_OLD_ID,
|
||||||
|
chat_id=CHAT_ID,
|
||||||
|
container_id='container-old',
|
||||||
|
status=SandboxStatus.RUNNING,
|
||||||
|
created_at=now - timedelta(minutes=10),
|
||||||
|
expires_at=now,
|
||||||
|
)
|
||||||
|
repository = FailingSaveRepository(RuntimeError('save_failed'))
|
||||||
|
repository.save(expired_session)
|
||||||
|
repository.fail_next_save()
|
||||||
|
metrics = RecordingMetrics()
|
||||||
|
tracer = RecordingTracer()
|
||||||
|
runtime = FakeRuntime()
|
||||||
|
usecase = CreateSandbox(
|
||||||
|
repository=repository,
|
||||||
|
locker=FakeLocker(),
|
||||||
|
runtime=runtime,
|
||||||
|
clock=FakeClock(now),
|
||||||
|
logger=FakeLogger(),
|
||||||
|
metrics=metrics,
|
||||||
|
tracer=tracer,
|
||||||
|
ttl=timedelta(minutes=5),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
|
||||||
|
|
||||||
|
with pytest.raises(RuntimeError, match='save_failed') as excinfo:
|
||||||
|
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
|
||||||
|
|
||||||
|
assert runtime.stop_calls == ['container-old']
|
||||||
|
assert len(runtime.create_calls) == 1
|
||||||
|
assert repository.get_active_by_chat_id(CHAT_ID) is None
|
||||||
|
_assert_increment_metric_present(
|
||||||
|
metrics,
|
||||||
|
'sandbox.create.total',
|
||||||
|
attrs={'result': 'error'},
|
||||||
|
)
|
||||||
|
span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'usecase.create_sandbox',
|
||||||
|
{'chat.id': str(CHAT_ID)},
|
||||||
|
{
|
||||||
|
'sandbox.previous_session.id': str(SESSION_OLD_ID),
|
||||||
|
'sandbox.previous_container.id': 'container-old',
|
||||||
|
'sandbox.new_session.id': str(SESSION_NEW_ID),
|
||||||
|
'sandbox.new_container.id': f'container-{SESSION_NEW_ID}',
|
||||||
|
'sandbox.result': 'error',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert 'session.id' not in span.attrs
|
||||||
|
assert 'container.id' not in span.attrs
|
||||||
|
assert excinfo.value in span.errors
|
||||||
|
|
||||||
|
|
||||||
def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
|
def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
|
||||||
monkeypatch,
|
monkeypatch,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|
@ -982,6 +1056,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None:
|
||||||
repository.save(cleaned_session)
|
repository.save(cleaned_session)
|
||||||
runtime = FailingStopRuntime('container-fail')
|
runtime = FailingStopRuntime('container-fail')
|
||||||
logger = FakeLogger()
|
logger = FakeLogger()
|
||||||
|
metrics = RecordingMetrics()
|
||||||
|
tracer = RecordingTracer()
|
||||||
locker = FakeLocker()
|
locker = FakeLocker()
|
||||||
usecase = CleanupExpiredSandboxes(
|
usecase = CleanupExpiredSandboxes(
|
||||||
repository=repository,
|
repository=repository,
|
||||||
|
|
@ -989,8 +1065,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None:
|
||||||
runtime=runtime,
|
runtime=runtime,
|
||||||
clock=FakeClock(now),
|
clock=FakeClock(now),
|
||||||
logger=logger,
|
logger=logger,
|
||||||
metrics=NoopMetrics(),
|
metrics=metrics,
|
||||||
tracer=NoopTracer(),
|
tracer=tracer,
|
||||||
)
|
)
|
||||||
|
|
||||||
result = usecase.execute()
|
result = usecase.execute()
|
||||||
|
|
@ -1021,3 +1097,48 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None:
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
_assert_increment_metric_present(
|
||||||
|
metrics,
|
||||||
|
'sandbox.cleanup.error.total',
|
||||||
|
attrs={'error.type': 'RuntimeError'},
|
||||||
|
)
|
||||||
|
_assert_increment_metric_present(
|
||||||
|
metrics,
|
||||||
|
'sandbox.cleanup.total',
|
||||||
|
attrs={'result': 'cleaned'},
|
||||||
|
)
|
||||||
|
assert _active_count_values(metrics)
|
||||||
|
assert _active_count_values(metrics)[-1] == 1
|
||||||
|
root_span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'usecase.cleanup_expired_sandboxes',
|
||||||
|
span_attrs={
|
||||||
|
'sandbox.expired_count': 2,
|
||||||
|
'sandbox.cleaned_count': 1,
|
||||||
|
'sandbox.error_count': 1,
|
||||||
|
'sandbox.result': 'completed_with_errors',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
assert not root_span.errors
|
||||||
|
failed_span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'usecase.cleanup_expired_sandbox',
|
||||||
|
{
|
||||||
|
'chat.id': str(FAIL_CHAT_ID),
|
||||||
|
'session.id': str(SESSION_FAIL_ID),
|
||||||
|
'container.id': 'container-fail',
|
||||||
|
},
|
||||||
|
{'sandbox.result': 'error'},
|
||||||
|
)
|
||||||
|
assert [str(error) for error in failed_span.errors] == ['stop_failed']
|
||||||
|
cleaned_span = _find_span(
|
||||||
|
tracer,
|
||||||
|
'usecase.cleanup_expired_sandbox',
|
||||||
|
{
|
||||||
|
'chat.id': str(CLEAN_CHAT_ID),
|
||||||
|
'session.id': str(SESSION_CLEAN_ID),
|
||||||
|
'container.id': 'container-clean',
|
||||||
|
},
|
||||||
|
{'sandbox.result': 'cleaned'},
|
||||||
|
)
|
||||||
|
assert not cleaned_span.errors
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue