diff --git a/tasks.md b/tasks.md index 012111d..e01dcde 100644 --- a/tasks.md +++ b/tasks.md @@ -269,7 +269,7 @@ ### M22. Тесты на sandbox observability - Субагент: `test-engineer` -- Статус: pending +- Статус: completed - Зависимости: `M20`, `M21` - Commit required: yes - Commit message: `add sandbox observability tests` diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index ee6a2a4..4db1095 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -1,5 +1,6 @@ from datetime import UTC, datetime, timedelta from pathlib import Path +from types import TracebackType from typing import Any, TypedDict from uuid import UUID @@ -13,6 +14,7 @@ from adapter.docker.runtime import DockerSandboxRuntime from adapter.observability.noop import NoopMetrics, NoopTracer from domain.error import SandboxError, SandboxStartError from domain.sandbox import SandboxSession, SandboxStatus +from usecase.interface import AttrValue, Attrs CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000') NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000' @@ -103,6 +105,140 @@ class FakeDockerClient(DockerClient): return self._containers +class RecordingMetrics: + def __init__(self) -> None: + self.increment_calls: list[tuple[str, int, Attrs | None]] = [] + self.record_calls: list[tuple[str, float, Attrs | None]] = [] + self.set_calls: list[tuple[str, int | float, Attrs | None]] = [] + + def increment( + self, + name: str, + value: int = 1, + attrs: Attrs | None = None, + ) -> None: + self.increment_calls.append((name, value, attrs)) + + def record( + self, + name: str, + value: float, + attrs: Attrs | None = None, + ) -> None: + self.record_calls.append((name, value, attrs)) + + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: + self.set_calls.append((name, value, attrs)) + + +class RecordingSpan: + def __init__(self) -> None: + self.attrs: dict[str, AttrValue] = {} + self.errors: list[Exception] = [] + + def set_attribute(self, name: str, value: AttrValue) -> None: + self.attrs[name] = value + + def record_error(self, error: Exception) -> None: + self.errors.append(error) + + +class RecordingSpanContext: + def __init__(self, span: RecordingSpan) -> None: + self._span = span + + def __enter__(self) -> RecordingSpan: + return self._span + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + return None + + +class RecordingTracer: + def __init__(self) -> None: + self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = [] + + def start_span( + self, + name: str, + attrs: Attrs | None = None, + ) -> RecordingSpanContext: + span = RecordingSpan() + self.spans.append((name, attrs, span)) + return RecordingSpanContext(span) + + +def _attrs_include( + actual: Attrs | dict[str, AttrValue] | None, + expected: dict[str, AttrValue], +) -> bool: + if actual is None: + return False + + return all(actual.get(name) == value for name, value in expected.items()) + + +def _find_span( + tracer: RecordingTracer, + name: str, + attrs: dict[str, AttrValue] | None = None, + span_attrs: dict[str, AttrValue] | None = None, +) -> RecordingSpan: + for recorded_name, recorded_attrs, span in tracer.spans: + if recorded_name != name: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + if span_attrs is not None and not _attrs_include(span.attrs, span_attrs): + continue + return span + + raise AssertionError(f'missing span {name}') + + +def _find_increment_call( + metrics: RecordingMetrics, + name: str, + *, + value: int = 1, + attrs: dict[str, AttrValue] | None = None, +) -> tuple[str, int, Attrs | None]: + for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls: + if recorded_name != name or recorded_value != value: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + return recorded_name, recorded_value, recorded_attrs + + raise AssertionError(f'missing increment metric {name}') + + +def _find_record_call( + metrics: RecordingMetrics, + name: str, + *, + attrs: dict[str, AttrValue] | None = None, +) -> tuple[str, float, Attrs | None]: + for recorded_name, recorded_value, recorded_attrs in metrics.record_calls: + if recorded_name != name: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + return recorded_name, recorded_value, recorded_attrs + + raise AssertionError(f'missing record metric {name}') + + def build_config(tmp_path: Path) -> SandboxConfig: return SandboxConfig( image='sandbox:latest', @@ -187,6 +323,48 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( ] +def test_runtime_create_records_observability(tmp_path: Path) -> None: + config = build_config(tmp_path) + (tmp_path / 'dependencies').mkdir() + (tmp_path / 'lambda-tools').mkdir() + containers = FakeContainers() + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expires_at = created_at + timedelta(minutes=5) + + session = runtime.create( + session_id=SESSION_ID, + chat_id=CHAT_ID, + created_at=created_at, + expires_at=expires_at, + ) + + assert session.container_id == 'container-123' + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.create.duration_ms', + attrs={'operation': 'create', 'result': 'created'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.create_sandbox', + {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)}, + { + 'container.id': 'container-123', + 'sandbox.result': 'created', + }, + ) + assert not span.errors + + def test_runtime_create_raises_start_error_when_container_id_is_missing( tmp_path: Path, ) -> None: @@ -208,6 +386,51 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing( assert excinfo.value.chat_id == str(CHAT_ID) +def test_runtime_create_error_records_observability_when_container_id_missing( + tmp_path: Path, +) -> None: + config = build_config(tmp_path) + (tmp_path / 'dependencies').mkdir() + (tmp_path / 'lambda-tools').mkdir() + containers = FakeContainers(run_result=FakeContainer('')) + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + with pytest.raises(SandboxStartError) as excinfo: + runtime.create( + session_id=SESSION_ID, + chat_id=CHAT_ID, + created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), + expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC), + ) + + assert str(excinfo.value) == 'sandbox_start_failed' + _find_increment_call( + metrics, + 'sandbox.runtime.error.total', + attrs={'operation': 'create', 'error.type': 'SandboxStartError'}, + ) + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.create.duration_ms', + attrs={'operation': 'create', 'result': 'error'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.create_sandbox', + {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)}, + {'sandbox.result': 'error'}, + ) + assert excinfo.value in span.errors + + def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None: config = build_config(tmp_path) containers = FakeContainers() @@ -273,3 +496,56 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers( assert containers.list_calls == [ {'filters': {'label': ['session_id', 'chat_id', 'expires_at']}} ] + + +def test_runtime_list_active_records_observability(tmp_path: Path) -> None: + config = build_config(tmp_path) + containers = FakeContainers() + expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC) + containers.list_result = [ + FakeListedContainer( + 'container-123', + labels={ + 'session_id': str(SESSION_ID), + 'chat_id': str(CHAT_ID), + 'expires_at': expires_at.isoformat(), + }, + created_at='2026-04-02T12:00:00Z', + ), + FakeListedContainer( + 'container-bad', + labels={ + 'chat_id': str(CHAT_ID), + 'expires_at': expires_at.isoformat(), + }, + created_at='2026-04-02T12:01:00Z', + ), + ] + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + sessions = runtime.list_active_sessions() + + assert len(sessions) == 1 + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.list_active.duration_ms', + attrs={'operation': 'list_active', 'result': 'listed'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.list_active_sandboxes', + span_attrs={ + 'sandbox.container_count': 2, + 'sandbox.active_count': 1, + 'sandbox.result': 'listed', + }, + ) + assert not span.errors diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index f744492..403eb0f 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -1,11 +1,15 @@ import threading from datetime import UTC, datetime, timedelta +from types import TracebackType from uuid import UUID +import pytest + from adapter.observability.noop import NoopMetrics, NoopTracer from domain.sandbox import SandboxSession, SandboxStatus from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository +from usecase.interface import AttrValue, Attrs from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand CHAT_ID = UUID('11111111-1111-1111-1111-111111111111') @@ -53,6 +57,130 @@ class FakeLogger: self.messages.append(('error', message, attrs)) +class RecordingMetrics: + def __init__(self) -> None: + self.increment_calls: list[tuple[str, int, Attrs | None]] = [] + self.record_calls: list[tuple[str, float, Attrs | None]] = [] + self.set_calls: list[tuple[str, int | float, Attrs | None]] = [] + + def increment( + self, + name: str, + value: int = 1, + attrs: Attrs | None = None, + ) -> None: + self.increment_calls.append((name, value, attrs)) + + def record( + self, + name: str, + value: float, + attrs: Attrs | None = None, + ) -> None: + self.record_calls.append((name, value, attrs)) + + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: + self.set_calls.append((name, value, attrs)) + + +class RecordingSpan: + def __init__(self) -> None: + self.attrs: dict[str, AttrValue] = {} + self.errors: list[Exception] = [] + + def set_attribute(self, name: str, value: AttrValue) -> None: + self.attrs[name] = value + + def record_error(self, error: Exception) -> None: + self.errors.append(error) + + +class RecordingSpanContext: + def __init__(self, span: RecordingSpan) -> None: + self._span = span + + def __enter__(self) -> RecordingSpan: + return self._span + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + return None + + +class RecordingTracer: + def __init__(self) -> None: + self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = [] + + def start_span( + self, + name: str, + attrs: Attrs | None = None, + ) -> RecordingSpanContext: + span = RecordingSpan() + self.spans.append((name, attrs, span)) + return RecordingSpanContext(span) + + +def _attrs_include( + actual: Attrs | dict[str, AttrValue] | None, + expected: dict[str, AttrValue], +) -> bool: + if actual is None: + return False + + return all(actual.get(name) == value for name, value in expected.items()) + + +def _find_span( + tracer: RecordingTracer, + name: str, + attrs: dict[str, AttrValue] | None = None, + span_attrs: dict[str, AttrValue] | None = None, +) -> RecordingSpan: + for recorded_name, recorded_attrs, span in tracer.spans: + if recorded_name != name: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + if span_attrs is not None and not _attrs_include(span.attrs, span_attrs): + continue + return span + + raise AssertionError(f'missing span {name}') + + +def _assert_increment_metric_present( + metrics: RecordingMetrics, + name: str, + *, + value: int = 1, + attrs: dict[str, AttrValue] | None = None, +) -> None: + for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls: + if recorded_name != name or recorded_value != value: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + return + + raise AssertionError(f'missing increment metric {name}') + + +def _active_count_values(metrics: RecordingMetrics) -> list[int | float]: + return [ + value for name, value, _ in metrics.set_calls if name == 'sandbox.active.count' + ] + + class FakeLockContext: def __enter__(self) -> None: return None @@ -198,6 +326,30 @@ class FailingStopRuntime(FakeRuntime): raise RuntimeError('stop_failed') +class FailingCreateRuntime(FakeRuntime): + def __init__(self, error: Exception) -> None: + super().__init__() + self._error = error + + def create( + self, + *, + session_id: UUID, + chat_id: UUID, + created_at: datetime, + expires_at: datetime, + ) -> SandboxSession: + self.create_calls.append( + { + 'session_id': session_id, + 'chat_id': chat_id, + 'created_at': created_at, + 'expires_at': expires_at, + } + ) + raise self._error + + def test_create_sandbox_reuses_active_session_when_not_expired() -> None: now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) session = SandboxSession( @@ -244,6 +396,104 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None: ] +def test_create_sandbox_reuse_records_observability() -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( + session_id=SESSION_REUSED_ID, + chat_id=CHAT_ID, + container_id='container-1', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=1), + expires_at=now + timedelta(minutes=4), + ) + repository = InMemorySandboxSessionRepository() + repository.save(session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=FakeRuntime(), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + assert result == session + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'reused'}, + ) + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'session.id': str(SESSION_REUSED_ID), + 'container.id': 'container-1', + 'sandbox.result': 'reused', + }, + ) + assert not span.errors + + +def test_create_sandbox_replace_records_observability_and_final_active_count( + monkeypatch, +) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expired_session = SandboxSession( + session_id=SESSION_OLD_ID, + chat_id=CHAT_ID, + container_id='container-old', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=10), + expires_at=now, + ) + repository = InMemorySandboxSessionRepository() + repository.save(expired_session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=FakeRuntime(), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + assert result.session_id == SESSION_NEW_ID + assert repository.count_active() == 1 + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'replaced'}, + ) + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 1 + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'session.id': str(SESSION_NEW_ID), + 'container.id': f'container-{SESSION_NEW_ID}', + 'sandbox.result': 'replaced', + }, + ) + assert not span.errors + + def test_create_sandbox_replaces_expired_session_and_creates_new_one( monkeypatch, ) -> None: @@ -363,6 +613,42 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None: ] +def test_create_sandbox_error_records_observability(monkeypatch) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=InMemorySandboxSessionRepository(), + locker=FakeLocker(), + runtime=FailingCreateRuntime(RuntimeError('create_failed')), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + with pytest.raises(RuntimeError, match='create_failed') as excinfo: + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'error'}, + ) + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'session.id': str(SESSION_NEW_ID), + 'sandbox.result': 'error', + }, + ) + assert excinfo.value in span.errors + + def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( monkeypatch, ) -> None: @@ -516,6 +802,64 @@ def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() -> ] +def test_cleanup_expired_sandboxes_records_observability_on_cleaned_session() -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expired_session = SandboxSession( + session_id=SESSION_EXPIRED_ID, + chat_id=EXPIRED_CHAT_ID, + container_id='container-expired', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=10), + expires_at=now - timedelta(seconds=1), + ) + repository = InMemorySandboxSessionRepository() + repository.save(expired_session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CleanupExpiredSandboxes( + repository=repository, + locker=FakeLocker(), + runtime=FakeRuntime(), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ) + + result = usecase.execute() + + assert result == [expired_session] + _assert_increment_metric_present( + metrics, + 'sandbox.cleanup.total', + attrs={'result': 'cleaned'}, + ) + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 0 + root_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandboxes', + span_attrs={ + 'sandbox.expired_count': 1, + 'sandbox.cleaned_count': 1, + 'sandbox.error_count': 0, + 'sandbox.result': 'completed', + }, + ) + assert not root_span.errors + cleanup_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandbox', + { + 'chat.id': str(EXPIRED_CHAT_ID), + 'session.id': str(SESSION_EXPIRED_ID), + 'container.id': 'container-expired', + }, + {'sandbox.result': 'cleaned'}, + ) + assert not cleanup_span.errors + + def test_cleanup_expired_sandboxes_skips_replaced_session_from_stale_snapshot() -> None: now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) expired_snapshot = SandboxSession(