add sandbox observability tests

2026-04-03 01:34:10 +03:00 · 2026-04-03 01:34:10 +03:00 · dff28efecf
commit dff28efecf
parent 8d3a080d45
3 changed files with 621 additions and 1 deletions
--- a/tasks.md
+++ b/tasks.md
@ -269,7 +269,7 @@
 ### M22. Тесты на sandbox observability

 - Субагент: `test-engineer`
- Статус: pending
+- Статус: completed
 - Зависимости: `M20`, `M21`
 - Commit required: yes
 - Commit message: `add sandbox observability tests`
--- a/test/test_docker_runtime.py
+++ b/test/test_docker_runtime.py
@ -1,5 +1,6 @@
 from datetime import UTC, datetime, timedelta
 from pathlib import Path
+from types import TracebackType
 from typing import Any, TypedDict
 from uuid import UUID

@ -13,6 +14,7 @@ from adapter.docker.runtime import DockerSandboxRuntime
 from adapter.observability.noop import NoopMetrics, NoopTracer
 from domain.error import SandboxError, SandboxStartError
 from domain.sandbox import SandboxSession, SandboxStatus
+from usecase.interface import AttrValue, Attrs

 CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
 NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
@ -103,6 +105,140 @@ class FakeDockerClient(DockerClient):
        return self._containers


+class RecordingMetrics:
+    def __init__(self) -> None:
+        self.increment_calls: list[tuple[str, int, Attrs | None]] = []
+        self.record_calls: list[tuple[str, float, Attrs | None]] = []
+        self.set_calls: list[tuple[str, int | float, Attrs | None]] = []
+
+    def increment(
+        self,
+        name: str,
+        value: int = 1,
+        attrs: Attrs | None = None,
+    ) -> None:
+        self.increment_calls.append((name, value, attrs))
+
+    def record(
+        self,
+        name: str,
+        value: float,
+        attrs: Attrs | None = None,
+    ) -> None:
+        self.record_calls.append((name, value, attrs))
+
+    def set(
+        self,
+        name: str,
+        value: int | float,
+        attrs: Attrs | None = None,
+    ) -> None:
+        self.set_calls.append((name, value, attrs))
+
+
+class RecordingSpan:
+    def __init__(self) -> None:
+        self.attrs: dict[str, AttrValue] = {}
+        self.errors: list[Exception] = []
+
+    def set_attribute(self, name: str, value: AttrValue) -> None:
+        self.attrs[name] = value
+
+    def record_error(self, error: Exception) -> None:
+        self.errors.append(error)
+
+
+class RecordingSpanContext:
+    def __init__(self, span: RecordingSpan) -> None:
+        self._span = span
+
+    def __enter__(self) -> RecordingSpan:
+        return self._span
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        return None
+
+
+class RecordingTracer:
+    def __init__(self) -> None:
+        self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []
+
+    def start_span(
+        self,
+        name: str,
+        attrs: Attrs | None = None,
+    ) -> RecordingSpanContext:
+        span = RecordingSpan()
+        self.spans.append((name, attrs, span))
+        return RecordingSpanContext(span)
+
+
+def _attrs_include(
+    actual: Attrs | dict[str, AttrValue] | None,
+    expected: dict[str, AttrValue],
+) -> bool:
+    if actual is None:
+        return False
+
+    return all(actual.get(name) == value for name, value in expected.items())
+
+
+def _find_span(
+    tracer: RecordingTracer,
+    name: str,
+    attrs: dict[str, AttrValue] | None = None,
+    span_attrs: dict[str, AttrValue] | None = None,
+) -> RecordingSpan:
+    for recorded_name, recorded_attrs, span in tracer.spans:
+        if recorded_name != name:
+            continue
+        if attrs is not None and not _attrs_include(recorded_attrs, attrs):
+            continue
+        if span_attrs is not None and not _attrs_include(span.attrs, span_attrs):
+            continue
+        return span
+
+    raise AssertionError(f'missing span {name}')
+
+
+def _find_increment_call(
+    metrics: RecordingMetrics,
+    name: str,
+    *,
+    value: int = 1,
+    attrs: dict[str, AttrValue] | None = None,
+) -> tuple[str, int, Attrs | None]:
+    for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls:
+        if recorded_name != name or recorded_value != value:
+            continue
+        if attrs is not None and not _attrs_include(recorded_attrs, attrs):
+            continue
+        return recorded_name, recorded_value, recorded_attrs
+
+    raise AssertionError(f'missing increment metric {name}')
+
+
+def _find_record_call(
+    metrics: RecordingMetrics,
+    name: str,
+    *,
+    attrs: dict[str, AttrValue] | None = None,
+) -> tuple[str, float, Attrs | None]:
+    for recorded_name, recorded_value, recorded_attrs in metrics.record_calls:
+        if recorded_name != name:
+            continue
+        if attrs is not None and not _attrs_include(recorded_attrs, attrs):
+            continue
+        return recorded_name, recorded_value, recorded_attrs
+
+    raise AssertionError(f'missing record metric {name}')
+
+
 def build_config(tmp_path: Path) -> SandboxConfig:
    return SandboxConfig(
        image='sandbox:latest',
@ -187,6 +323,48 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
    ]


+def test_runtime_create_records_observability(tmp_path: Path) -> None:
+    config = build_config(tmp_path)
+    (tmp_path / 'dependencies').mkdir()
+    (tmp_path / 'lambda-tools').mkdir()
+    containers = FakeContainers()
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    runtime = DockerSandboxRuntime(
+        config,
+        FakeDockerClient(containers),
+        metrics,
+        tracer,
+    )
+    created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
+    expires_at = created_at + timedelta(minutes=5)
+
+    session = runtime.create(
+        session_id=SESSION_ID,
+        chat_id=CHAT_ID,
+        created_at=created_at,
+        expires_at=expires_at,
+    )
+
+    assert session.container_id == 'container-123'
+    duration_call = _find_record_call(
+        metrics,
+        'sandbox.runtime.create.duration_ms',
+        attrs={'operation': 'create', 'result': 'created'},
+    )
+    assert duration_call[1] >= 0
+    span = _find_span(
+        tracer,
+        'adapter.docker.create_sandbox',
+        {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
+        {
+            'container.id': 'container-123',
+            'sandbox.result': 'created',
+        },
+    )
+    assert not span.errors
+
+
 def test_runtime_create_raises_start_error_when_container_id_is_missing(
    tmp_path: Path,
 ) -> None:
@ -208,6 +386,51 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing(
    assert excinfo.value.chat_id == str(CHAT_ID)


+def test_runtime_create_error_records_observability_when_container_id_missing(
+    tmp_path: Path,
+) -> None:
+    config = build_config(tmp_path)
+    (tmp_path / 'dependencies').mkdir()
+    (tmp_path / 'lambda-tools').mkdir()
+    containers = FakeContainers(run_result=FakeContainer(''))
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    runtime = DockerSandboxRuntime(
+        config,
+        FakeDockerClient(containers),
+        metrics,
+        tracer,
+    )
+
+    with pytest.raises(SandboxStartError) as excinfo:
+        runtime.create(
+            session_id=SESSION_ID,
+            chat_id=CHAT_ID,
+            created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
+            expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
+        )
+
+    assert str(excinfo.value) == 'sandbox_start_failed'
+    _find_increment_call(
+        metrics,
+        'sandbox.runtime.error.total',
+        attrs={'operation': 'create', 'error.type': 'SandboxStartError'},
+    )
+    duration_call = _find_record_call(
+        metrics,
+        'sandbox.runtime.create.duration_ms',
+        attrs={'operation': 'create', 'result': 'error'},
+    )
+    assert duration_call[1] >= 0
+    span = _find_span(
+        tracer,
+        'adapter.docker.create_sandbox',
+        {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
+        {'sandbox.result': 'error'},
+    )
+    assert excinfo.value in span.errors
+
+
 def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
    config = build_config(tmp_path)
    containers = FakeContainers()
@ -273,3 +496,56 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers(
    assert containers.list_calls == [
        {'filters': {'label': ['session_id', 'chat_id', 'expires_at']}}
    ]
+
+
+def test_runtime_list_active_records_observability(tmp_path: Path) -> None:
+    config = build_config(tmp_path)
+    containers = FakeContainers()
+    expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
+    containers.list_result = [
+        FakeListedContainer(
+            'container-123',
+            labels={
+                'session_id': str(SESSION_ID),
+                'chat_id': str(CHAT_ID),
+                'expires_at': expires_at.isoformat(),
+            },
+            created_at='2026-04-02T12:00:00Z',
+        ),
+        FakeListedContainer(
+            'container-bad',
+            labels={
+                'chat_id': str(CHAT_ID),
+                'expires_at': expires_at.isoformat(),
+            },
+            created_at='2026-04-02T12:01:00Z',
+        ),
+    ]
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    runtime = DockerSandboxRuntime(
+        config,
+        FakeDockerClient(containers),
+        metrics,
+        tracer,
+    )
+
+    sessions = runtime.list_active_sessions()
+
+    assert len(sessions) == 1
+    duration_call = _find_record_call(
+        metrics,
+        'sandbox.runtime.list_active.duration_ms',
+        attrs={'operation': 'list_active', 'result': 'listed'},
+    )
+    assert duration_call[1] >= 0
+    span = _find_span(
+        tracer,
+        'adapter.docker.list_active_sandboxes',
+        span_attrs={
+            'sandbox.container_count': 2,
+            'sandbox.active_count': 1,
+            'sandbox.result': 'listed',
+        },
+    )
+    assert not span.errors
--- a/test/test_sandbox_usecase.py
+++ b/test/test_sandbox_usecase.py
@ -1,11 +1,15 @@
 import threading
 from datetime import UTC, datetime, timedelta
+from types import TracebackType
 from uuid import UUID

+import pytest
+
 from adapter.observability.noop import NoopMetrics, NoopTracer
 from domain.sandbox import SandboxSession, SandboxStatus
 from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
 from repository.sandbox_session import InMemorySandboxSessionRepository
+from usecase.interface import AttrValue, Attrs
 from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand

 CHAT_ID = UUID('11111111-1111-1111-1111-111111111111')
@ -53,6 +57,130 @@ class FakeLogger:
        self.messages.append(('error', message, attrs))


+class RecordingMetrics:
+    def __init__(self) -> None:
+        self.increment_calls: list[tuple[str, int, Attrs | None]] = []
+        self.record_calls: list[tuple[str, float, Attrs | None]] = []
+        self.set_calls: list[tuple[str, int | float, Attrs | None]] = []
+
+    def increment(
+        self,
+        name: str,
+        value: int = 1,
+        attrs: Attrs | None = None,
+    ) -> None:
+        self.increment_calls.append((name, value, attrs))
+
+    def record(
+        self,
+        name: str,
+        value: float,
+        attrs: Attrs | None = None,
+    ) -> None:
+        self.record_calls.append((name, value, attrs))
+
+    def set(
+        self,
+        name: str,
+        value: int | float,
+        attrs: Attrs | None = None,
+    ) -> None:
+        self.set_calls.append((name, value, attrs))
+
+
+class RecordingSpan:
+    def __init__(self) -> None:
+        self.attrs: dict[str, AttrValue] = {}
+        self.errors: list[Exception] = []
+
+    def set_attribute(self, name: str, value: AttrValue) -> None:
+        self.attrs[name] = value
+
+    def record_error(self, error: Exception) -> None:
+        self.errors.append(error)
+
+
+class RecordingSpanContext:
+    def __init__(self, span: RecordingSpan) -> None:
+        self._span = span
+
+    def __enter__(self) -> RecordingSpan:
+        return self._span
+
+    def __exit__(
+        self,
+        exc_type: type[BaseException] | None,
+        exc: BaseException | None,
+        traceback: TracebackType | None,
+    ) -> bool | None:
+        return None
+
+
+class RecordingTracer:
+    def __init__(self) -> None:
+        self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []
+
+    def start_span(
+        self,
+        name: str,
+        attrs: Attrs | None = None,
+    ) -> RecordingSpanContext:
+        span = RecordingSpan()
+        self.spans.append((name, attrs, span))
+        return RecordingSpanContext(span)
+
+
+def _attrs_include(
+    actual: Attrs | dict[str, AttrValue] | None,
+    expected: dict[str, AttrValue],
+) -> bool:
+    if actual is None:
+        return False
+
+    return all(actual.get(name) == value for name, value in expected.items())
+
+
+def _find_span(
+    tracer: RecordingTracer,
+    name: str,
+    attrs: dict[str, AttrValue] | None = None,
+    span_attrs: dict[str, AttrValue] | None = None,
+) -> RecordingSpan:
+    for recorded_name, recorded_attrs, span in tracer.spans:
+        if recorded_name != name:
+            continue
+        if attrs is not None and not _attrs_include(recorded_attrs, attrs):
+            continue
+        if span_attrs is not None and not _attrs_include(span.attrs, span_attrs):
+            continue
+        return span
+
+    raise AssertionError(f'missing span {name}')
+
+
+def _assert_increment_metric_present(
+    metrics: RecordingMetrics,
+    name: str,
+    *,
+    value: int = 1,
+    attrs: dict[str, AttrValue] | None = None,
+) -> None:
+    for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls:
+        if recorded_name != name or recorded_value != value:
+            continue
+        if attrs is not None and not _attrs_include(recorded_attrs, attrs):
+            continue
+        return
+
+    raise AssertionError(f'missing increment metric {name}')
+
+
+def _active_count_values(metrics: RecordingMetrics) -> list[int | float]:
+    return [
+        value for name, value, _ in metrics.set_calls if name == 'sandbox.active.count'
+    ]
+
+
 class FakeLockContext:
    def __enter__(self) -> None:
        return None
@ -198,6 +326,30 @@ class FailingStopRuntime(FakeRuntime):
            raise RuntimeError('stop_failed')


+class FailingCreateRuntime(FakeRuntime):
+    def __init__(self, error: Exception) -> None:
+        super().__init__()
+        self._error = error
+
+    def create(
+        self,
+        *,
+        session_id: UUID,
+        chat_id: UUID,
+        created_at: datetime,
+        expires_at: datetime,
+    ) -> SandboxSession:
+        self.create_calls.append(
+            {
+                'session_id': session_id,
+                'chat_id': chat_id,
+                'created_at': created_at,
+                'expires_at': expires_at,
+            }
+        )
+        raise self._error
+
+
 def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    session = SandboxSession(
@ -244,6 +396,104 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
    ]


+def test_create_sandbox_reuse_records_observability() -> None:
+    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
+    session = SandboxSession(
+        session_id=SESSION_REUSED_ID,
+        chat_id=CHAT_ID,
+        container_id='container-1',
+        status=SandboxStatus.RUNNING,
+        created_at=now - timedelta(minutes=1),
+        expires_at=now + timedelta(minutes=4),
+    )
+    repository = InMemorySandboxSessionRepository()
+    repository.save(session)
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    usecase = CreateSandbox(
+        repository=repository,
+        locker=FakeLocker(),
+        runtime=FakeRuntime(),
+        clock=FakeClock(now),
+        logger=FakeLogger(),
+        metrics=metrics,
+        tracer=tracer,
+        ttl=timedelta(minutes=5),
+    )
+
+    result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
+
+    assert result == session
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.create.total',
+        attrs={'result': 'reused'},
+    )
+    span = _find_span(
+        tracer,
+        'usecase.create_sandbox',
+        {'chat.id': str(CHAT_ID)},
+        {
+            'session.id': str(SESSION_REUSED_ID),
+            'container.id': 'container-1',
+            'sandbox.result': 'reused',
+        },
+    )
+    assert not span.errors
+
+
+def test_create_sandbox_replace_records_observability_and_final_active_count(
+    monkeypatch,
+) -> None:
+    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
+    expired_session = SandboxSession(
+        session_id=SESSION_OLD_ID,
+        chat_id=CHAT_ID,
+        container_id='container-old',
+        status=SandboxStatus.RUNNING,
+        created_at=now - timedelta(minutes=10),
+        expires_at=now,
+    )
+    repository = InMemorySandboxSessionRepository()
+    repository.save(expired_session)
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    usecase = CreateSandbox(
+        repository=repository,
+        locker=FakeLocker(),
+        runtime=FakeRuntime(),
+        clock=FakeClock(now),
+        logger=FakeLogger(),
+        metrics=metrics,
+        tracer=tracer,
+        ttl=timedelta(minutes=5),
+    )
+    monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
+
+    result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
+
+    assert result.session_id == SESSION_NEW_ID
+    assert repository.count_active() == 1
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.create.total',
+        attrs={'result': 'replaced'},
+    )
+    assert _active_count_values(metrics)
+    assert _active_count_values(metrics)[-1] == 1
+    span = _find_span(
+        tracer,
+        'usecase.create_sandbox',
+        {'chat.id': str(CHAT_ID)},
+        {
+            'session.id': str(SESSION_NEW_ID),
+            'container.id': f'container-{SESSION_NEW_ID}',
+            'sandbox.result': 'replaced',
+        },
+    )
+    assert not span.errors
+
+
 def test_create_sandbox_replaces_expired_session_and_creates_new_one(
    monkeypatch,
 ) -> None:
@ -363,6 +613,42 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
    ]


+def test_create_sandbox_error_records_observability(monkeypatch) -> None:
+    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    usecase = CreateSandbox(
+        repository=InMemorySandboxSessionRepository(),
+        locker=FakeLocker(),
+        runtime=FailingCreateRuntime(RuntimeError('create_failed')),
+        clock=FakeClock(now),
+        logger=FakeLogger(),
+        metrics=metrics,
+        tracer=tracer,
+        ttl=timedelta(minutes=5),
+    )
+    monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
+
+    with pytest.raises(RuntimeError, match='create_failed') as excinfo:
+        usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
+
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.create.total',
+        attrs={'result': 'error'},
+    )
+    span = _find_span(
+        tracer,
+        'usecase.create_sandbox',
+        {'chat.id': str(CHAT_ID)},
+        {
+            'session.id': str(SESSION_NEW_ID),
+            'sandbox.result': 'error',
+        },
+    )
+    assert excinfo.value in span.errors
+
+
 def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
    monkeypatch,
 ) -> None:
@ -516,6 +802,64 @@ def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() ->
    ]


+def test_cleanup_expired_sandboxes_records_observability_on_cleaned_session() -> None:
+    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
+    expired_session = SandboxSession(
+        session_id=SESSION_EXPIRED_ID,
+        chat_id=EXPIRED_CHAT_ID,
+        container_id='container-expired',
+        status=SandboxStatus.RUNNING,
+        created_at=now - timedelta(minutes=10),
+        expires_at=now - timedelta(seconds=1),
+    )
+    repository = InMemorySandboxSessionRepository()
+    repository.save(expired_session)
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    usecase = CleanupExpiredSandboxes(
+        repository=repository,
+        locker=FakeLocker(),
+        runtime=FakeRuntime(),
+        clock=FakeClock(now),
+        logger=FakeLogger(),
+        metrics=metrics,
+        tracer=tracer,
+    )
+
+    result = usecase.execute()
+
+    assert result == [expired_session]
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.cleanup.total',
+        attrs={'result': 'cleaned'},
+    )
+    assert _active_count_values(metrics)
+    assert _active_count_values(metrics)[-1] == 0
+    root_span = _find_span(
+        tracer,
+        'usecase.cleanup_expired_sandboxes',
+        span_attrs={
+            'sandbox.expired_count': 1,
+            'sandbox.cleaned_count': 1,
+            'sandbox.error_count': 0,
+            'sandbox.result': 'completed',
+        },
+    )
+    assert not root_span.errors
+    cleanup_span = _find_span(
+        tracer,
+        'usecase.cleanup_expired_sandbox',
+        {
+            'chat.id': str(EXPIRED_CHAT_ID),
+            'session.id': str(SESSION_EXPIRED_ID),
+            'container.id': 'container-expired',
+        },
+        {'sandbox.result': 'cleaned'},
+    )
+    assert not cleanup_span.errors
+
+
 def test_cleanup_expired_sandboxes_skips_replaced_session_from_stale_snapshot() -> None:
    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
    expired_snapshot = SandboxSession(