add sandbox observability tests

This commit is contained in:
Azamat 2026-04-03 01:34:10 +03:00
parent 8d3a080d45
commit dff28efecf
3 changed files with 621 additions and 1 deletions

View file

@ -269,7 +269,7 @@
### M22. Тесты на sandbox observability ### M22. Тесты на sandbox observability
- Субагент: `test-engineer` - Субагент: `test-engineer`
- Статус: pending - Статус: completed
- Зависимости: `M20`, `M21` - Зависимости: `M20`, `M21`
- Commit required: yes - Commit required: yes
- Commit message: `add sandbox observability tests` - Commit message: `add sandbox observability tests`

View file

@ -1,5 +1,6 @@
from datetime import UTC, datetime, timedelta from datetime import UTC, datetime, timedelta
from pathlib import Path from pathlib import Path
from types import TracebackType
from typing import Any, TypedDict from typing import Any, TypedDict
from uuid import UUID from uuid import UUID
@ -13,6 +14,7 @@ from adapter.docker.runtime import DockerSandboxRuntime
from adapter.observability.noop import NoopMetrics, NoopTracer from adapter.observability.noop import NoopMetrics, NoopTracer
from domain.error import SandboxError, SandboxStartError from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus from domain.sandbox import SandboxSession, SandboxStatus
from usecase.interface import AttrValue, Attrs
CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000') CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000' NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
@ -103,6 +105,140 @@ class FakeDockerClient(DockerClient):
return self._containers return self._containers
class RecordingMetrics:
def __init__(self) -> None:
self.increment_calls: list[tuple[str, int, Attrs | None]] = []
self.record_calls: list[tuple[str, float, Attrs | None]] = []
self.set_calls: list[tuple[str, int | float, Attrs | None]] = []
def increment(
self,
name: str,
value: int = 1,
attrs: Attrs | None = None,
) -> None:
self.increment_calls.append((name, value, attrs))
def record(
self,
name: str,
value: float,
attrs: Attrs | None = None,
) -> None:
self.record_calls.append((name, value, attrs))
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None:
self.set_calls.append((name, value, attrs))
class RecordingSpan:
def __init__(self) -> None:
self.attrs: dict[str, AttrValue] = {}
self.errors: list[Exception] = []
def set_attribute(self, name: str, value: AttrValue) -> None:
self.attrs[name] = value
def record_error(self, error: Exception) -> None:
self.errors.append(error)
class RecordingSpanContext:
def __init__(self, span: RecordingSpan) -> None:
self._span = span
def __enter__(self) -> RecordingSpan:
return self._span
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: TracebackType | None,
) -> bool | None:
return None
class RecordingTracer:
def __init__(self) -> None:
self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []
def start_span(
self,
name: str,
attrs: Attrs | None = None,
) -> RecordingSpanContext:
span = RecordingSpan()
self.spans.append((name, attrs, span))
return RecordingSpanContext(span)
def _attrs_include(
actual: Attrs | dict[str, AttrValue] | None,
expected: dict[str, AttrValue],
) -> bool:
if actual is None:
return False
return all(actual.get(name) == value for name, value in expected.items())
def _find_span(
tracer: RecordingTracer,
name: str,
attrs: dict[str, AttrValue] | None = None,
span_attrs: dict[str, AttrValue] | None = None,
) -> RecordingSpan:
for recorded_name, recorded_attrs, span in tracer.spans:
if recorded_name != name:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
if span_attrs is not None and not _attrs_include(span.attrs, span_attrs):
continue
return span
raise AssertionError(f'missing span {name}')
def _find_increment_call(
metrics: RecordingMetrics,
name: str,
*,
value: int = 1,
attrs: dict[str, AttrValue] | None = None,
) -> tuple[str, int, Attrs | None]:
for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls:
if recorded_name != name or recorded_value != value:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
return recorded_name, recorded_value, recorded_attrs
raise AssertionError(f'missing increment metric {name}')
def _find_record_call(
metrics: RecordingMetrics,
name: str,
*,
attrs: dict[str, AttrValue] | None = None,
) -> tuple[str, float, Attrs | None]:
for recorded_name, recorded_value, recorded_attrs in metrics.record_calls:
if recorded_name != name:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
return recorded_name, recorded_value, recorded_attrs
raise AssertionError(f'missing record metric {name}')
def build_config(tmp_path: Path) -> SandboxConfig: def build_config(tmp_path: Path) -> SandboxConfig:
return SandboxConfig( return SandboxConfig(
image='sandbox:latest', image='sandbox:latest',
@ -187,6 +323,48 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
] ]
def test_runtime_create_records_observability(tmp_path: Path) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
(tmp_path / 'lambda-tools').mkdir()
containers = FakeContainers()
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expires_at = created_at + timedelta(minutes=5)
session = runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
created_at=created_at,
expires_at=expires_at,
)
assert session.container_id == 'container-123'
duration_call = _find_record_call(
metrics,
'sandbox.runtime.create.duration_ms',
attrs={'operation': 'create', 'result': 'created'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.create_sandbox',
{'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
{
'container.id': 'container-123',
'sandbox.result': 'created',
},
)
assert not span.errors
def test_runtime_create_raises_start_error_when_container_id_is_missing( def test_runtime_create_raises_start_error_when_container_id_is_missing(
tmp_path: Path, tmp_path: Path,
) -> None: ) -> None:
@ -208,6 +386,51 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing(
assert excinfo.value.chat_id == str(CHAT_ID) assert excinfo.value.chat_id == str(CHAT_ID)
def test_runtime_create_error_records_observability_when_container_id_missing(
tmp_path: Path,
) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
(tmp_path / 'lambda-tools').mkdir()
containers = FakeContainers(run_result=FakeContainer(''))
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
with pytest.raises(SandboxStartError) as excinfo:
runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
)
assert str(excinfo.value) == 'sandbox_start_failed'
_find_increment_call(
metrics,
'sandbox.runtime.error.total',
attrs={'operation': 'create', 'error.type': 'SandboxStartError'},
)
duration_call = _find_record_call(
metrics,
'sandbox.runtime.create.duration_ms',
attrs={'operation': 'create', 'result': 'error'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.create_sandbox',
{'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
{'sandbox.result': 'error'},
)
assert excinfo.value in span.errors
def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None: def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
config = build_config(tmp_path) config = build_config(tmp_path)
containers = FakeContainers() containers = FakeContainers()
@ -273,3 +496,56 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers(
assert containers.list_calls == [ assert containers.list_calls == [
{'filters': {'label': ['session_id', 'chat_id', 'expires_at']}} {'filters': {'label': ['session_id', 'chat_id', 'expires_at']}}
] ]
def test_runtime_list_active_records_observability(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
containers.list_result = [
FakeListedContainer(
'container-123',
labels={
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
},
created_at='2026-04-02T12:00:00Z',
),
FakeListedContainer(
'container-bad',
labels={
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
},
created_at='2026-04-02T12:01:00Z',
),
]
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
sessions = runtime.list_active_sessions()
assert len(sessions) == 1
duration_call = _find_record_call(
metrics,
'sandbox.runtime.list_active.duration_ms',
attrs={'operation': 'list_active', 'result': 'listed'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.list_active_sandboxes',
span_attrs={
'sandbox.container_count': 2,
'sandbox.active_count': 1,
'sandbox.result': 'listed',
},
)
assert not span.errors

View file

@ -1,11 +1,15 @@
import threading import threading
from datetime import UTC, datetime, timedelta from datetime import UTC, datetime, timedelta
from types import TracebackType
from uuid import UUID from uuid import UUID
import pytest
from adapter.observability.noop import NoopMetrics, NoopTracer from adapter.observability.noop import NoopMetrics, NoopTracer
from domain.sandbox import SandboxSession, SandboxStatus from domain.sandbox import SandboxSession, SandboxStatus
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import AttrValue, Attrs
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand
CHAT_ID = UUID('11111111-1111-1111-1111-111111111111') CHAT_ID = UUID('11111111-1111-1111-1111-111111111111')
@ -53,6 +57,130 @@ class FakeLogger:
self.messages.append(('error', message, attrs)) self.messages.append(('error', message, attrs))
class RecordingMetrics:
def __init__(self) -> None:
self.increment_calls: list[tuple[str, int, Attrs | None]] = []
self.record_calls: list[tuple[str, float, Attrs | None]] = []
self.set_calls: list[tuple[str, int | float, Attrs | None]] = []
def increment(
self,
name: str,
value: int = 1,
attrs: Attrs | None = None,
) -> None:
self.increment_calls.append((name, value, attrs))
def record(
self,
name: str,
value: float,
attrs: Attrs | None = None,
) -> None:
self.record_calls.append((name, value, attrs))
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None:
self.set_calls.append((name, value, attrs))
class RecordingSpan:
def __init__(self) -> None:
self.attrs: dict[str, AttrValue] = {}
self.errors: list[Exception] = []
def set_attribute(self, name: str, value: AttrValue) -> None:
self.attrs[name] = value
def record_error(self, error: Exception) -> None:
self.errors.append(error)
class RecordingSpanContext:
def __init__(self, span: RecordingSpan) -> None:
self._span = span
def __enter__(self) -> RecordingSpan:
return self._span
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: TracebackType | None,
) -> bool | None:
return None
class RecordingTracer:
def __init__(self) -> None:
self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []
def start_span(
self,
name: str,
attrs: Attrs | None = None,
) -> RecordingSpanContext:
span = RecordingSpan()
self.spans.append((name, attrs, span))
return RecordingSpanContext(span)
def _attrs_include(
actual: Attrs | dict[str, AttrValue] | None,
expected: dict[str, AttrValue],
) -> bool:
if actual is None:
return False
return all(actual.get(name) == value for name, value in expected.items())
def _find_span(
tracer: RecordingTracer,
name: str,
attrs: dict[str, AttrValue] | None = None,
span_attrs: dict[str, AttrValue] | None = None,
) -> RecordingSpan:
for recorded_name, recorded_attrs, span in tracer.spans:
if recorded_name != name:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
if span_attrs is not None and not _attrs_include(span.attrs, span_attrs):
continue
return span
raise AssertionError(f'missing span {name}')
def _assert_increment_metric_present(
metrics: RecordingMetrics,
name: str,
*,
value: int = 1,
attrs: dict[str, AttrValue] | None = None,
) -> None:
for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls:
if recorded_name != name or recorded_value != value:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
return
raise AssertionError(f'missing increment metric {name}')
def _active_count_values(metrics: RecordingMetrics) -> list[int | float]:
return [
value for name, value, _ in metrics.set_calls if name == 'sandbox.active.count'
]
class FakeLockContext: class FakeLockContext:
def __enter__(self) -> None: def __enter__(self) -> None:
return None return None
@ -198,6 +326,30 @@ class FailingStopRuntime(FakeRuntime):
raise RuntimeError('stop_failed') raise RuntimeError('stop_failed')
class FailingCreateRuntime(FakeRuntime):
def __init__(self, error: Exception) -> None:
super().__init__()
self._error = error
def create(
self,
*,
session_id: UUID,
chat_id: UUID,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
self.create_calls.append(
{
'session_id': session_id,
'chat_id': chat_id,
'created_at': created_at,
'expires_at': expires_at,
}
)
raise self._error
def test_create_sandbox_reuses_active_session_when_not_expired() -> None: def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
session = SandboxSession( session = SandboxSession(
@ -244,6 +396,104 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None:
] ]
def test_create_sandbox_reuse_records_observability() -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_REUSED_ID,
chat_id=CHAT_ID,
container_id='container-1',
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=1),
expires_at=now + timedelta(minutes=4),
)
repository = InMemorySandboxSessionRepository()
repository.save(session)
metrics = RecordingMetrics()
tracer = RecordingTracer()
usecase = CreateSandbox(
repository=repository,
locker=FakeLocker(),
runtime=FakeRuntime(),
clock=FakeClock(now),
logger=FakeLogger(),
metrics=metrics,
tracer=tracer,
ttl=timedelta(minutes=5),
)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
assert result == session
_assert_increment_metric_present(
metrics,
'sandbox.create.total',
attrs={'result': 'reused'},
)
span = _find_span(
tracer,
'usecase.create_sandbox',
{'chat.id': str(CHAT_ID)},
{
'session.id': str(SESSION_REUSED_ID),
'container.id': 'container-1',
'sandbox.result': 'reused',
},
)
assert not span.errors
def test_create_sandbox_replace_records_observability_and_final_active_count(
monkeypatch,
) -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expired_session = SandboxSession(
session_id=SESSION_OLD_ID,
chat_id=CHAT_ID,
container_id='container-old',
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=10),
expires_at=now,
)
repository = InMemorySandboxSessionRepository()
repository.save(expired_session)
metrics = RecordingMetrics()
tracer = RecordingTracer()
usecase = CreateSandbox(
repository=repository,
locker=FakeLocker(),
runtime=FakeRuntime(),
clock=FakeClock(now),
logger=FakeLogger(),
metrics=metrics,
tracer=tracer,
ttl=timedelta(minutes=5),
)
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
assert result.session_id == SESSION_NEW_ID
assert repository.count_active() == 1
_assert_increment_metric_present(
metrics,
'sandbox.create.total',
attrs={'result': 'replaced'},
)
assert _active_count_values(metrics)
assert _active_count_values(metrics)[-1] == 1
span = _find_span(
tracer,
'usecase.create_sandbox',
{'chat.id': str(CHAT_ID)},
{
'session.id': str(SESSION_NEW_ID),
'container.id': f'container-{SESSION_NEW_ID}',
'sandbox.result': 'replaced',
},
)
assert not span.errors
def test_create_sandbox_replaces_expired_session_and_creates_new_one( def test_create_sandbox_replaces_expired_session_and_creates_new_one(
monkeypatch, monkeypatch,
) -> None: ) -> None:
@ -363,6 +613,42 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None:
] ]
def test_create_sandbox_error_records_observability(monkeypatch) -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
metrics = RecordingMetrics()
tracer = RecordingTracer()
usecase = CreateSandbox(
repository=InMemorySandboxSessionRepository(),
locker=FakeLocker(),
runtime=FailingCreateRuntime(RuntimeError('create_failed')),
clock=FakeClock(now),
logger=FakeLogger(),
metrics=metrics,
tracer=tracer,
ttl=timedelta(minutes=5),
)
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
with pytest.raises(RuntimeError, match='create_failed') as excinfo:
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
_assert_increment_metric_present(
metrics,
'sandbox.create.total',
attrs={'result': 'error'},
)
span = _find_span(
tracer,
'usecase.create_sandbox',
{'chat.id': str(CHAT_ID)},
{
'session.id': str(SESSION_NEW_ID),
'sandbox.result': 'error',
},
)
assert excinfo.value in span.errors
def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
monkeypatch, monkeypatch,
) -> None: ) -> None:
@ -516,6 +802,64 @@ def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() ->
] ]
def test_cleanup_expired_sandboxes_records_observability_on_cleaned_session() -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expired_session = SandboxSession(
session_id=SESSION_EXPIRED_ID,
chat_id=EXPIRED_CHAT_ID,
container_id='container-expired',
status=SandboxStatus.RUNNING,
created_at=now - timedelta(minutes=10),
expires_at=now - timedelta(seconds=1),
)
repository = InMemorySandboxSessionRepository()
repository.save(expired_session)
metrics = RecordingMetrics()
tracer = RecordingTracer()
usecase = CleanupExpiredSandboxes(
repository=repository,
locker=FakeLocker(),
runtime=FakeRuntime(),
clock=FakeClock(now),
logger=FakeLogger(),
metrics=metrics,
tracer=tracer,
)
result = usecase.execute()
assert result == [expired_session]
_assert_increment_metric_present(
metrics,
'sandbox.cleanup.total',
attrs={'result': 'cleaned'},
)
assert _active_count_values(metrics)
assert _active_count_values(metrics)[-1] == 0
root_span = _find_span(
tracer,
'usecase.cleanup_expired_sandboxes',
span_attrs={
'sandbox.expired_count': 1,
'sandbox.cleaned_count': 1,
'sandbox.error_count': 0,
'sandbox.result': 'completed',
},
)
assert not root_span.errors
cleanup_span = _find_span(
tracer,
'usecase.cleanup_expired_sandbox',
{
'chat.id': str(EXPIRED_CHAT_ID),
'session.id': str(SESSION_EXPIRED_ID),
'container.id': 'container-expired',
},
{'sandbox.result': 'cleaned'},
)
assert not cleanup_span.errors
def test_cleanup_expired_sandboxes_skips_replaced_session_from_stale_snapshot() -> None: def test_cleanup_expired_sandboxes_skips_replaced_session_from_stale_snapshot() -> None:
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expired_snapshot = SandboxSession( expired_snapshot = SandboxSession(