From e9ef178b15fb5bb9ea07f7ec6f989242aace03f8 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 00:16:19 +0300 Subject: [PATCH 01/10] [feat] add docker in docker support --- config/docker-compose.yml | 42 +++++++++++++++++++++++++++++++++ docker-compose.yml | 49 +++++++++++++++++++++++++++++---------- 2 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 config/docker-compose.yml diff --git a/config/docker-compose.yml b/config/docker-compose.yml new file mode 100644 index 0000000..a601f99 --- /dev/null +++ b/config/docker-compose.yml @@ -0,0 +1,42 @@ +app: + name: master-service + env: docker-compose + +http: + host: 0.0.0.0 + port: 8123 + +logging: + level: INFO + output: otel + format: json + +metrics: + enabled: true + +tracing: + enabled: true + +otel: + service_name: master-service + logs_endpoint: http://otel-collector:4318/v1/logs + metrics_endpoint: http://otel-collector:4318/v1/metrics + traces_endpoint: http://otel-collector:4318/v1/traces + metric_export_interval: 1000 + +docker: + base_url: tcp://docker-engine:2375 + +sandbox: + image: nginx:1.27-alpine + ttl_seconds: 30 + cleanup_interval_seconds: 5 + chats_root: /var/lib/master-sandbox/chats + dependencies_host_path: /var/lib/master-dependencies + lambda_tools_host_path: /var/lib/master-lambda-tools + chat_mount_path: /workspace/chat + dependencies_mount_path: /opt/dependencies + lambda_tools_mount_path: /opt/lambda-tools + +security: + token_header: X-API-Token diff --git a/docker-compose.yml b/docker-compose.yml index 86e1bbb..24d5bab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -4,22 +4,43 @@ services: context: . 
dockerfile: Dockerfile target: run + user: root depends_on: - - otel-collector + docker-engine: + condition: service_healthy + otel-collector: + condition: service_started environment: - APP_API_TOKEN: ${APP_API_TOKEN:?APP_API_TOKEN is required} - APP_SIGNING_KEY: ${APP_SIGNING_KEY:?APP_SIGNING_KEY is required} - APP_ENV: docker - APP_HTTP_HOST: 0.0.0.0 - APP_HTTP_PORT: '8123' - APP_LOGGING_OUTPUT: otel - APP_METRICS_ENABLED: 'true' - APP_TRACING_ENABLED: 'true' - APP_OTEL_LOGS_ENDPOINT: http://otel-collector:4318/v1/logs - APP_OTEL_METRICS_ENDPOINT: http://otel-collector:4318/v1/metrics - APP_OTEL_TRACES_ENDPOINT: http://otel-collector:4318/v1/traces + APP_API_TOKEN: local-api-token + APP_SIGNING_KEY: local-signing-key ports: - '127.0.0.1:8123:8123' + volumes: + - ./config/docker-compose.yml:/app/config/app.yaml:ro + - sandbox-data:/var/lib/master-sandbox + - sandbox-dependencies:/var/lib/master-dependencies:ro + - sandbox-tools:/var/lib/master-lambda-tools:ro + + docker-engine: + image: docker:28-dind + privileged: true + environment: + DOCKER_TLS_CERTDIR: '' + command: + - --host=tcp://0.0.0.0:2375 + healthcheck: + test: + - CMD + - docker + - info + interval: 5s + timeout: 5s + retries: 12 + volumes: + - docker-data:/var/lib/docker + - sandbox-data:/var/lib/master-sandbox + - sandbox-dependencies:/var/lib/master-dependencies + - sandbox-tools:/var/lib/master-lambda-tools otel-collector: image: grafana/otel-lgtm:latest @@ -29,4 +50,8 @@ services: - lgtm-data:/data volumes: + docker-data: lgtm-data: + sandbox-data: + sandbox-dependencies: + sandbox-tools: From a86e1ee8c706f68c25c9f09ebdea600c4ed179d1 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 00:37:35 +0300 Subject: [PATCH 02/10] add sandbox observability contracts --- adapter/observability/noop.py | 8 +++ adapter/otel/metrics.py | 32 +++++++++++ docs/008-sandbox-lifecycle-observability.md | 18 +++++++ repository/sandbox_session.py | 4 ++ tasks.md | 60 +++++++++++++++++++++ usecase/interface.py | 
9 ++++ 6 files changed, 131 insertions(+) create mode 100644 docs/008-sandbox-lifecycle-observability.md diff --git a/adapter/observability/noop.py b/adapter/observability/noop.py index fe7d190..7027d41 100644 --- a/adapter/observability/noop.py +++ b/adapter/observability/noop.py @@ -20,6 +20,14 @@ class NoopMetrics: ) -> None: return None + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: + return None + class NoopSpan: def set_attribute(self, name: str, value: AttrValue) -> None: diff --git a/adapter/otel/metrics.py b/adapter/otel/metrics.py index 48d1278..ed9abe6 100644 --- a/adapter/otel/metrics.py +++ b/adapter/otel/metrics.py @@ -5,12 +5,21 @@ from opentelemetry.metrics import Counter, Histogram, Meter from usecase.interface import Attrs +class _GaugeAdapter: + def __init__(self, gauge: object) -> None: + self._gauge = gauge + + def set(self, value: int | float, attributes: object = None) -> None: + getattr(self._gauge, 'set')(value, attributes=attributes) + + class OtelMetrics: def __init__(self, meter: Meter) -> None: self._meter = meter self._lock = Lock() self._counters: dict[str, Counter] = {} self._histograms: dict[str, Histogram] = {} + self._gauges: dict[str, _GaugeAdapter] = {} def increment( self, @@ -34,6 +43,17 @@ class OtelMetrics: attributes=None if attrs is None else dict(attrs), ) + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: + self._gauge(name).set( + value, + attributes=None if attrs is None else dict(attrs), + ) + def _counter(self, name: str) -> Counter: counter = self._counters.get(name) if counter is not None: @@ -57,3 +77,15 @@ class OtelMetrics: histogram = self._meter.create_histogram(name) self._histograms[name] = histogram return histogram + + def _gauge(self, name: str) -> _GaugeAdapter: + gauge = self._gauges.get(name) + if gauge is not None: + return gauge + + with self._lock: + gauge = self._gauges.get(name) + if gauge is None: + 
gauge = _GaugeAdapter(self._meter.create_gauge(name)) + self._gauges[name] = gauge + return gauge diff --git a/docs/008-sandbox-lifecycle-observability.md b/docs/008-sandbox-lifecycle-observability.md new file mode 100644 index 0000000..f56dc10 --- /dev/null +++ b/docs/008-sandbox-lifecycle-observability.md @@ -0,0 +1,18 @@ +# 008 Sandbox lifecycle observability + +## Context +- FR-034 требует метрики по active sandbox, startup latency и cleanup +- Issue #11 требует трассировку sandbox usecase и Docker adapter steps +- Inner layers должны знать только observability ports + +## Decision +- Usecase sandbox lifecycle использует только `Logger`, `Metrics`, `Tracer` +- `Metrics` получает `set(...)` для current-state signals +- `sandbox.active.count` считается из session registry через `count_active()` +- M19 добавляет только contracts и adapter support для будущих lifecycle signals +- M20 и M21 отдельно добавят spans и runtime metrics в usecase и Docker adapter + +## Consequences +- OTel gauge остается в outer adapter, не протекает во внутренние слои +- Active sandbox count синхронизируется после create, cleanup и reconciliation +- Tests могут проверять observability через fake ports без реального OTel backend diff --git a/repository/sandbox_session.py b/repository/sandbox_session.py index 893ec65..bb680d2 100644 --- a/repository/sandbox_session.py +++ b/repository/sandbox_session.py @@ -29,6 +29,10 @@ class InMemorySandboxSessionRepository(SandboxSessionRepository): if session.expires_at <= now ] + def count_active(self) -> int: + with self._lock: + return len(self._sessions_by_chat_id) + def save(self, session: SandboxSession) -> None: with self._lock: self._sessions_by_chat_id[session.chat_id] = session diff --git a/tasks.md b/tasks.md index 494c655..d24657c 100644 --- a/tasks.md +++ b/tasks.md @@ -227,3 +227,63 @@ - Файлы: `domain/sandbox.py`, `usecase/interface.py`, `usecase/sandbox.py`, `repository/sandbox_session.py`, `adapter/http/fastapi/*`, 
`adapter/docker/runtime.py`, `adapter/di/container.py`, `test/*` - Решение: HTTP boundary принимает/возвращает UUID, usecase и repository работают с UUID objects, Docker labels продолжают сериализоваться в строки через `str(uuid)` - Критерии приемки: внутри sandbox flow `chat_id` и `session_id` больше не строки; `container_id` остается `str`; pydantic корректно сериализует UUID в response; `make pre-commit` проходит + +## Follow-up после issue #11 observability + +### M19. ADR и observability contracts для sandbox lifecycle + +- Исполнитель: `primary-agent` +- Статус: completed +- Зависимости: `M18` +- Commit required: yes +- Commit message: `add sandbox observability contracts` +- Scope: зафиксировать sandbox lifecycle observability policy в ADR-lite и подготовить минимальные контракты для traces и current-state metrics без нарушения clean architecture +- Файлы: `docs/008-sandbox-lifecycle-observability.md`, `usecase/interface.py`, `repository/sandbox_session.py`, `adapter/otel/metrics.py`, `adapter/observability/noop.py` +- Решение: добавить в `Metrics` порт операцию `set(...)` для gauge-like current-state сигналов; добавить в `SandboxSessionRepository` `count_active()` как источник truth для `sandbox.active.count` +- Критерии приемки: ADR занимает 10-20 строк; inner layers по-прежнему знают только порты `Logger`/`Metrics`/`Tracer`; current-state метрика активных sandbox выражается без OTel imports во внутреннем слое + +### M20. 
Трейсы и метрики в sandbox usecases + +- Субагент: `feature-developer` +- Статус: pending +- Зависимости: `M19` +- Commit required: yes +- Commit message: `instrument sandbox usecases` +- Scope: добавить spans и ключевые lifecycle metrics в `CreateSandbox` и `CleanupExpiredSandboxes` +- Файлы: `usecase/sandbox.py`, `adapter/di/container.py`, при необходимости тесты в `test/*` +- Решение: usecase получает `Metrics` и `Tracer` через конструктор; `CreateSandbox` и `CleanupExpiredSandboxes` публикуют `sandbox.create.total`, `sandbox.cleanup.total`, `sandbox.cleanup.error.total` и обновляют `sandbox.active.count` после мутаций registry +- Критерии приемки: есть spans `usecase.create_sandbox` и `usecase.cleanup_expired_sandboxes`; span attrs и metric attrs включают ключевые lifecycle identifiers/result fields; reuse/replace/cleanup paths наблюдаемы без OTel imports в usecase + +### M21. Трейсы и runtime metrics в Docker adapter и reconciliation + +- Субагент: `feature-developer` +- Статус: pending +- Зависимости: `M19` +- Commit required: yes +- Commit message: `instrument sandbox docker runtime` +- Scope: добавить observability в `DockerSandboxRuntime` и reconciliation path для Docker operations и current-state sync +- Файлы: `adapter/docker/runtime.py`, `adapter/sandbox/reconciliation.py`, `adapter/di/container.py`, при необходимости тесты в `test/*` +- Решение: `DockerSandboxRuntime` получает `Metrics` и `Tracer`; create/stop/list публикуют duration histograms `sandbox.runtime.create.duration_ms`, `sandbox.runtime.stop.duration_ms`, `sandbox.runtime.list_active.duration_ms`, error counter `sandbox.runtime.error.total` и span attrs по chat/session/container; reconciliation обновляет `sandbox.active.count` по registry snapshot +- Критерии приемки: Docker adapter остается во внешнем слое; ошибки Docker операций отражаются в spans и metrics; после startup reconciliation current-state метрика активных sandbox синхронизирована с registry + +### M22. 
Тесты на sandbox observability + +- Субагент: `test-engineer` +- Статус: pending +- Зависимости: `M20`, `M21` +- Commit required: yes +- Commit message: `add sandbox observability tests` +- Scope: покрыть regression tests новую observability policy без реального OTel backend +- Файлы: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, при необходимости новые focused tests в `test/*` +- Решение: использовать типизированные fake metrics/tracer implementations и проверить names/attrs ключевых spans и metrics на create/reuse/replace/cleanup/runtime paths +- Критерии приемки: тесты подтверждают spans и metrics на usecase и adapter paths; constructor wiring обновлен без mypy regressions; `make typecheck` и релевантный `pytest` проходят + +### M23. Boundary review для sandbox observability + +- Субагент: `code-reviewer` +- Статус: pending +- Зависимости: `M22` +- Commit required: no +- Scope: проверить, что observability изменения закрывают issue #11 и FR-034 без нарушения clean architecture +- Файлы: весь измененный код после `M19`-`M22` +- Критерии приемки: inner layers не импортируют OTel; Docker-specific tracing остается в `adapter/docker/`; current-state и duration metrics достаточно покрывают sandbox lifecycle; замечания сведены к minor или отсутствуют diff --git a/usecase/interface.py b/usecase/interface.py index 15c581a..69876e6 100644 --- a/usecase/interface.py +++ b/usecase/interface.py @@ -24,6 +24,8 @@ class SandboxSessionRepository(Protocol): def list_expired(self, now: datetime) -> list[SandboxSession]: ... + def count_active(self) -> int: ... + def save(self, session: SandboxSession) -> None: ... def delete(self, session_id: UUID) -> None: ... @@ -86,6 +88,13 @@ class Metrics(Protocol): attrs: Attrs | None = None, ) -> None: ... + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: ... + class Span(Protocol): def set_attribute(self, name: str, value: AttrValue) -> None: ... 
From 4cdf6e45de0363a2db350ca8e78b4125f1103610 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 00:56:37 +0300 Subject: [PATCH 03/10] instrument sandbox usecases --- adapter/di/container.py | 4 + tasks.md | 2 +- test/test_create_http.py | 4 + test/test_sandbox_usecase.py | 15 +++ usecase/sandbox.py | 215 +++++++++++++++++++++++++++-------- 5 files changed, 191 insertions(+), 49 deletions(-) diff --git a/adapter/di/container.py b/adapter/di/container.py index ace3a42..4b87b2f 100644 --- a/adapter/di/container.py +++ b/adapter/di/container.py @@ -96,6 +96,8 @@ def build_container( runtime=sandbox_runtime, clock=clock, logger=observability.logger, + metrics=observability.metrics, + tracer=observability.tracer, ttl=timedelta(seconds=app_config.sandbox.ttl_seconds), ), cleanup_expired_sandboxes=CleanupExpiredSandboxes( @@ -104,6 +106,8 @@ def build_container( runtime=sandbox_runtime, clock=clock, logger=observability.logger, + metrics=observability.metrics, + tracer=observability.tracer, ), ) diff --git a/tasks.md b/tasks.md index d24657c..a43d96a 100644 --- a/tasks.md +++ b/tasks.md @@ -245,7 +245,7 @@ ### M20. 
Трейсы и метрики в sandbox usecases - Субагент: `feature-developer` -- Статус: pending +- Статус: completed - Зависимости: `M19` - Commit required: yes - Commit message: `instrument sandbox usecases` diff --git a/test/test_create_http.py b/test/test_create_http.py index 652644b..a25eaba 100644 --- a/test/test_create_http.py +++ b/test/test_create_http.py @@ -502,6 +502,8 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( runtime=runtime, clock=FakeClock(created_at), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ttl=timedelta(minutes=5), ), cleanup_expired_sandboxes=CleanupExpiredSandboxes( @@ -510,6 +512,8 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( runtime=runtime, clock=FakeClock(created_at), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ), ) container = AppContainer( diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index 4fedb21..f744492 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -2,6 +2,7 @@ import threading from datetime import UTC, datetime, timedelta from uuid import UUID +from adapter.observability.noop import NoopMetrics, NoopTracer from domain.sandbox import SandboxSession, SandboxStatus from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository @@ -218,6 +219,8 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None: runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ttl=timedelta(minutes=5), ) @@ -264,6 +267,8 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one( runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ttl=timedelta(minutes=5), ) monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) @@ -323,6 +328,8 @@ def 
test_create_sandbox_creates_new_session_when_none_exists() -> None: runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ttl=timedelta(minutes=5), ) @@ -370,6 +377,8 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ttl=timedelta(minutes=5), ) monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) @@ -473,6 +482,8 @@ def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() -> runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ) result = usecase.execute() @@ -534,6 +545,8 @@ def test_cleanup_expired_sandboxes_skips_replaced_session_from_stale_snapshot() runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ) result = usecase.execute() @@ -575,6 +588,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None: runtime=runtime, clock=FakeClock(now), logger=logger, + metrics=NoopMetrics(), + tracer=NoopTracer(), ) result = usecase.execute() diff --git a/usecase/sandbox.py b/usecase/sandbox.py index 83ee39d..2bdb369 100644 --- a/usecase/sandbox.py +++ b/usecase/sandbox.py @@ -6,9 +6,11 @@ from domain.sandbox import SandboxSession from usecase.interface import ( Clock, Logger, + Metrics, SandboxLifecycleLocker, SandboxRuntime, SandboxSessionRepository, + Tracer, ) @@ -25,6 +27,8 @@ class CreateSandbox: runtime: SandboxRuntime, clock: Clock, logger: Logger, + metrics: Metrics, + tracer: Tracer, ttl: timedelta, ) -> None: self._repository = repository @@ -32,44 +36,80 @@ class CreateSandbox: self._runtime = runtime self._clock = clock self._logger = logger + self._metrics = metrics + self._tracer = tracer self._ttl = ttl def execute(self, command: CreateSandboxCommand) -> SandboxSession: chat_id = command.chat_id - with 
self._locker.lock(chat_id): - session = self._repository.get_active_by_chat_id(chat_id) - now = self._clock.now() + with self._tracer.start_span( + 'usecase.create_sandbox', + attrs={'chat.id': str(chat_id)}, + ) as span: + try: + with self._locker.lock(chat_id): + session = self._repository.get_active_by_chat_id(chat_id) + now = self._clock.now() - if session is not None and session.expires_at > now: - self._logger.info( - 'sandbox_reused', - attrs=_sandbox_attrs(session), + if session is not None and session.expires_at > now: + span.set_attribute('session.id', str(session.session_id)) + span.set_attribute('container.id', session.container_id) + span.set_attribute('sandbox.result', 'reused') + self._metrics.increment( + 'sandbox.create.total', + attrs=_result_metric_attrs('reused'), + ) + self._logger.info( + 'sandbox_reused', + attrs=_sandbox_attrs(session), + ) + return session + + result = 'created' + if session is not None: + result = 'replaced' + span.set_attribute('session.id', str(session.session_id)) + span.set_attribute('container.id', session.container_id) + self._logger.info( + 'sandbox_replaced', + attrs=_sandbox_attrs(session), + ) + self._runtime.stop(session.container_id) + self._repository.delete(session.session_id) + _set_active_count(self._metrics, self._repository) + + created_at = self._clock.now() + expires_at = created_at + self._ttl + session_id = _new_session_id() + span.set_attribute('session.id', str(session_id)) + new_session = self._runtime.create( + session_id=session_id, + chat_id=chat_id, + created_at=created_at, + expires_at=expires_at, + ) + self._repository.save(new_session) + _set_active_count(self._metrics, self._repository) + span.set_attribute('container.id', new_session.container_id) + span.set_attribute('sandbox.result', result) + self._metrics.increment( + 'sandbox.create.total', + attrs=_result_metric_attrs(result), + ) + self._logger.info( + 'sandbox_created', + attrs=_sandbox_attrs(new_session), + ) + return new_session 
+ except Exception as exc: + span.set_attribute('sandbox.result', 'error') + self._metrics.increment( + 'sandbox.create.total', + attrs=_result_metric_attrs('error'), ) - return session - - if session is not None: - self._logger.info( - 'sandbox_replaced', - attrs=_sandbox_attrs(session), - ) - self._runtime.stop(session.container_id) - self._repository.delete(session.session_id) - - created_at = self._clock.now() - expires_at = created_at + self._ttl - new_session = self._runtime.create( - session_id=_new_session_id(), - chat_id=chat_id, - created_at=created_at, - expires_at=expires_at, - ) - self._repository.save(new_session) - self._logger.info( - 'sandbox_created', - attrs=_sandbox_attrs(new_session), - ) - return new_session + span.record_error(exc) + raise class CleanupExpiredSandboxes: @@ -80,39 +120,84 @@ class CleanupExpiredSandboxes: runtime: SandboxRuntime, clock: Clock, logger: Logger, + metrics: Metrics, + tracer: Tracer, ) -> None: self._repository = repository self._locker = locker self._runtime = runtime self._clock = clock self._logger = logger + self._metrics = metrics + self._tracer = tracer def execute(self) -> list[SandboxSession]: - expired_sessions = self._repository.list_expired(self._clock.now()) cleaned_sessions: list[SandboxSession] = [] + error_count = 0 - for session in expired_sessions: + with self._tracer.start_span( + 'usecase.cleanup_expired_sandboxes', + ) as span: try: - cleaned_session = self._cleanup_session(session) + expired_sessions = self._repository.list_expired(self._clock.now()) except Exception as exc: - attrs = _sandbox_attrs(session) - attrs['error'] = type(exc).__name__ - self._logger.error( - 'sandbox_clean_failed', - attrs=attrs, + span.set_attribute('sandbox.result', 'error') + self._metrics.increment( + 'sandbox.cleanup.error.total', + attrs=_cleanup_error_metric_attrs( + type(exc).__name__, + 'list_expired', + ), ) - continue + span.record_error(exc) + raise - if cleaned_session is None: - continue + 
span.set_attribute('sandbox.expired_count', len(expired_sessions)) + for session in expired_sessions: + with self._tracer.start_span( + 'usecase.cleanup_expired_sandbox', + attrs=_sandbox_span_attrs(session), + ) as cleanup_span: + try: + cleaned_session = self._cleanup_session(session) + except Exception as exc: + error_count += 1 + cleanup_span.set_attribute('sandbox.result', 'error') + cleanup_span.record_error(exc) + self._metrics.increment( + 'sandbox.cleanup.error.total', + attrs=_error_metric_attrs(type(exc).__name__), + ) + attrs = _sandbox_attrs(session) + attrs['error'] = type(exc).__name__ + self._logger.error( + 'sandbox_clean_failed', + attrs=attrs, + ) + continue - cleaned_sessions.append(cleaned_session) - self._logger.info( - 'sandbox_cleaned', - attrs=_sandbox_attrs(cleaned_session), + if cleaned_session is None: + cleanup_span.set_attribute('sandbox.result', 'skipped') + continue + + cleanup_span.set_attribute('sandbox.result', 'cleaned') + cleaned_sessions.append(cleaned_session) + self._metrics.increment( + 'sandbox.cleanup.total', + attrs=_result_metric_attrs('cleaned'), + ) + self._logger.info( + 'sandbox_cleaned', + attrs=_sandbox_attrs(cleaned_session), + ) + + span.set_attribute('sandbox.cleaned_count', len(cleaned_sessions)) + span.set_attribute('sandbox.error_count', error_count) + span.set_attribute( + 'sandbox.result', + 'completed' if error_count == 0 else 'completed_with_errors', ) - - return cleaned_sessions + return cleaned_sessions def _cleanup_session(self, session: SandboxSession) -> SandboxSession | None: with self._locker.lock(session.chat_id): @@ -129,6 +214,7 @@ class CleanupExpiredSandboxes: self._runtime.stop(current_session.container_id) self._repository.delete(current_session.session_id) + _set_active_count(self._metrics, self._repository) return current_session @@ -142,3 +228,36 @@ def _sandbox_attrs(session: SandboxSession) -> dict[str, str]: 'session_id': str(session.session_id), 'container_id': session.container_id, } 
+ + +def _sandbox_span_attrs(session: SandboxSession) -> dict[str, str]: + return { + 'chat.id': str(session.chat_id), + 'session.id': str(session.session_id), + 'container.id': session.container_id, + } + + +def _result_metric_attrs(result: str) -> dict[str, str]: + return {'result': result} + + +def _error_metric_attrs(error_type: str) -> dict[str, str]: + return {'error.type': error_type} + + +def _cleanup_error_metric_attrs( + error_type: str, + reason: str, +) -> dict[str, str]: + return { + 'error.type': error_type, + 'reason': reason, + } + + +def _set_active_count( + metrics: Metrics, + repository: SandboxSessionRepository, +) -> None: + metrics.set('sandbox.active.count', repository.count_active()) From 8d3a080d4575d43bb3878853977ce3613036859d Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 01:15:23 +0300 Subject: [PATCH 04/10] instrument sandbox docker runtime --- adapter/di/container.py | 9 +- adapter/docker/runtime.py | 225 +++++++++++++++++++++++------- adapter/sandbox/reconciliation.py | 50 ++++--- tasks.md | 2 +- test/test_create_http.py | 175 ++++++++++++++++++++++- test/test_docker_runtime.py | 23 ++- 6 files changed, 411 insertions(+), 73 deletions(-) diff --git a/adapter/di/container.py b/adapter/di/container.py index 4b87b2f..b18382c 100644 --- a/adapter/di/container.py +++ b/adapter/di/container.py @@ -81,11 +81,18 @@ def build_container( sandbox_repository = InMemorySandboxSessionRepository() sandbox_locker = ProcessLocalSandboxLifecycleLocker() - sandbox_runtime = DockerSandboxRuntime(app_config.sandbox, docker_client) + sandbox_runtime = DockerSandboxRuntime( + app_config.sandbox, + docker_client, + observability.metrics, + observability.tracer, + ) sandbox_reconciler = SandboxSessionReconciler( state_source=sandbox_runtime, registry=sandbox_repository, logger=observability.logger, + metrics=observability.metrics, + tracer=observability.tracer, ) repositories = AppRepositories(sandbox_session=sandbox_repository) diff --git 
a/adapter/docker/runtime.py b/adapter/docker/runtime.py index 3f33466..3c6e93c 100644 --- a/adapter/docker/runtime.py +++ b/adapter/docker/runtime.py @@ -1,3 +1,4 @@ +import time from datetime import datetime from pathlib import Path from uuid import UUID @@ -9,7 +10,7 @@ from docker.types import Mount from adapter.config.model import SandboxConfig from domain.error import SandboxError, SandboxStartError from domain.sandbox import SandboxSession, SandboxStatus -from usecase.interface import SandboxRuntime +from usecase.interface import Metrics, SandboxRuntime, Span, Tracer SANDBOX_LABELS = ('session_id', 'chat_id', 'expires_at') @@ -19,9 +20,13 @@ class DockerSandboxRuntime(SandboxRuntime): self, config: SandboxConfig, client: DockerClient, + metrics: Metrics, + tracer: Tracer, ) -> None: self._config = config self._client = client + self._metrics = metrics + self._tracer = tracer def create( self, @@ -31,62 +36,143 @@ class DockerSandboxRuntime(SandboxRuntime): created_at: datetime, expires_at: datetime, ) -> SandboxSession: - try: - chat_path = self._chat_path(chat_id) - dependencies_path = self._readonly_host_path( - self._config.dependencies_host_path - ) - lambda_tools_path = self._readonly_host_path( - self._config.lambda_tools_host_path - ) - chat_path.mkdir(parents=True, exist_ok=True) - container = self._client.containers.run( - self._config.image, - detach=True, - labels=self._labels(session_id, chat_id, expires_at), - mounts=self._mounts(chat_path, dependencies_path, lambda_tools_path), - ) - except (DockerException, OSError, ValueError) as exc: - raise SandboxStartError(str(chat_id)) from exc + started_at = time.perf_counter() + result = 'error' - container_id = str(getattr(container, 'id', '')).strip() - if not container_id: - raise SandboxStartError(str(chat_id)) + with self._tracer.start_span( + 'adapter.docker.create_sandbox', + attrs={ + 'chat.id': str(chat_id), + 'session.id': str(session_id), + }, + ) as span: + try: + try: + chat_path = 
self._chat_path(chat_id) + dependencies_path = self._readonly_host_path( + self._config.dependencies_host_path + ) + lambda_tools_path = self._readonly_host_path( + self._config.lambda_tools_host_path + ) + chat_path.mkdir(parents=True, exist_ok=True) + container = self._client.containers.run( + self._config.image, + detach=True, + labels=self._labels(session_id, chat_id, expires_at), + mounts=self._mounts( + chat_path, + dependencies_path, + lambda_tools_path, + ), + ) + except (DockerException, OSError, ValueError) as exc: + raise SandboxStartError(str(chat_id)) from exc - return SandboxSession( - session_id=session_id, - chat_id=chat_id, - container_id=container_id, - status=SandboxStatus.RUNNING, - created_at=created_at, - expires_at=expires_at, - ) + container_id = str(getattr(container, 'id', '')).strip() + if not container_id: + raise SandboxStartError(str(chat_id)) + + result = 'created' + span.set_attribute('container.id', container_id) + span.set_attribute('sandbox.result', result) + return SandboxSession( + session_id=session_id, + chat_id=chat_id, + container_id=container_id, + status=SandboxStatus.RUNNING, + created_at=created_at, + expires_at=expires_at, + ) + except Exception as exc: + span.set_attribute('sandbox.result', result) + span.record_error(exc) + self._metrics.increment( + 'sandbox.runtime.error.total', + attrs=_runtime_error_metric_attrs('create', _error_type(exc)), + ) + raise + finally: + self._metrics.record( + 'sandbox.runtime.create.duration_ms', + _duration_ms(started_at), + attrs=_runtime_metric_attrs('create', result), + ) def stop(self, container_id: str) -> None: - try: - container = self._client.containers.get(container_id) - container.stop() - except NotFound: - return - except DockerException as exc: - raise SandboxError('sandbox_stop_failed') from exc + started_at = time.perf_counter() + result = 'error' + + with self._tracer.start_span( + 'adapter.docker.stop_sandbox', + attrs={'container.id': container_id}, + ) as span: + 
try: + container = self._client.containers.get(container_id) + _set_span_container_attrs(span, container) + container.stop() + result = 'stopped' + span.set_attribute('sandbox.result', result) + except NotFound: + result = 'not_found' + span.set_attribute('sandbox.result', result) + return + except DockerException as exc: + span.set_attribute('sandbox.result', result) + span.record_error(exc) + self._metrics.increment( + 'sandbox.runtime.error.total', + attrs=_runtime_error_metric_attrs('stop', type(exc).__name__), + ) + raise SandboxError('sandbox_stop_failed') from exc + finally: + self._metrics.record( + 'sandbox.runtime.stop.duration_ms', + _duration_ms(started_at), + attrs=_runtime_metric_attrs('stop', result), + ) def list_active_sessions(self) -> list[SandboxSession]: - try: - containers = self._client.containers.list( - filters={'label': list(SANDBOX_LABELS)} - ) - except DockerException as exc: - raise SandboxError('sandbox_list_failed') from exc + started_at = time.perf_counter() + result = 'error' - sessions: list[SandboxSession] = [] - for container in containers: - session = self._session_from_container(container) - if session is None: - continue - sessions.append(session) + with self._tracer.start_span( + 'adapter.docker.list_active_sandboxes', + ) as span: + try: + try: + containers = self._client.containers.list( + filters={'label': list(SANDBOX_LABELS)} + ) + except DockerException as exc: + raise SandboxError('sandbox_list_failed') from exc - return sessions + sessions: list[SandboxSession] = [] + for container in containers: + session = self._session_from_container(container) + if session is None: + continue + sessions.append(session) + + result = 'listed' + span.set_attribute('sandbox.container_count', len(containers)) + span.set_attribute('sandbox.active_count', len(sessions)) + span.set_attribute('sandbox.result', result) + return sessions + except Exception as exc: + span.set_attribute('sandbox.result', result) + span.record_error(exc) + 
self._metrics.increment( + 'sandbox.runtime.error.total', + attrs=_runtime_error_metric_attrs('list_active', _error_type(exc)), + ) + raise + finally: + self._metrics.record( + 'sandbox.runtime.list_active.duration_ms', + _duration_ms(started_at), + attrs=_runtime_metric_attrs('list_active', result), + ) def _labels( self, @@ -186,3 +272,44 @@ class DockerSandboxRuntime(SandboxRuntime): def _parse_datetime(value: str) -> datetime: normalized = f'{value[:-1]}+00:00' if value.endswith('Z') else value return datetime.fromisoformat(normalized) + + +def _duration_ms(started_at: float) -> float: + return (time.perf_counter() - started_at) * 1000 + + +def _runtime_metric_attrs(operation: str, result: str) -> dict[str, str]: + return { + 'operation': operation, + 'result': result, + } + + +def _runtime_error_metric_attrs( + operation: str, + error_type: str, +) -> dict[str, str]: + return { + 'operation': operation, + 'error.type': error_type, + } + + +def _error_type(error: Exception) -> str: + if isinstance(error.__cause__, Exception): + return type(error.__cause__).__name__ + return type(error).__name__ + + +def _set_span_container_attrs(span: Span, container: object) -> None: + labels = getattr(container, 'labels', None) + if not isinstance(labels, dict): + return + + session_id = labels.get('session_id') + if isinstance(session_id, str) and session_id: + span.set_attribute('session.id', session_id) + + chat_id = labels.get('chat_id') + if isinstance(chat_id, str) and chat_id: + span.set_attribute('chat.id', chat_id) diff --git a/adapter/sandbox/reconciliation.py b/adapter/sandbox/reconciliation.py index 2d04ca5..81cdb75 100644 --- a/adapter/sandbox/reconciliation.py +++ b/adapter/sandbox/reconciliation.py @@ -3,7 +3,7 @@ from typing import Protocol from uuid import UUID from domain.sandbox import SandboxSession -from usecase.interface import Logger +from usecase.interface import Logger, Metrics, Tracer class SandboxSessionStateSource(Protocol): @@ -13,27 +13,45 @@ 
class SandboxSessionStateSource(Protocol): class SandboxSessionRegistry(Protocol): def replace_all(self, sessions: list[SandboxSession]) -> None: ... + def count_active(self) -> int: ... + @dataclass(frozen=True, slots=True) class SandboxSessionReconciler: state_source: SandboxSessionStateSource registry: SandboxSessionRegistry logger: Logger + metrics: Metrics + tracer: Tracer def execute(self) -> list[SandboxSession]: - sessions_by_chat_id: dict[UUID, SandboxSession] = {} - for session in sorted( - self.state_source.list_active_sessions(), - key=lambda item: item.created_at, - ): - sessions_by_chat_id[session.chat_id] = session + with self.tracer.start_span( + 'adapter.sandbox.reconcile_sessions', + ) as span: + try: + sessions_by_chat_id: dict[UUID, SandboxSession] = {} + discovered_sessions = self.state_source.list_active_sessions() + span.set_attribute('sandbox.discovered_count', len(discovered_sessions)) + for session in sorted( + discovered_sessions, + key=lambda item: item.created_at, + ): + sessions_by_chat_id[session.chat_id] = session - sessions = list(sessions_by_chat_id.values()) - self.registry.replace_all(sessions) - self.logger.info( - 'sandbox_reconciled', - attrs={ - 'session_count': len(sessions), - }, - ) - return sessions + sessions = list(sessions_by_chat_id.values()) + self.registry.replace_all(sessions) + active_count = self.registry.count_active() + self.metrics.set('sandbox.active.count', active_count) + span.set_attribute('sandbox.active_count', active_count) + span.set_attribute('sandbox.result', 'reconciled') + self.logger.info( + 'sandbox_reconciled', + attrs={ + 'session_count': active_count, + }, + ) + return sessions + except Exception as exc: + span.set_attribute('sandbox.result', 'error') + span.record_error(exc) + raise diff --git a/tasks.md b/tasks.md index a43d96a..012111d 100644 --- a/tasks.md +++ b/tasks.md @@ -257,7 +257,7 @@ ### M21. 
Трейсы и runtime metrics в Docker adapter и reconciliation - Субагент: `feature-developer` -- Статус: pending +- Статус: completed - Зависимости: `M19` - Commit required: yes - Commit message: `instrument sandbox docker runtime` diff --git a/test/test_create_http.py b/test/test_create_http.py index a25eaba..e8686c4 100644 --- a/test/test_create_http.py +++ b/test/test_create_http.py @@ -7,6 +7,7 @@ from docker import DockerClient from fastapi import FastAPI from starlette.types import Message, Scope +import adapter.di.container as container_module from adapter.config.model import ( AppConfig, AppSectionConfig, @@ -20,6 +21,7 @@ from adapter.config.model import ( TracingConfig, ) from adapter.di.container import AppContainer, AppRepositories, AppUsecases +from adapter.docker.runtime import DockerSandboxRuntime from adapter.http.fastapi import app as app_module from adapter.observability.noop import NoopMetrics, NoopTracer from adapter.observability.runtime import ObservabilityRuntime @@ -80,7 +82,8 @@ class FakeCleanupExpiredSandboxes(CleanupExpiredSandboxes): class FakeDockerClient(DockerClient): - def __init__(self) -> None: + def __init__(self, base_url: str | None = None) -> None: + self.base_url = base_url self.close_calls = 0 def close(self) -> None: @@ -104,6 +107,79 @@ class FakeClock: return self._now +class RecordingMetrics: + def __init__(self) -> None: + self.increment_calls: list[tuple[str, int, Attrs | None]] = [] + self.record_calls: list[tuple[str, float, Attrs | None]] = [] + self.set_calls: list[tuple[str, int | float, Attrs | None]] = [] + + def increment( + self, + name: str, + value: int = 1, + attrs: Attrs | None = None, + ) -> None: + self.increment_calls.append((name, value, attrs)) + + def record( + self, + name: str, + value: float, + attrs: Attrs | None = None, + ) -> None: + self.record_calls.append((name, value, attrs)) + + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: + 
self.set_calls.append((name, value, attrs)) + + +class RecordingSpan: + def __init__(self) -> None: + self.attrs: dict[str, str | int | float | bool] = {} + self.errors: list[Exception] = [] + + def set_attribute(self, name: str, value: str | int | float | bool) -> None: + self.attrs[name] = value + + def record_error(self, error: Exception) -> None: + self.errors.append(error) + + +class RecordingSpanContext: + def __init__(self, span: RecordingSpan) -> None: + self._span = span + + def __enter__(self) -> RecordingSpan: + return self._span + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: object, + ) -> bool | None: + return None + + +class RecordingTracer: + def __init__(self) -> None: + self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = [] + + def start_span( + self, + name: str, + attrs: Attrs | None = None, + ) -> RecordingSpanContext: + span = RecordingSpan() + self.spans.append((name, attrs, span)) + return RecordingSpanContext(span) + + class FakeLifecycleRuntime: def __init__(self, sessions: list[SandboxSession]) -> None: self._sessions = list(sessions) @@ -142,6 +218,26 @@ class FakeLifecycleRuntime: self.stop_calls.append(container_id) +class FixedSandboxState: + def __init__(self, sessions: list[SandboxSession]) -> None: + self._sessions = list(sessions) + + def list_active_sessions(self) -> list[SandboxSession]: + return list(self._sessions) + + +class CountingRegistry: + def __init__(self, count_active_result: int) -> None: + self._count_active_result = count_active_result + self.replaced_sessions: list[SandboxSession] = [] + + def replace_all(self, sessions: list[SandboxSession]) -> None: + self.replaced_sessions = list(sessions) + + def count_active(self) -> int: + return self._count_active_result + + def build_config() -> AppConfig: return AppConfig( app=AppSectionConfig(name='master', env='test'), @@ -198,6 +294,8 @@ def build_container( state_source=EmptySandboxState(), 
registry=repositories.sandbox_session, logger=logger, + metrics=observability.metrics, + tracer=observability.tracer, ) usecases = AppUsecases( create_sandbox=create_sandbox_usecase, @@ -494,6 +592,8 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( state_source=runtime, registry=repository, logger=logger, + metrics=observability.metrics, + tracer=observability.tracer, ) usecases = AppUsecases( create_sandbox=CreateSandbox( @@ -586,3 +686,76 @@ def test_removed_user_endpoint_returns_not_found(monkeypatch) -> None: assert status_code == 404 assert response == {'detail': 'Not Found'} assert docker_client.close_calls == 1 + + +def test_reconciliation_uses_registry_backed_active_count_metric() -> None: + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( + session_id=SESSION_ID, + chat_id=CHAT_ID, + container_id='container-123', + status=SandboxStatus.RUNNING, + created_at=created_at, + expires_at=created_at + timedelta(minutes=5), + ) + registry = CountingRegistry(count_active_result=7) + reconciler = SandboxSessionReconciler( + state_source=FixedSandboxState([session]), + registry=registry, + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + sessions = reconciler.execute() + + assert sessions == [session] + assert registry.replaced_sessions == [session] + assert metrics.set_calls == [('sandbox.active.count', 7, None)] + assert tracer.spans[0][0] == 'adapter.sandbox.reconcile_sessions' + assert tracer.spans[0][2].attrs['sandbox.active_count'] == 7 + + +def test_build_container_wires_observability_into_runtime_and_reconciler( + monkeypatch, +) -> None: + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + observability = ObservabilityRuntime( + logger=logger, + metrics=metrics, + tracer=tracer, + ) + docker_client = FakeDockerClient() + monkeypatch.setattr( + container_module, 
'build_observability', lambda config: observability + ) + monkeypatch.setattr( + container_module.docker, + 'DockerClient', + lambda base_url: docker_client, + ) + + container = container_module.build_container(config=build_config()) + + runtime = container.sandbox_reconciler.state_source + assert isinstance(runtime, DockerSandboxRuntime) + assert runtime._metrics is metrics + assert runtime._tracer is tracer + assert container.sandbox_reconciler.metrics is metrics + assert container.sandbox_reconciler.tracer is tracer + assert container.usecases.create_sandbox._runtime is runtime + assert container.usecases.create_sandbox._metrics is metrics + assert container.usecases.create_sandbox._tracer is tracer + assert container.usecases.cleanup_expired_sandboxes._runtime is runtime + assert container.usecases.cleanup_expired_sandboxes._metrics is metrics + assert container.usecases.cleanup_expired_sandboxes._tracer is tracer + assert container._docker_client is docker_client + + container.shutdown() + + assert docker_client.close_calls == 1 diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index 1e207f3..ee6a2a4 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -10,6 +10,7 @@ from docker.types import Mount from adapter.config.model import SandboxConfig from adapter.docker.runtime import DockerSandboxRuntime +from adapter.observability.noop import NoopMetrics, NoopTracer from domain.error import SandboxError, SandboxStartError from domain.sandbox import SandboxSession, SandboxStatus @@ -116,6 +117,18 @@ def build_config(tmp_path: Path) -> SandboxConfig: ) +def build_runtime( + config: SandboxConfig, + containers: FakeContainers, +) -> DockerSandboxRuntime: + return DockerSandboxRuntime( + config, + FakeDockerClient(containers), + NoopMetrics(), + NoopTracer(), + ) + + def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( tmp_path: Path, ) -> None: @@ -123,7 +136,7 @@ def 
test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( (tmp_path / 'dependencies').mkdir() (tmp_path / 'lambda-tools').mkdir() containers = FakeContainers() - runtime = DockerSandboxRuntime(config, FakeDockerClient(containers)) + runtime = build_runtime(config, containers) created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) expires_at = created_at + timedelta(minutes=5) @@ -181,7 +194,7 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing( (tmp_path / 'dependencies').mkdir() (tmp_path / 'lambda-tools').mkdir() containers = FakeContainers(run_result=FakeContainer('')) - runtime = DockerSandboxRuntime(config, FakeDockerClient(containers)) + runtime = build_runtime(config, containers) with pytest.raises(SandboxStartError) as excinfo: runtime.create( @@ -199,7 +212,7 @@ def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None: config = build_config(tmp_path) containers = FakeContainers() containers.get_result = NotFound('missing') - runtime = DockerSandboxRuntime(config, FakeDockerClient(containers)) + runtime = build_runtime(config, containers) runtime.stop('container-123') @@ -210,7 +223,7 @@ def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None: config = build_config(tmp_path) containers = FakeContainers() containers.get_result = DockerException('boom') - runtime = DockerSandboxRuntime(config, FakeDockerClient(containers)) + runtime = build_runtime(config, containers) with pytest.raises(SandboxError) as excinfo: runtime.stop('container-123') @@ -243,7 +256,7 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers( created_at='2026-04-02T12:01:00Z', ), ] - runtime = DockerSandboxRuntime(config, FakeDockerClient(containers)) + runtime = build_runtime(config, containers) sessions = runtime.list_active_sessions() From dff28efecf6cd7e8c154a5ea3d5ed911bc54bd68 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 01:34:10 +0300 Subject: [PATCH 05/10] add sandbox observability 
tests --- tasks.md | 2 +- test/test_docker_runtime.py | 276 ++++++++++++++++++++++++++++ test/test_sandbox_usecase.py | 344 +++++++++++++++++++++++++++++++++++ 3 files changed, 621 insertions(+), 1 deletion(-) diff --git a/tasks.md b/tasks.md index 012111d..e01dcde 100644 --- a/tasks.md +++ b/tasks.md @@ -269,7 +269,7 @@ ### M22. Тесты на sandbox observability - Субагент: `test-engineer` -- Статус: pending +- Статус: completed - Зависимости: `M20`, `M21` - Commit required: yes - Commit message: `add sandbox observability tests` diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index ee6a2a4..4db1095 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -1,5 +1,6 @@ from datetime import UTC, datetime, timedelta from pathlib import Path +from types import TracebackType from typing import Any, TypedDict from uuid import UUID @@ -13,6 +14,7 @@ from adapter.docker.runtime import DockerSandboxRuntime from adapter.observability.noop import NoopMetrics, NoopTracer from domain.error import SandboxError, SandboxStartError from domain.sandbox import SandboxSession, SandboxStatus +from usecase.interface import AttrValue, Attrs CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000') NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000' @@ -103,6 +105,140 @@ class FakeDockerClient(DockerClient): return self._containers +class RecordingMetrics: + def __init__(self) -> None: + self.increment_calls: list[tuple[str, int, Attrs | None]] = [] + self.record_calls: list[tuple[str, float, Attrs | None]] = [] + self.set_calls: list[tuple[str, int | float, Attrs | None]] = [] + + def increment( + self, + name: str, + value: int = 1, + attrs: Attrs | None = None, + ) -> None: + self.increment_calls.append((name, value, attrs)) + + def record( + self, + name: str, + value: float, + attrs: Attrs | None = None, + ) -> None: + self.record_calls.append((name, value, attrs)) + + def set( + self, + name: str, + value: int | float, + attrs: Attrs | 
None = None, + ) -> None: + self.set_calls.append((name, value, attrs)) + + +class RecordingSpan: + def __init__(self) -> None: + self.attrs: dict[str, AttrValue] = {} + self.errors: list[Exception] = [] + + def set_attribute(self, name: str, value: AttrValue) -> None: + self.attrs[name] = value + + def record_error(self, error: Exception) -> None: + self.errors.append(error) + + +class RecordingSpanContext: + def __init__(self, span: RecordingSpan) -> None: + self._span = span + + def __enter__(self) -> RecordingSpan: + return self._span + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + return None + + +class RecordingTracer: + def __init__(self) -> None: + self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = [] + + def start_span( + self, + name: str, + attrs: Attrs | None = None, + ) -> RecordingSpanContext: + span = RecordingSpan() + self.spans.append((name, attrs, span)) + return RecordingSpanContext(span) + + +def _attrs_include( + actual: Attrs | dict[str, AttrValue] | None, + expected: dict[str, AttrValue], +) -> bool: + if actual is None: + return False + + return all(actual.get(name) == value for name, value in expected.items()) + + +def _find_span( + tracer: RecordingTracer, + name: str, + attrs: dict[str, AttrValue] | None = None, + span_attrs: dict[str, AttrValue] | None = None, +) -> RecordingSpan: + for recorded_name, recorded_attrs, span in tracer.spans: + if recorded_name != name: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + if span_attrs is not None and not _attrs_include(span.attrs, span_attrs): + continue + return span + + raise AssertionError(f'missing span {name}') + + +def _find_increment_call( + metrics: RecordingMetrics, + name: str, + *, + value: int = 1, + attrs: dict[str, AttrValue] | None = None, +) -> tuple[str, int, Attrs | None]: + for recorded_name, recorded_value, 
recorded_attrs in metrics.increment_calls: + if recorded_name != name or recorded_value != value: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + return recorded_name, recorded_value, recorded_attrs + + raise AssertionError(f'missing increment metric {name}') + + +def _find_record_call( + metrics: RecordingMetrics, + name: str, + *, + attrs: dict[str, AttrValue] | None = None, +) -> tuple[str, float, Attrs | None]: + for recorded_name, recorded_value, recorded_attrs in metrics.record_calls: + if recorded_name != name: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + return recorded_name, recorded_value, recorded_attrs + + raise AssertionError(f'missing record metric {name}') + + def build_config(tmp_path: Path) -> SandboxConfig: return SandboxConfig( image='sandbox:latest', @@ -187,6 +323,48 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( ] +def test_runtime_create_records_observability(tmp_path: Path) -> None: + config = build_config(tmp_path) + (tmp_path / 'dependencies').mkdir() + (tmp_path / 'lambda-tools').mkdir() + containers = FakeContainers() + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expires_at = created_at + timedelta(minutes=5) + + session = runtime.create( + session_id=SESSION_ID, + chat_id=CHAT_ID, + created_at=created_at, + expires_at=expires_at, + ) + + assert session.container_id == 'container-123' + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.create.duration_ms', + attrs={'operation': 'create', 'result': 'created'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.create_sandbox', + {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)}, + { + 'container.id': 'container-123', + 'sandbox.result': 
'created', + }, + ) + assert not span.errors + + def test_runtime_create_raises_start_error_when_container_id_is_missing( tmp_path: Path, ) -> None: @@ -208,6 +386,51 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing( assert excinfo.value.chat_id == str(CHAT_ID) +def test_runtime_create_error_records_observability_when_container_id_missing( + tmp_path: Path, +) -> None: + config = build_config(tmp_path) + (tmp_path / 'dependencies').mkdir() + (tmp_path / 'lambda-tools').mkdir() + containers = FakeContainers(run_result=FakeContainer('')) + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + with pytest.raises(SandboxStartError) as excinfo: + runtime.create( + session_id=SESSION_ID, + chat_id=CHAT_ID, + created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), + expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC), + ) + + assert str(excinfo.value) == 'sandbox_start_failed' + _find_increment_call( + metrics, + 'sandbox.runtime.error.total', + attrs={'operation': 'create', 'error.type': 'SandboxStartError'}, + ) + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.create.duration_ms', + attrs={'operation': 'create', 'result': 'error'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.create_sandbox', + {'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)}, + {'sandbox.result': 'error'}, + ) + assert excinfo.value in span.errors + + def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None: config = build_config(tmp_path) containers = FakeContainers() @@ -273,3 +496,56 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers( assert containers.list_calls == [ {'filters': {'label': ['session_id', 'chat_id', 'expires_at']}} ] + + +def test_runtime_list_active_records_observability(tmp_path: Path) -> None: + config = build_config(tmp_path) + containers = 
FakeContainers() + expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC) + containers.list_result = [ + FakeListedContainer( + 'container-123', + labels={ + 'session_id': str(SESSION_ID), + 'chat_id': str(CHAT_ID), + 'expires_at': expires_at.isoformat(), + }, + created_at='2026-04-02T12:00:00Z', + ), + FakeListedContainer( + 'container-bad', + labels={ + 'chat_id': str(CHAT_ID), + 'expires_at': expires_at.isoformat(), + }, + created_at='2026-04-02T12:01:00Z', + ), + ] + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + sessions = runtime.list_active_sessions() + + assert len(sessions) == 1 + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.list_active.duration_ms', + attrs={'operation': 'list_active', 'result': 'listed'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.list_active_sandboxes', + span_attrs={ + 'sandbox.container_count': 2, + 'sandbox.active_count': 1, + 'sandbox.result': 'listed', + }, + ) + assert not span.errors diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index f744492..403eb0f 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -1,11 +1,15 @@ import threading from datetime import UTC, datetime, timedelta +from types import TracebackType from uuid import UUID +import pytest + from adapter.observability.noop import NoopMetrics, NoopTracer from domain.sandbox import SandboxSession, SandboxStatus from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository +from usecase.interface import AttrValue, Attrs from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand CHAT_ID = UUID('11111111-1111-1111-1111-111111111111') @@ -53,6 +57,130 @@ class FakeLogger: self.messages.append(('error', message, attrs)) +class RecordingMetrics: 
+ def __init__(self) -> None: + self.increment_calls: list[tuple[str, int, Attrs | None]] = [] + self.record_calls: list[tuple[str, float, Attrs | None]] = [] + self.set_calls: list[tuple[str, int | float, Attrs | None]] = [] + + def increment( + self, + name: str, + value: int = 1, + attrs: Attrs | None = None, + ) -> None: + self.increment_calls.append((name, value, attrs)) + + def record( + self, + name: str, + value: float, + attrs: Attrs | None = None, + ) -> None: + self.record_calls.append((name, value, attrs)) + + def set( + self, + name: str, + value: int | float, + attrs: Attrs | None = None, + ) -> None: + self.set_calls.append((name, value, attrs)) + + +class RecordingSpan: + def __init__(self) -> None: + self.attrs: dict[str, AttrValue] = {} + self.errors: list[Exception] = [] + + def set_attribute(self, name: str, value: AttrValue) -> None: + self.attrs[name] = value + + def record_error(self, error: Exception) -> None: + self.errors.append(error) + + +class RecordingSpanContext: + def __init__(self, span: RecordingSpan) -> None: + self._span = span + + def __enter__(self) -> RecordingSpan: + return self._span + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + traceback: TracebackType | None, + ) -> bool | None: + return None + + +class RecordingTracer: + def __init__(self) -> None: + self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = [] + + def start_span( + self, + name: str, + attrs: Attrs | None = None, + ) -> RecordingSpanContext: + span = RecordingSpan() + self.spans.append((name, attrs, span)) + return RecordingSpanContext(span) + + +def _attrs_include( + actual: Attrs | dict[str, AttrValue] | None, + expected: dict[str, AttrValue], +) -> bool: + if actual is None: + return False + + return all(actual.get(name) == value for name, value in expected.items()) + + +def _find_span( + tracer: RecordingTracer, + name: str, + attrs: dict[str, AttrValue] | None = None, + span_attrs: dict[str, 
AttrValue] | None = None, +) -> RecordingSpan: + for recorded_name, recorded_attrs, span in tracer.spans: + if recorded_name != name: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + if span_attrs is not None and not _attrs_include(span.attrs, span_attrs): + continue + return span + + raise AssertionError(f'missing span {name}') + + +def _assert_increment_metric_present( + metrics: RecordingMetrics, + name: str, + *, + value: int = 1, + attrs: dict[str, AttrValue] | None = None, +) -> None: + for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls: + if recorded_name != name or recorded_value != value: + continue + if attrs is not None and not _attrs_include(recorded_attrs, attrs): + continue + return + + raise AssertionError(f'missing increment metric {name}') + + +def _active_count_values(metrics: RecordingMetrics) -> list[int | float]: + return [ + value for name, value, _ in metrics.set_calls if name == 'sandbox.active.count' + ] + + class FakeLockContext: def __enter__(self) -> None: return None @@ -198,6 +326,30 @@ class FailingStopRuntime(FakeRuntime): raise RuntimeError('stop_failed') +class FailingCreateRuntime(FakeRuntime): + def __init__(self, error: Exception) -> None: + super().__init__() + self._error = error + + def create( + self, + *, + session_id: UUID, + chat_id: UUID, + created_at: datetime, + expires_at: datetime, + ) -> SandboxSession: + self.create_calls.append( + { + 'session_id': session_id, + 'chat_id': chat_id, + 'created_at': created_at, + 'expires_at': expires_at, + } + ) + raise self._error + + def test_create_sandbox_reuses_active_session_when_not_expired() -> None: now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) session = SandboxSession( @@ -244,6 +396,104 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None: ] +def test_create_sandbox_reuse_records_observability() -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( 
+ session_id=SESSION_REUSED_ID, + chat_id=CHAT_ID, + container_id='container-1', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=1), + expires_at=now + timedelta(minutes=4), + ) + repository = InMemorySandboxSessionRepository() + repository.save(session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=FakeRuntime(), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + assert result == session + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'reused'}, + ) + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'session.id': str(SESSION_REUSED_ID), + 'container.id': 'container-1', + 'sandbox.result': 'reused', + }, + ) + assert not span.errors + + +def test_create_sandbox_replace_records_observability_and_final_active_count( + monkeypatch, +) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expired_session = SandboxSession( + session_id=SESSION_OLD_ID, + chat_id=CHAT_ID, + container_id='container-old', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=10), + expires_at=now, + ) + repository = InMemorySandboxSessionRepository() + repository.save(expired_session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=FakeRuntime(), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + assert result.session_id == SESSION_NEW_ID + assert repository.count_active() == 1 + _assert_increment_metric_present( + metrics, + 
'sandbox.create.total', + attrs={'result': 'replaced'}, + ) + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 1 + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'session.id': str(SESSION_NEW_ID), + 'container.id': f'container-{SESSION_NEW_ID}', + 'sandbox.result': 'replaced', + }, + ) + assert not span.errors + + def test_create_sandbox_replaces_expired_session_and_creates_new_one( monkeypatch, ) -> None: @@ -363,6 +613,42 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None: ] +def test_create_sandbox_error_records_observability(monkeypatch) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=InMemorySandboxSessionRepository(), + locker=FakeLocker(), + runtime=FailingCreateRuntime(RuntimeError('create_failed')), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + with pytest.raises(RuntimeError, match='create_failed') as excinfo: + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'error'}, + ) + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'session.id': str(SESSION_NEW_ID), + 'sandbox.result': 'error', + }, + ) + assert excinfo.value in span.errors + + def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( monkeypatch, ) -> None: @@ -516,6 +802,64 @@ def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() -> ] +def test_cleanup_expired_sandboxes_records_observability_on_cleaned_session() -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expired_session = SandboxSession( + session_id=SESSION_EXPIRED_ID, + 
chat_id=EXPIRED_CHAT_ID, + container_id='container-expired', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=10), + expires_at=now - timedelta(seconds=1), + ) + repository = InMemorySandboxSessionRepository() + repository.save(expired_session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CleanupExpiredSandboxes( + repository=repository, + locker=FakeLocker(), + runtime=FakeRuntime(), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ) + + result = usecase.execute() + + assert result == [expired_session] + _assert_increment_metric_present( + metrics, + 'sandbox.cleanup.total', + attrs={'result': 'cleaned'}, + ) + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 0 + root_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandboxes', + span_attrs={ + 'sandbox.expired_count': 1, + 'sandbox.cleaned_count': 1, + 'sandbox.error_count': 0, + 'sandbox.result': 'completed', + }, + ) + assert not root_span.errors + cleanup_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandbox', + { + 'chat.id': str(EXPIRED_CHAT_ID), + 'session.id': str(SESSION_EXPIRED_ID), + 'container.id': 'container-expired', + }, + {'sandbox.result': 'cleaned'}, + ) + assert not cleanup_span.errors + + def test_cleanup_expired_sandboxes_skips_replaced_session_from_stale_snapshot() -> None: now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) expired_snapshot = SandboxSession( From 02770bce7d0bbfa8a32b6c1efb0095f964f18a32 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 01:55:12 +0300 Subject: [PATCH 06/10] fix sandbox replace trace identity --- tasks.md | 38 ++++++++++++++++++++++- test/test_sandbox_usecase.py | 59 +++++++++++++++++++++++++++++++++++- usecase/sandbox.py | 30 +++++++++++++++--- 3 files changed, 120 insertions(+), 7 deletions(-) diff --git a/tasks.md b/tasks.md index e01dcde..4b9e7e5 100644 --- a/tasks.md +++ b/tasks.md @@ -281,9 +281,45 @@ ### M23. 
Boundary review для sandbox observability - Субагент: `code-reviewer` -- Статус: pending +- Статус: in_progress - Зависимости: `M22` - Commit required: no - Scope: проверить, что observability изменения закрывают issue #11 и FR-034 без нарушения clean architecture - Файлы: весь измененный код после `M19`-`M22` - Критерии приемки: inner layers не импортируют OTel; Docker-specific tracing остается в `adapter/docker/`; current-state и duration metrics достаточно покрывают sandbox lifecycle; замечания сведены к minor или отсутствуют + +## Follow-up после M23 boundary review + +### M24. Исправить replace trace identity в CreateSandbox + +- Субагент: `feature-developer` +- Статус: completed +- Зависимости: `M23` +- Commit required: yes +- Commit message: `fix sandbox replace trace identity` +- Scope: устранить смешение old/new sandbox identifiers в replace path usecase tracing +- Файлы: `usecase/sandbox.py`, при необходимости точечные тесты в `test/*` +- Решение: сохранять старые и новые sandbox identifiers в отдельных span attrs или child spans так, чтобы replace success и replace failure оставались однозначно трассируемыми +- Критерии приемки: replace path не перетирает previous/new identifiers; при replace failure span остается консистентным и отражает обе стороны lifecycle + +### M25. 
Добрать failure-path observability regression tests + +- Субагент: `test-engineer` +- Статус: pending +- Зависимости: `M24` +- Commit required: yes +- Commit message: `add sandbox observability failure tests` +- Scope: покрыть tests для replace-failure trace, cleanup error metrics/spans и Docker stop observability +- Файлы: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, при необходимости другие focused tests в `test/*` +- Решение: использовать presence-based assertions и проверять ключевые span/metric contracts без brittle exact-order checks +- Критерии приемки: есть тест на replace failure tracing; есть тест на `sandbox.cleanup.error.total`; есть тесты на Docker stop observability для success/error/not_found или эквивалентного набора outcome paths + +### M26. Повторный boundary review для sandbox observability + +- Субагент: `code-reviewer` +- Статус: pending +- Зависимости: `M25` +- Commit required: no +- Scope: подтвердить, что follow-up fixes закрыли M23 замечания без новых boundary нарушений +- Файлы: весь измененный код после `M24`-`M25` +- Критерии приемки: нет замечаний по replace tracing identity и missing failure-path observability coverage; clean architecture по-прежнему соблюдена diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index 403eb0f..92c7937 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -9,7 +9,7 @@ from adapter.observability.noop import NoopMetrics, NoopTracer from domain.sandbox import SandboxSession, SandboxStatus from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository -from usecase.interface import AttrValue, Attrs +from usecase.interface import Attrs, AttrValue from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand CHAT_ID = UUID('11111111-1111-1111-1111-111111111111') @@ -486,6 +486,10 @@ def 
test_create_sandbox_replace_records_observability_and_final_active_count( 'usecase.create_sandbox', {'chat.id': str(CHAT_ID)}, { + 'sandbox.previous_session.id': str(SESSION_OLD_ID), + 'sandbox.previous_container.id': 'container-old', + 'sandbox.new_session.id': str(SESSION_NEW_ID), + 'sandbox.new_container.id': f'container-{SESSION_NEW_ID}', 'session.id': str(SESSION_NEW_ID), 'container.id': f'container-{SESSION_NEW_ID}', 'sandbox.result': 'replaced', @@ -649,6 +653,59 @@ def test_create_sandbox_error_records_observability(monkeypatch) -> None: assert excinfo.value in span.errors +def test_create_sandbox_replace_stop_failure_preserves_separate_identities( + monkeypatch, +) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expired_session = SandboxSession( + session_id=SESSION_OLD_ID, + chat_id=CHAT_ID, + container_id='container-old', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=10), + expires_at=now, + ) + repository = InMemorySandboxSessionRepository() + repository.save(expired_session) + metrics = RecordingMetrics() + tracer = RecordingTracer() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=FailingStopRuntime('container-old'), + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + with pytest.raises(RuntimeError, match='stop_failed') as excinfo: + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'error'}, + ) + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'sandbox.previous_session.id': str(SESSION_OLD_ID), + 'sandbox.previous_container.id': 'container-old', + 'sandbox.new_session.id': str(SESSION_NEW_ID), + 'sandbox.result': 'error', + }, + ) + assert 'sandbox.new_container.id' not in span.attrs + assert 
'session.id' not in span.attrs + assert 'container.id' not in span.attrs + assert excinfo.value in span.errors + + def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( monkeypatch, ) -> None: diff --git a/usecase/sandbox.py b/usecase/sandbox.py index 2bdb369..0a3412f 100644 --- a/usecase/sandbox.py +++ b/usecase/sandbox.py @@ -67,10 +67,22 @@ class CreateSandbox: return session result = 'created' + new_session_id: UUID | None = None if session is not None: result = 'replaced' - span.set_attribute('session.id', str(session.session_id)) - span.set_attribute('container.id', session.container_id) + new_session_id = _new_session_id() + span.set_attribute( + 'sandbox.previous_session.id', + str(session.session_id), + ) + span.set_attribute( + 'sandbox.previous_container.id', + session.container_id, + ) + span.set_attribute( + 'sandbox.new_session.id', + str(new_session_id), + ) self._logger.info( 'sandbox_replaced', attrs=_sandbox_attrs(session), @@ -81,16 +93,24 @@ class CreateSandbox: created_at = self._clock.now() expires_at = created_at + self._ttl - session_id = _new_session_id() - span.set_attribute('session.id', str(session_id)) + if new_session_id is None: + new_session_id = _new_session_id() + span.set_attribute('session.id', str(new_session_id)) new_session = self._runtime.create( - session_id=session_id, + session_id=new_session_id, chat_id=chat_id, created_at=created_at, expires_at=expires_at, ) + if result == 'replaced': + span.set_attribute( + 'sandbox.new_container.id', + new_session.container_id, + ) self._repository.save(new_session) _set_active_count(self._metrics, self._repository) + if result == 'replaced': + span.set_attribute('session.id', str(new_session.session_id)) span.set_attribute('container.id', new_session.container_id) span.set_attribute('sandbox.result', result) self._metrics.increment( From b4a2a9ceea1e1fa32e24b93cf65e6d892b866587 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 02:04:51 +0300 Subject: 
[PATCH 07/10] add sandbox observability failure tests --- tasks.md | 2 +- test/test_docker_runtime.py | 147 ++++++++++++++++++++++++++++++++++- test/test_sandbox_usecase.py | 125 ++++++++++++++++++++++++++++- 3 files changed, 268 insertions(+), 6 deletions(-) diff --git a/tasks.md b/tasks.md index 4b9e7e5..e713a66 100644 --- a/tasks.md +++ b/tasks.md @@ -305,7 +305,7 @@ ### M25. Добрать failure-path observability regression tests - Субагент: `test-engineer` -- Статус: pending +- Статус: completed - Зависимости: `M24` - Commit required: yes - Commit message: `add sandbox observability failure tests` diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index 4db1095..352adad 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -43,6 +43,27 @@ class FakeListedContainer(FakeContainer): self.attrs = {'Created': created_at} +class FailingStopContainer(FakeListedContainer): + def __init__( + self, + container_id: str, + *, + labels: dict[str, str], + created_at: str, + error: Exception, + ) -> None: + super().__init__( + container_id, + labels=labels, + created_at=created_at, + ) + self._error = error + + def stop(self) -> None: + self.stop_calls += 1 + raise self._error + + class RunKwargs(TypedDict): detach: bool labels: dict[str, str] @@ -435,23 +456,143 @@ def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None: config = build_config(tmp_path) containers = FakeContainers() containers.get_result = NotFound('missing') - runtime = build_runtime(config, containers) + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) runtime.stop('container-123') assert containers.get_calls == ['container-123'] + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.stop.duration_ms', + attrs={'operation': 'stop', 'result': 'not_found'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 
'adapter.docker.stop_sandbox', + {'container.id': 'container-123'}, + {'sandbox.result': 'not_found'}, + ) + assert not span.errors + stop_error_calls = [ + call + for call in metrics.increment_calls + if call[0] == 'sandbox.runtime.error.total' + and call[2] is not None + and call[2].get('operation') == 'stop' + ] + assert stop_error_calls == [] def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None: config = build_config(tmp_path) containers = FakeContainers() - containers.get_result = DockerException('boom') - runtime = build_runtime(config, containers) + containers.get_result = FailingStopContainer( + 'container-123', + labels={ + 'session_id': str(SESSION_ID), + 'chat_id': str(CHAT_ID), + 'expires_at': '2026-04-02T12:05:00+00:00', + }, + created_at='2026-04-02T12:00:00Z', + error=DockerException('boom'), + ) + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) with pytest.raises(SandboxError) as excinfo: runtime.stop('container-123') assert str(excinfo.value) == 'sandbox_stop_failed' + _find_increment_call( + metrics, + 'sandbox.runtime.error.total', + attrs={'operation': 'stop', 'error.type': 'DockerException'}, + ) + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.stop.duration_ms', + attrs={'operation': 'stop', 'result': 'error'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.stop_sandbox', + {'container.id': 'container-123'}, + { + 'session.id': str(SESSION_ID), + 'chat.id': str(CHAT_ID), + 'sandbox.result': 'error', + }, + ) + cause = excinfo.value.__cause__ + assert isinstance(cause, DockerException) + assert cause in span.errors + + +def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None: + config = build_config(tmp_path) + containers = FakeContainers() + container = FakeListedContainer( + 'container-123', + labels={ + 'session_id': str(SESSION_ID), + 
'chat_id': str(CHAT_ID), + 'expires_at': '2026-04-02T12:05:00+00:00', + }, + created_at='2026-04-02T12:00:00Z', + ) + containers.get_result = container + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + runtime.stop('container-123') + + assert container.stop_calls == 1 + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.stop.duration_ms', + attrs={'operation': 'stop', 'result': 'stopped'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.stop_sandbox', + {'container.id': 'container-123'}, + { + 'session.id': str(SESSION_ID), + 'chat.id': str(CHAT_ID), + 'sandbox.result': 'stopped', + }, + ) + assert not span.errors + stop_error_calls = [ + call + for call in metrics.increment_calls + if call[0] == 'sandbox.runtime.error.total' + and call[2] is not None + and call[2].get('operation') == 'stop' + ] + assert stop_error_calls == [] def test_runtime_list_active_sessions_reads_valid_labeled_containers( diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index 92c7937..068204c 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -281,6 +281,22 @@ class StaleSnapshotRepository(InMemorySandboxSessionRepository): return [self._snapshot] +class FailingSaveRepository(InMemorySandboxSessionRepository): + def __init__(self, error: Exception) -> None: + super().__init__() + self._error = error + self._fail_next_save = False + + def fail_next_save(self) -> None: + self._fail_next_save = True + + def save(self, session: SandboxSession) -> None: + if self._fail_next_save: + self._fail_next_save = False + raise self._error + super().save(session) + + class FakeRuntime: def __init__(self) -> None: self.create_calls: list[dict[str, object]] = [] @@ -706,6 +722,64 @@ def test_create_sandbox_replace_stop_failure_preserves_separate_identities( assert excinfo.value in span.errors 
+def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids( + monkeypatch, +) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + expired_session = SandboxSession( + session_id=SESSION_OLD_ID, + chat_id=CHAT_ID, + container_id='container-old', + status=SandboxStatus.RUNNING, + created_at=now - timedelta(minutes=10), + expires_at=now, + ) + repository = FailingSaveRepository(RuntimeError('save_failed')) + repository.save(expired_session) + repository.fail_next_save() + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = FakeRuntime() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=runtime, + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=tracer, + ttl=timedelta(minutes=5), + ) + monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + with pytest.raises(RuntimeError, match='save_failed') as excinfo: + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + assert runtime.stop_calls == ['container-old'] + assert len(runtime.create_calls) == 1 + assert repository.get_active_by_chat_id(CHAT_ID) is None + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'error'}, + ) + span = _find_span( + tracer, + 'usecase.create_sandbox', + {'chat.id': str(CHAT_ID)}, + { + 'sandbox.previous_session.id': str(SESSION_OLD_ID), + 'sandbox.previous_container.id': 'container-old', + 'sandbox.new_session.id': str(SESSION_NEW_ID), + 'sandbox.new_container.id': f'container-{SESSION_NEW_ID}', + 'sandbox.result': 'error', + }, + ) + assert 'session.id' not in span.attrs + assert 'container.id' not in span.attrs + assert excinfo.value in span.errors + + def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( monkeypatch, ) -> None: @@ -982,6 +1056,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None: repository.save(cleaned_session) runtime = FailingStopRuntime('container-fail') 
logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() locker = FakeLocker() usecase = CleanupExpiredSandboxes( repository=repository, @@ -989,8 +1065,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None: runtime=runtime, clock=FakeClock(now), logger=logger, - metrics=NoopMetrics(), - tracer=NoopTracer(), + metrics=metrics, + tracer=tracer, ) result = usecase.execute() @@ -1021,3 +1097,48 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None: }, ), ] + _assert_increment_metric_present( + metrics, + 'sandbox.cleanup.error.total', + attrs={'error.type': 'RuntimeError'}, + ) + _assert_increment_metric_present( + metrics, + 'sandbox.cleanup.total', + attrs={'result': 'cleaned'}, + ) + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 1 + root_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandboxes', + span_attrs={ + 'sandbox.expired_count': 2, + 'sandbox.cleaned_count': 1, + 'sandbox.error_count': 1, + 'sandbox.result': 'completed_with_errors', + }, + ) + assert not root_span.errors + failed_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandbox', + { + 'chat.id': str(FAIL_CHAT_ID), + 'session.id': str(SESSION_FAIL_ID), + 'container.id': 'container-fail', + }, + {'sandbox.result': 'error'}, + ) + assert [str(error) for error in failed_span.errors] == ['stop_failed'] + cleaned_span = _find_span( + tracer, + 'usecase.cleanup_expired_sandbox', + { + 'chat.id': str(CLEAN_CHAT_ID), + 'session.id': str(SESSION_CLEAN_ID), + 'container.id': 'container-clean', + }, + {'sandbox.result': 'cleaned'}, + ) + assert not cleaned_span.errors From 9b6c7908adea4321a199dd2a932662120aedb0e4 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 02:18:54 +0300 Subject: [PATCH 08/10] fix sandbox create rollback gap --- tasks.md | 38 +++++++++++++++++++++++++++++++++++- test/test_sandbox_usecase.py | 37 ++++++++++++++++++++++++++++++++++- usecase/sandbox.py | 22 
++++++++++++++++++++- 3 files changed, 94 insertions(+), 3 deletions(-) diff --git a/tasks.md b/tasks.md index e713a66..77dd415 100644 --- a/tasks.md +++ b/tasks.md @@ -317,9 +317,45 @@ ### M26. Повторный boundary review для sandbox observability - Субагент: `code-reviewer` -- Статус: pending +- Статус: in_progress - Зависимости: `M25` - Commit required: no - Scope: подтвердить, что follow-up fixes закрыли M23 замечания без новых boundary нарушений - Файлы: весь измененный код после `M24`-`M25` - Критерии приемки: нет замечаний по replace tracing identity и missing failure-path observability coverage; clean architecture по-прежнему соблюдена + +## Follow-up после M26 boundary review + +### M27. Компенсация save failure после runtime.create + +- Субагент: `feature-developer` +- Статус: completed +- Зависимости: `M26` +- Commit required: yes +- Commit message: `fix sandbox create rollback gap` +- Scope: не оставлять untracked running container и неконсистентный `sandbox.active.count` при падении `repository.save()` после успешного `runtime.create()` +- Файлы: `usecase/sandbox.py`, при необходимости точечные тесты в `test/*` +- Решение: сделать create/replace path registry-safe через rollback или другой явный compensation path без нарушения clean architecture +- Критерии приемки: save failure не оставляет новый container в runtime без registry state; `sandbox.active.count` отражает финальное committed state; replace и fresh-create failure paths консистентны + +### M28. 
Регрессии на rollback и startup failure observability + +- Субагент: `test-engineer` +- Статус: pending +- Зависимости: `M27` +- Commit required: yes +- Commit message: `add sandbox rollback regression tests` +- Scope: покрыть tests для save-failure rollback и startup observability failure paths +- Файлы: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, `test/test_create_http.py`, при необходимости другие focused tests в `test/*` +- Решение: добавить tests на fresh-create/replace save failure compensation, `list_active` failure observability и reconciliation failure span/metric expectations где применимо +- Критерии приемки: rollback path покрыт; list/reconciliation failure observability не регрессирует; tests остаются presence-based и стабильными + +### M29. Финальный boundary review для sandbox observability + +- Субагент: `code-reviewer` +- Статус: pending +- Зависимости: `M28` +- Commit required: no +- Scope: подтвердить, что M27-M28 закрыли remaining M26 замечания +- Файлы: весь измененный код после `M27`-`M28` +- Критерии приемки: нет замечаний по rollback gap и startup failure observability coverage; sandbox observability slice приемлем as-is diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index 068204c..b2e3dcb 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -669,6 +669,39 @@ def test_create_sandbox_error_records_observability(monkeypatch) -> None: assert excinfo.value in span.errors +def test_create_sandbox_save_failure_stops_untracked_container(monkeypatch) -> None: + now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + repository = FailingSaveRepository(RuntimeError('save_failed')) + repository.fail_next_save() + metrics = RecordingMetrics() + runtime = FakeRuntime() + usecase = CreateSandbox( + repository=repository, + locker=FakeLocker(), + runtime=runtime, + clock=FakeClock(now), + logger=FakeLogger(), + metrics=metrics, + tracer=NoopTracer(), + ttl=timedelta(minutes=5), + ) + 
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) + + with pytest.raises(RuntimeError, match='save_failed'): + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) + + assert len(runtime.create_calls) == 1 + assert runtime.stop_calls == [f'container-{SESSION_NEW_ID}'] + assert repository.get_active_by_chat_id(CHAT_ID) is None + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 0 + _assert_increment_metric_present( + metrics, + 'sandbox.create.total', + attrs={'result': 'error'}, + ) + + def test_create_sandbox_replace_stop_failure_preserves_separate_identities( monkeypatch, ) -> None: @@ -755,9 +788,11 @@ def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids( with pytest.raises(RuntimeError, match='save_failed') as excinfo: usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) - assert runtime.stop_calls == ['container-old'] + assert runtime.stop_calls == ['container-old', f'container-{SESSION_NEW_ID}'] assert len(runtime.create_calls) == 1 assert repository.get_active_by_chat_id(CHAT_ID) is None + assert _active_count_values(metrics) + assert _active_count_values(metrics)[-1] == 0 _assert_increment_metric_present( metrics, 'sandbox.create.total', diff --git a/usecase/sandbox.py b/usecase/sandbox.py index 0a3412f..59f1584 100644 --- a/usecase/sandbox.py +++ b/usecase/sandbox.py @@ -107,7 +107,7 @@ class CreateSandbox: 'sandbox.new_container.id', new_session.container_id, ) - self._repository.save(new_session) + self._save_created_session(new_session) _set_active_count(self._metrics, self._repository) if result == 'replaced': span.set_attribute('session.id', str(new_session.session_id)) @@ -131,6 +131,26 @@ class CreateSandbox: span.record_error(exc) raise + def _save_created_session(self, session: SandboxSession) -> None: + try: + self._repository.save(session) + except Exception as exc: + self._compensate_save_failure(session, exc) + raise + + def _compensate_save_failure( + self, 
+ session: SandboxSession, + error: Exception, + ) -> None: + try: + self._runtime.stop(session.container_id) + except Exception as stop_error: + _set_active_count(self._metrics, self._repository) + raise error from stop_error + + _set_active_count(self._metrics, self._repository) + class CleanupExpiredSandboxes: def __init__( From c5b6a84a4b71ac5d21c431565d5eba14ebd30b72 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 02:29:18 +0300 Subject: [PATCH 09/10] add sandbox rollback regression tests --- tasks.md | 2 +- test/test_create_http.py | 148 ++++++++++++++++++++++++++++++++++++ test/test_docker_runtime.py | 40 ++++++++++ 3 files changed, 189 insertions(+), 1 deletion(-) diff --git a/tasks.md b/tasks.md index 77dd415..861a726 100644 --- a/tasks.md +++ b/tasks.md @@ -341,7 +341,7 @@ ### M28. Регрессии на rollback и startup failure observability - Субагент: `test-engineer` -- Статус: pending +- Статус: completed - Зависимости: `M27` - Commit required: yes - Commit message: `add sandbox rollback regression tests` diff --git a/test/test_create_http.py b/test/test_create_http.py index e8686c4..ae302c2 100644 --- a/test/test_create_http.py +++ b/test/test_create_http.py @@ -3,6 +3,7 @@ import json from datetime import UTC, datetime, timedelta from uuid import UUID +import pytest from docker import DockerClient from fastapi import FastAPI from starlette.types import Message, Scope @@ -226,6 +227,16 @@ class FixedSandboxState: return list(self._sessions) +class FailingSandboxState: + def __init__(self, error: Exception) -> None: + self._error = error + self.calls = 0 + + def list_active_sessions(self) -> list[SandboxSession]: + self.calls += 1 + raise self._error + + class CountingRegistry: def __init__(self, count_active_result: int) -> None: self._count_active_result = count_active_result @@ -238,6 +249,25 @@ class CountingRegistry: return self._count_active_result +class FailingRegistry: + def __init__(self, error: Exception, *, fail_on: str = 
'replace_all') -> None: + self._error = error + self._fail_on = fail_on + self.replaced_sessions: list[SandboxSession] = [] + self.count_calls = 0 + + def replace_all(self, sessions: list[SandboxSession]) -> None: + self.replaced_sessions = list(sessions) + if self._fail_on == 'replace_all': + raise self._error + + def count_active(self) -> int: + self.count_calls += 1 + if self._fail_on == 'count_active': + raise self._error + return 0 + + def build_config() -> AppConfig: return AppConfig( app=AppSectionConfig(name='master', env='test'), @@ -719,6 +749,124 @@ def test_reconciliation_uses_registry_backed_active_count_metric() -> None: assert tracer.spans[0][2].attrs['sandbox.active_count'] == 7 +def test_reconciliation_records_error_when_state_source_fails() -> None: + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + state_error = RuntimeError('state_failed') + state_source = FailingSandboxState(state_error) + reconciler = SandboxSessionReconciler( + state_source=state_source, + registry=CountingRegistry(count_active_result=7), + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + with pytest.raises(RuntimeError, match='state_failed') as excinfo: + reconciler.execute() + + assert state_source.calls == 1 + assert metrics.set_calls == [] + spans = [ + span + for name, _, span in tracer.spans + if name == 'adapter.sandbox.reconcile_sessions' + ] + assert spans + span = spans[0] + assert span.attrs['sandbox.result'] == 'error' + assert 'sandbox.discovered_count' not in span.attrs + assert 'sandbox.active_count' not in span.attrs + assert excinfo.value in span.errors + + +def test_reconciliation_records_error_without_active_count_metric_on_registry_failure() -> ( + None +): + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( + session_id=SESSION_ID, + chat_id=CHAT_ID, + container_id='container-123', + 
status=SandboxStatus.RUNNING, + created_at=created_at, + expires_at=created_at + timedelta(minutes=5), + ) + registry_error = RuntimeError('replace_failed') + registry = FailingRegistry(registry_error) + reconciler = SandboxSessionReconciler( + state_source=FixedSandboxState([session]), + registry=registry, + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + with pytest.raises(RuntimeError, match='replace_failed') as excinfo: + reconciler.execute() + + assert registry.replaced_sessions == [session] + assert registry.count_calls == 0 + assert metrics.set_calls == [] + spans = [ + span + for name, _, span in tracer.spans + if name == 'adapter.sandbox.reconcile_sessions' + ] + assert spans + span = spans[0] + assert span.attrs['sandbox.discovered_count'] == 1 + assert span.attrs['sandbox.result'] == 'error' + assert 'sandbox.active_count' not in span.attrs + assert excinfo.value in span.errors + + +def test_reconciliation_records_error_when_registry_count_active_fails() -> None: + logger = FakeLogger() + metrics = RecordingMetrics() + tracer = RecordingTracer() + created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) + session = SandboxSession( + session_id=SESSION_ID, + chat_id=CHAT_ID, + container_id='container-123', + status=SandboxStatus.RUNNING, + created_at=created_at, + expires_at=created_at + timedelta(minutes=5), + ) + registry_error = RuntimeError('count_failed') + registry = FailingRegistry(registry_error, fail_on='count_active') + reconciler = SandboxSessionReconciler( + state_source=FixedSandboxState([session]), + registry=registry, + logger=logger, + metrics=metrics, + tracer=tracer, + ) + + with pytest.raises(RuntimeError, match='count_failed') as excinfo: + reconciler.execute() + + assert registry.replaced_sessions == [session] + assert registry.count_calls == 1 + assert metrics.set_calls == [] + spans = [ + span + for name, _, span in tracer.spans + if name == 'adapter.sandbox.reconcile_sessions' + ] + assert spans + span = spans[0] + assert 
span.attrs['sandbox.discovered_count'] == 1 + assert 'sandbox.active_count' not in span.attrs + assert span.attrs['sandbox.result'] == 'error' + assert excinfo.value in span.errors + + def test_build_container_wires_observability_into_runtime_and_reconciler( monkeypatch, ) -> None: diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index 352adad..267d177 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -83,6 +83,7 @@ class FakeContainers: self.run_result = run_result or FakeContainer('container-123') self.get_result: FakeContainer | Exception | None = None self.list_result: list[object] = [] + self.list_error: Exception | None = None def run( self, @@ -114,6 +115,8 @@ class FakeContainers: def list(self, *, filters: dict[str, list[str]]) -> list[object]: self.list_calls.append({'filters': filters}) + if self.list_error is not None: + raise self.list_error return self.list_result @@ -690,3 +693,40 @@ def test_runtime_list_active_records_observability(tmp_path: Path) -> None: }, ) assert not span.errors + + +def test_runtime_list_active_error_records_observability(tmp_path: Path) -> None: + config = build_config(tmp_path) + containers = FakeContainers() + containers.list_error = DockerException('boom') + metrics = RecordingMetrics() + tracer = RecordingTracer() + runtime = DockerSandboxRuntime( + config, + FakeDockerClient(containers), + metrics, + tracer, + ) + + with pytest.raises(SandboxError) as excinfo: + runtime.list_active_sessions() + + assert str(excinfo.value) == 'sandbox_list_failed' + _find_increment_call( + metrics, + 'sandbox.runtime.error.total', + attrs={'operation': 'list_active', 'error.type': 'DockerException'}, + ) + duration_call = _find_record_call( + metrics, + 'sandbox.runtime.list_active.duration_ms', + attrs={'operation': 'list_active', 'result': 'error'}, + ) + assert duration_call[1] >= 0 + span = _find_span( + tracer, + 'adapter.docker.list_active_sandboxes', + span_attrs={'sandbox.result': 
'error'}, + ) + assert isinstance(excinfo.value.__cause__, DockerException) + assert excinfo.value in span.errors From 3293bccc5b24baa2b8222b46251fb9ae4c8ff9d4 Mon Sep 17 00:00:00 2001 From: Azamat Date: Fri, 3 Apr 2026 09:49:14 +0300 Subject: [PATCH 10/10] [feat] update readme.md --- AGENTS.md | 10 +- README.md | 315 ++++++++++++++++++++++++++++++------ config/docker-compose.yml | 4 +- tasks.md | 2 +- test/test_docker_runtime.py | 2 +- 5 files changed, 273 insertions(+), 60 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index ba5bc34..c9f89d9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -45,12 +45,12 @@ - Do not use Beads - Do not use `bd` - Use `uv` for Python commands and dependency management -- Do not create commits on your own -- Work on one task at a time - Prefer delegation for implementation -- Delegate only one task at a time -- After one task return to the user with result verification and next options -- Wait for the user before the next task commit or fix +- After implementation, run the `code-reviewer` agent +- Pass errors to the `test-engineer` agent to capture them +- Delegate fixing the errors to the `feature-developer` agent +- Repeat the cycle until no errors remain +- Ensure all tests pass ## Makefile - `make install` install deps with `uv` diff --git a/README.md b/README.md index 2339337..3ff3dd6 100644 --- a/README.md +++ b/README.md @@ -1,36 +1,268 @@ -Это шаблон Python-сервиса на чистой архитектуре с заменяемым web-слоем, типизированным конфигом, явным dependency wiring и observability через порты. +# master-service -## Что это за проект +`master-service` — это control-plane сервис для sandbox-контейнеров с AI-агентом. +Он поднимает и переиспользует sandbox на чат, подключает рабочие volume, восстанавливает state после рестарта и отдает наружу минимальный HTTP API под `/api/v1`.
-- Небольшой референсный сервис со слоями `domain/`, `usecase/`, `repository/` и `adapter/` -- Шаблон для сервисов на FastAPI, где FastAPI остается только во внешнем HTTP adapter -- Проект, где конфиг собирается из `config/app.yaml`, `.env` и env vars в одно дерево dataclass-конфигов -- Проект, где repository и usecase создаются один раз на старте приложения в composition root -- Проект, где логи, метрики и трейсы скрыты за интерфейсами и могут работать через `stdout`, файл или OpenTelemetry runtime +Важно: в локальном `config/app.yaml` исторически еще стоят template-имена `web-python-skelet`. +Если хочешь, чтобы `/health` и OTel service name локально тоже показывали `master-service`, переопредели: +- `APP_NAME=master-service` +- `APP_OTEL_SERVICE_NAME=master-service` -## Основные идеи +Сервис реализован на Python с Clean Architecture: +- `domain/` — сущности и доменные ошибки +- `usecase/` — сценарии приложения и порты +- `repository/` — реализации repository +- `adapter/` — HTTP, config, DI, Docker runtime и observability -- Clean Architecture и границы SOLID -- Направление зависимостей только внутрь -- Тонкие adapter-слои и явная сборка зависимостей -- Заменяемый HTTP-слой -- Observability без протекания OpenTelemetry во внутренние слои +## Что умеет сейчас + +Текущий sandbox MVP покрывает: +- `GET /api/v1/health` +- `POST /api/v1/create` с `chat_id: UUID` +- одну активную sandbox на чат +- reuse активной sandbox до истечения TTL +- cleanup просроченных sandbox в фоне +- startup reconciliation по Docker labels после рестарта сервиса +- chat mount `rw`, dependencies mount `ro`, lambda-tools mount `ro` +- логи, метрики и трейсы через порты `Logger`, `Metrics`, `Tracer` + +Пока вне scope: +- auth и access control +- p2p/WebSocket lease +- workspace/chat CRUD API +- central DB, artifacts, S3, quota и retention policy + +## Как устроен проект + +- FastAPI живет только во внешнем adapter слое +- Docker живет только во внешнем adapter слое +- конфиг собирается из 
`config/app.yaml`, `.env` и env vars в один dataclass tree +- repository и usecase создаются один раз на старте в `adapter/di/container.py` +- OpenTelemetry SDK не протекает во внутренние слои: observability идет через порты `Logger`, `Metrics`, `Tracer` + +## Структура + +- `domain/` — core model и domain errors +- `usecase/` — use cases и interfaces +- `repository/` — in-memory и другие repository implementations +- `adapter/config/` — typed config models и loader +- `adapter/docker/` — Docker sandbox runtime +- `adapter/observability/` — logger/metrics/tracer runtime factory +- `adapter/otel/` — OpenTelemetry adapters +- `adapter/di/` — composition root +- `adapter/http/fastapi/` — app, middleware, schemas, routers +- `adapter/sandbox/` — sandbox reconciliation logic +- `config/` — YAML config files +- `docs/` — ADR и проектные гайды ## Быстрый старт +### Требования + +- Python 3.13 +- `uv` +- локальный Docker daemon +- секреты `APP_API_TOKEN` и `APP_SIGNING_KEY` + +### Установка + ```bash make install +``` + +### Локальный запуск + +```bash APP_API_TOKEN=local-api-token APP_SIGNING_KEY=local-signing-key make run ``` -Приложение стартует на `http://0.0.0.0:8123` и публикует versioned API под `/api/v1`. +Это поднимет сам API, но для успешного `POST /api/v1/create` локально нужен еще рабочий sandbox runtime: + +- Docker daemon должен быть доступен по `docker.base_url` +- образ `sandbox.image` должен существовать локально +- директории `sandbox.dependencies_host_path` и `sandbox.lambda_tools_host_path` должны существовать + +В дефолтном `config/app.yaml` это значит: + +```bash +mkdir -p var/sandbox/dependencies var/sandbox/lambda-tools +docker image inspect ai-agent:latest >/dev/null +``` + +Если у тебя нет готового `ai-agent:latest`, проще начать с Docker Compose smoke path ниже. 
+ +После старта сервис доступен на: +- `http://127.0.0.1:8123/api/v1/health` + +Проверка health: + +```bash +curl http://127.0.0.1:8123/api/v1/health +``` + +Создание или reuse sandbox: + +```bash +curl -X POST http://127.0.0.1:8123/api/v1/create \ + -H 'Content-Type: application/json' \ + -d '{"chat_id":"11111111-1111-1111-1111-111111111111"}' +``` + +Пример ответа: + +```json +{ + "session_id": "3701cfe3-e05e-48af-8385-442dcd954ca2", + "chat_id": "11111111-1111-1111-1111-111111111111", + "container_id": "64d839c6007de9396ee08ad4af4a22a59a6410ec5f4892a9277a87eb49c3ff5d", + "status": "running", + "expires_at": "2026-04-02T21:11:38.292893Z" +} +``` + +## Запуск через Docker Compose + +Для локального smoke-run есть `docker-compose.yml`. +Он поднимает: +- `app` +- `docker-engine` в режиме Docker-in-Docker +- `otel-collector` + +При этом `app` получает compose-specific config из: +- `config/docker-compose.yml` + +Запуск: + +```bash +make compose-up +``` + +Проверка: + +```bash +make compose-ps +make compose-logs +``` + +Остановка: + +```bash +make compose-down +``` + +Важно: +- в `config/docker-compose.yml` сейчас для smoke-проверки стоит `sandbox.image: nginx:1.27-alpine` +- для реального agent runtime замени `sandbox.image` на образ своего sandbox/agent контейнера +- в compose auth env vars нужны для startup config, но текущий MVP API еще не проверяет request token + +## Как конфигурировать + +### Источники конфига + +Конфиг собирается в таком порядке: +1. базовый YAML из `config/app.yaml` +2. значения из `.env` +3. process env vars поверх `.env` + +То есть env vars имеют наивысший приоритет. + +### Обязательные секреты + +Нужны всегда: +- `APP_API_TOKEN` +- `APP_SIGNING_KEY` + +Сейчас это startup config, а не активная request auth для `/api/v1/create` и `/api/v1/health`. +То есть в текущем MVP токен не нужно передавать в HTTP headers для вызова этих endpoint. 
+ +### Основные секции YAML + +В `config/app.yaml` и `config/docker-compose.yml` есть секции: +- `app` +- `http` +- `logging` +- `metrics` +- `tracing` +- `otel` +- `docker` +- `sandbox` +- `security` + +### Полезные env overrides + +Чаще всего полезны: + +#### Общие +- `APP_NAME` +- `APP_ENV` +- `APP_HTTP_HOST` +- `APP_HTTP_PORT` + +#### Логирование и observability +- `APP_LOGGING_LEVEL` +- `APP_LOGGING_OUTPUT` +- `APP_LOGGING_FORMAT` +- `APP_LOGGING_FILE_PATH` +- `APP_METRICS_ENABLED` +- `APP_TRACING_ENABLED` +- `APP_OTEL_SERVICE_NAME` +- `APP_OTEL_LOGS_ENDPOINT` +- `APP_OTEL_METRICS_ENDPOINT` +- `APP_OTEL_TRACES_ENDPOINT` + +#### Docker runtime +- `APP_DOCKER_BASE_URL` + +#### Sandbox +- `APP_SANDBOX_IMAGE` +- `APP_SANDBOX_TTL_SECONDS` +- `APP_SANDBOX_CLEANUP_INTERVAL_SECONDS` +- `APP_SANDBOX_CHATS_ROOT` +- `APP_SANDBOX_DEPENDENCIES_HOST_PATH` +- `APP_SANDBOX_LAMBDA_TOOLS_HOST_PATH` +- `APP_SANDBOX_CHAT_MOUNT_PATH` +- `APP_SANDBOX_DEPENDENCIES_MOUNT_PATH` +- `APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH` + +#### Security +- `APP_API_TOKEN_HEADER` +- `APP_API_TOKEN` +- `APP_SIGNING_KEY` + +### Что важно в sandbox config + +- `docker.base_url` — адрес Docker daemon +- `sandbox.image` — образ sandbox контейнера +- `sandbox.ttl_seconds` — TTL sandbox +- `sandbox.cleanup_interval_seconds` — частота cleanup loop +- `sandbox.chats_root` — корень chat directories +- `sandbox.dependencies_host_path` — host path для dependency cache +- `sandbox.lambda_tools_host_path` — host path для read-only lambda-tools +- `sandbox.chat_mount_path` — путь внутри sandbox для chat volume +- `sandbox.dependencies_mount_path` — путь внутри sandbox для dependency cache +- `sandbox.lambda_tools_mount_path` — путь внутри sandbox для lambda-tools + +## Основные команды + +- `make install` — установить зависимости через `uv` +- `make run` — локальный запуск +- `make run-otel` — запуск с OTel endpoints из env +- `make test` — `pytest` +- `make lint` — `ruff` +- `make typecheck` — `mypy` +- `make 
pre-commit` — lint + typecheck + test +- `make compose-build` — собрать compose images +- `make compose-up` — поднять локальный stack +- `make compose-down` — остановить stack +- `make compose-logs` — смотреть логи +- `make compose-ps` — смотреть статус сервисов ## Документация ### Гайды - [Правила проекта и ограничения для агента](AGENTS.md) -- [Кодстайл проекта для AI-агента](docs/CODESTYLE.md) +- [Кодстайл проекта](docs/CODESTYLE.md) - [Чистая архитектура, SOLID, DIP, Protocol и repository](docs/CLEAN_ARCHITECTURE_RU.md) - [Логи, метрики и трейсы в этом проекте](docs/OBSERVABILITY_RU.md) - [Как чистая архитектура реализована здесь](docs/PROJECT_GUIDE_RU.md) @@ -43,43 +275,24 @@ APP_API_TOKEN=local-api-token APP_SIGNING_KEY=local-signing-key make run - [003 Observability Via Interfaces](docs/003-observability-via-interfaces.md) - [004 Versioned HTTP API](docs/004-versioned-http-api.md) - [005 Early FastAPI OTel Instrumentation](docs/005-fastapi-otel-early-instrumentation.md) +- [006 MVP Docker Sandbox Orchestration](docs/006-mvp-docker-sandbox-orchestration.md) +- [007 Startup Sandbox Reconciliation](docs/007-startup-sandbox-reconciliation.md) +- [008 Sandbox Lifecycle Observability](docs/008-sandbox-lifecycle-observability.md) -## Структура проекта +## Для AI-агента -- `domain/` - core-сущности и доменные ошибки -- `usecase/` - прикладные сценарии и порты -- `repository/` - реализации repository -- `adapter/config/` - загрузка и модели типизированного конфига -- `adapter/observability/` - выбор runtime для logger, metrics и tracer -- `adapter/otel/` - OpenTelemetry adapters -- `adapter/di/` - composition root и singleton wiring -- `adapter/http/fastapi/` - HTTP-схемы, dependencies, middleware и routers -- `config/` - YAML-конфиг приложения и локального OTel collector +Если ты меняешь проект как AI-агент, сначала прочитай: -## Для ИИ +1. [AGENTS.md](AGENTS.md) +2. [docs/CODESTYLE.md](docs/CODESTYLE.md) +3. [docs/PROJECT_GUIDE_RU.md](docs/PROJECT_GUIDE_RU.md) +4. 
[docs/CLEAN_ARCHITECTURE_RU.md](docs/CLEAN_ARCHITECTURE_RU.md) +5. [docs/OBSERVABILITY_RU.md](docs/OBSERVABILITY_RU.md) +6. релевантные ADR в `docs/` +7. [tasks.md](tasks.md) -Если ты AI-агент и собираешься что-то менять в проекте, сначала прочитай документы в таком порядке: - -1. [Правила проекта и ограничения агента](AGENTS.md) - обязательные правила работы в этом репозитории -2. [Кодстайл проекта для AI-агента](docs/CODESTYLE.md) - границы слоев, стиль кода и правила зависимостей -3. [Как чистая архитектура реализована здесь](docs/PROJECT_GUIDE_RU.md) - практическая карта проекта и типовые сценарии изменений -4. [Чистая архитектура, SOLID, DIP, Protocol и repository](docs/CLEAN_ARCHITECTURE_RU.md) - базовые архитектурные принципы и примеры -5. [Логи, метрики и трейсы в этом проекте](docs/OBSERVABILITY_RU.md) - читать перед любыми изменениями в observability, middleware и runtime wiring -6. [ADR в `docs/`](docs/001-composition-root-and-lifetimes.md) - читать релевантные решения перед изменением архитектуры или startup wiring -7. 
[План задач и история работ](tasks.md) - понять, что уже сделано, что отложено и какие ограничения были зафиксированы - -Перед началом работы: - -- Определи, в каком слое будет изменение: `domain/`, `usecase/`, `repository/` или `adapter/` -- Убедись, что зависимости идут только внутрь -- Не тащи FastAPI и OpenTelemetry во внутренние слои -- Сначала изучи существующий код в нужной директории, потом вноси изменения -- Если задача затрагивает архитектурное решение, сначала сверяйся с ADR и проектными правилами - -## Запуск и команды - -- Для локального запуска нужны `APP_API_TOKEN` и `APP_SIGNING_KEY` -- `make run` запускает приложение локально -- `make run-otel` запускает приложение с локальными OTel endpoints из env vars -- `make pre-commit` запускает `ruff`, `mypy` и `pytest` -- `make compose-up` поднимает приложение и локальный LGTM stack через Docker Compose +Главные правила: +- сначала определи слой изменения +- зависимости только внутрь +- не тащи FastAPI и OpenTelemetry во внутренние слои +- архитектурные решения сверяй с ADR diff --git a/config/docker-compose.yml b/config/docker-compose.yml index a601f99..5ddb745 100644 --- a/config/docker-compose.yml +++ b/config/docker-compose.yml @@ -29,8 +29,8 @@ docker: sandbox: image: nginx:1.27-alpine - ttl_seconds: 30 - cleanup_interval_seconds: 5 + ttl_seconds: 300 + cleanup_interval_seconds: 60 chats_root: /var/lib/master-sandbox/chats dependencies_host_path: /var/lib/master-dependencies lambda_tools_host_path: /var/lib/master-lambda-tools diff --git a/tasks.md b/tasks.md index 861a726..d5009d7 100644 --- a/tasks.md +++ b/tasks.md @@ -353,7 +353,7 @@ ### M29. 
Финальный boundary review для sandbox observability - Субагент: `code-reviewer` -- Статус: pending +- Статус: completed - Зависимости: `M28` - Commit required: no - Scope: подтвердить, что M27-M28 закрыли remaining M26 замечания diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index 267d177..7f71275 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -14,7 +14,7 @@ from adapter.docker.runtime import DockerSandboxRuntime from adapter.observability.noop import NoopMetrics, NoopTracer from domain.error import SandboxError, SandboxStartError from domain.sandbox import SandboxSession, SandboxStatus -from usecase.interface import AttrValue, Attrs +from usecase.interface import Attrs, AttrValue CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000') NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'