diff --git a/.gitignore b/.gitignore index 56872e7..35df624 100644 --- a/.gitignore +++ b/.gitignore @@ -16,5 +16,3 @@ wheels/ !tasks.md opencode.json - -practice_report.md diff --git a/Makefile b/Makefile index 37d717e..ed72fd7 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ .PHONY: help install test lint typecheck run run-otel compose-build compose-up compose-down compose-logs compose-ps pre-commit COMPOSE ?= docker-compose -APP_API_TOKEN ?= 1234 -APP_SIGNING_KEY ?= 1234 +APP_API_TOKEN ?= local-api-token +APP_SIGNING_KEY ?= local-signing-key APP_OTEL_LOGS_ENDPOINT ?= http://localhost:4318/v1/logs APP_OTEL_METRICS_ENDPOINT ?= http://localhost:4318/v1/metrics APP_OTEL_TRACES_ENDPOINT ?= http://localhost:4318/v1/traces diff --git a/README.md b/README.md index 715fedc..3ff3dd6 100644 --- a/README.md +++ b/README.md @@ -18,15 +18,12 @@ Текущий sandbox MVP покрывает: - `GET /api/v1/health` -- `POST /api/v1/create` с `chat_id`, `agent_id` и `volume_host_path` -- `DELETE /api/v1/sandboxes/{chat_id}` для удаления активной sandbox +- `POST /api/v1/create` с `chat_id: UUID` - одну активную sandbox на чат -- reuse активной sandbox до истечения TTL при совпадении `agent_id` и `volume_host_path` -- возврат Docker-network endpoint `ip:port` после создания или reuse sandbox +- reuse активной sandbox до истечения TTL - cleanup просроченных sandbox в фоне - startup reconciliation по Docker labels после рестарта сервиса -- chat mount `rw`, request volume mount `rw`, dependencies mount `ro`, lambda-tools mount `ro` -- прокидывание `AGENT_ID` в env sandbox-контейнера +- chat mount `rw`, dependencies mount `ro`, lambda-tools mount `ro` - логи, метрики и трейсы через порты `Logger`, `Metrics`, `Tracer` Пока вне scope: @@ -84,15 +81,11 @@ APP_API_TOKEN=local-api-token APP_SIGNING_KEY=local-signing-key make run - Docker daemon должен быть доступен по `docker.base_url` - образ `sandbox.image` должен существовать локально - директории `sandbox.dependencies_host_path` и `sandbox.lambda_tools_host_path` должны существовать -- Docker network `sandbox.network_name` должен существовать заранее -- `volume_host_path` из request должен быть абсолютным host path В дефолтном `config/app.yaml` это значит: ```bash mkdir -p var/sandbox/dependencies var/sandbox/lambda-tools -mkdir -p /tmp/master-volume -docker network inspect sandbox >/dev/null || docker network create sandbox docker image inspect ai-agent:latest >/dev/null ``` @@ -112,11 +105,7 @@ curl http://127.0.0.1:8123/api/v1/health ```bash curl -X POST http://127.0.0.1:8123/api/v1/create \ -H 'Content-Type: application/json' \ - -d '{ - "chat_id":"11111111-1111-1111-1111-111111111111", - "agent_id":"agent-local", - "volume_host_path":"/tmp/master-volume" - }' + -d '{"chat_id":"11111111-1111-1111-1111-111111111111"}' ``` Пример ответа: @@ -125,41 +114,12 @@ curl -X POST http://127.0.0.1:8123/api/v1/create \ { "session_id": "3701cfe3-e05e-48af-8385-442dcd954ca2", "chat_id": "11111111-1111-1111-1111-111111111111", - "agent_id": "agent-local", - "volume_host_path": "/tmp/master-volume", "container_id": "64d839c6007de9396ee08ad4af4a22a59a6410ec5f4892a9277a87eb49c3ff5d", - "endpoint": { - "ip": "172.18.0.5", - "port": 8000 - }, "status": "running", "expires_at": "2026-04-02T21:11:38.292893Z" } ``` -Повторный create для того же `chat_id` вернет существующую sandbox только если совпадают `agent_id` и canonical `volume_host_path`. -Если параметры отличаются, API вернет `409 sandbox_conflict`. - -Удаление sandbox по chat: - -```bash -curl -X DELETE http://127.0.0.1:8123/api/v1/sandboxes/11111111-1111-1111-1111-111111111111 -``` - -Пример ответа: - -```json -{ - "chat_id": "11111111-1111-1111-1111-111111111111", - "result": "deleted", - "session_id": "3701cfe3-e05e-48af-8385-442dcd954ca2", - "container_id": "64d839c6007de9396ee08ad4af4a22a59a6410ec5f4892a9277a87eb49c3ff5d" -} -``` - -Если активной sandbox нет, `result` будет `not_found`. -Возвращенный `endpoint.ip` доступен из контейнеров, подключенных к той же Docker network. - ## Запуск через Docker Compose Для локального smoke-run есть `docker-compose.yml`. @@ -193,7 +153,6 @@ make compose-down Важно: - в `config/docker-compose.yml` сейчас для smoke-проверки стоит `sandbox.image: nginx:1.27-alpine` - для реального agent runtime замени `sandbox.image` на образ своего sandbox/agent контейнера -- sandbox подключается к Docker network из `sandbox.network_name`; внешний контейнер-интеграцию нужно подключить к этой же сети - в compose auth env vars нужны для startup config, но текущий MVP API еще не проверяет request token ## Как конфигурировать @@ -213,7 +172,7 @@ make compose-down - `APP_API_TOKEN` - `APP_SIGNING_KEY` -Сейчас это startup config, а не активная request auth для `/api/v1/create`, `/api/v1/sandboxes/{chat_id}` и `/api/v1/health`. +Сейчас это startup config, а не активная request auth для `/api/v1/create` и `/api/v1/health`. То есть в текущем MVP токен не нужно передавать в HTTP headers для вызова этих endpoint. ### Основные секции YAML @@ -256,8 +215,6 @@ make compose-down #### Sandbox - `APP_SANDBOX_IMAGE` -- `APP_SANDBOX_NETWORK_NAME` -- `APP_SANDBOX_AGENT_SERVICE_PORT` - `APP_SANDBOX_TTL_SECONDS` - `APP_SANDBOX_CLEANUP_INTERVAL_SECONDS` - `APP_SANDBOX_CHATS_ROOT` @@ -266,7 +223,6 @@ make compose-down - `APP_SANDBOX_CHAT_MOUNT_PATH` - `APP_SANDBOX_DEPENDENCIES_MOUNT_PATH` - `APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH` -- `APP_SANDBOX_VOLUME_MOUNT_PATH` #### Security - `APP_API_TOKEN_HEADER` @@ -277,8 +233,6 @@ make compose-down - `docker.base_url` — адрес Docker daemon - `sandbox.image` — образ sandbox контейнера -- `sandbox.network_name` — Docker network для sandbox и интеграций -- `sandbox.agent_service_port` — порт agent service внутри sandbox, возвращается в response endpoint - `sandbox.ttl_seconds` — TTL sandbox - `sandbox.cleanup_interval_seconds` — частота cleanup loop - `sandbox.chats_root` — корень chat directories @@ -287,7 +241,6 @@ make compose-down - `sandbox.chat_mount_path` — путь внутри sandbox для chat volume - `sandbox.dependencies_mount_path` — путь внутри sandbox для dependency cache - `sandbox.lambda_tools_mount_path` — путь внутри sandbox для lambda-tools -- `sandbox.volume_mount_path` — путь внутри sandbox для request `volume_host_path` ## Основные команды @@ -325,7 +278,6 @@ make compose-down - [006 MVP Docker Sandbox Orchestration](docs/006-mvp-docker-sandbox-orchestration.md) - [007 Startup Sandbox Reconciliation](docs/007-startup-sandbox-reconciliation.md) - [008 Sandbox Lifecycle Observability](docs/008-sandbox-lifecycle-observability.md) -- [009 Sandbox HTTP Control and Runtime Params](docs/009-sandbox-http-control-and-runtime-params.md) ## Для AI-агента diff --git a/adapter/config/loader.py b/adapter/config/loader.py index b3b47c3..f33b908 100644 --- a/adapter/config/loader.py +++ b/adapter/config/loader.py @@ -247,20 +247,6 @@ def _load_sandbox_config( env, 'APP_SANDBOX_IMAGE', ), - network_name=_yaml_or_env_str( - section, - 'network_name', - 'sandbox.network_name', - env, - 'APP_SANDBOX_NETWORK_NAME', - ), - agent_service_port=_yaml_or_env_int( - section, - 'agent_service_port', - 'sandbox.agent_service_port', - env, - 'APP_SANDBOX_AGENT_SERVICE_PORT', - ), ttl_seconds=_yaml_or_env_int( section, 'ttl_seconds', @@ -317,31 +303,9 @@ def _load_sandbox_config( env, 'APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH', ), - volume_mount_path=_yaml_or_env_str( - section, - 'volume_mount_path', - 'sandbox.volume_mount_path', - env, - 'APP_SANDBOX_VOLUME_MOUNT_PATH', - ), - extra_env=_load_sandbox_extra_env(section), ) -def _load_sandbox_extra_env(section: Mapping[str, object]) -> dict[str, str]: - raw = section.get('env') - if raw is None: - return {} - if not isinstance(raw, dict): - raise ConfigError('invalid sandbox.env') - result: dict[str, str] = {} - for key, value in raw.items(): - if not isinstance(key, str): - raise ConfigError('invalid sandbox.env key') - result[key] = str(value) - return result - - def _load_otel_config( data: Mapping[str, object], env: Mapping[str, str], diff --git a/adapter/config/model.py b/adapter/config/model.py index 594a1ca..3a8e70d 100644 --- a/adapter/config/model.py +++ b/adapter/config/model.py @@ -48,8 +48,6 @@ class DockerConfig: @dataclass(frozen=True, slots=True) class SandboxConfig: image: str - network_name: str - agent_service_port: int ttl_seconds: int cleanup_interval_seconds: int chats_root: str @@ -58,8 +56,6 @@ class SandboxConfig: chat_mount_path: str dependencies_mount_path: str lambda_tools_mount_path: str - volume_mount_path: str - extra_env: dict[str, str] @dataclass(frozen=True, slots=True) diff --git a/adapter/di/container.py b/adapter/di/container.py index 1d68b13..b18382c 100644 --- a/adapter/di/container.py +++ b/adapter/di/container.py @@ -15,7 +15,7 @@ from adapter.sandbox.reconciliation import SandboxSessionReconciler from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository from usecase.interface import Clock -from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, DeleteSandbox +from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox @dataclass(frozen=True, slots=True) @@ -27,7 +27,6 @@ class AppRepositories: class AppUsecases: create_sandbox: CreateSandbox cleanup_expired_sandboxes: CleanupExpiredSandboxes - delete_sandbox: DeleteSandbox @dataclass(slots=True) @@ -117,14 +116,6 @@ def build_container( metrics=observability.metrics, tracer=observability.tracer, ), - delete_sandbox=DeleteSandbox( - repository=sandbox_repository, - locker=sandbox_locker, - runtime=sandbox_runtime, - logger=observability.logger, - metrics=observability.metrics, - tracer=observability.tracer, - ), ) return AppContainer( diff --git a/adapter/docker/runtime.py b/adapter/docker/runtime.py index 5be188f..3c6e93c 100644 --- a/adapter/docker/runtime.py +++ b/adapter/docker/runtime.py @@ -9,17 +9,10 @@ from docker.types import Mount from adapter.config.model import SandboxConfig from domain.error import SandboxError, SandboxStartError -from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus +from domain.sandbox import SandboxSession, SandboxStatus from usecase.interface import Metrics, SandboxRuntime, Span, Tracer -SANDBOX_LABELS = ( - 'session_id', - 'chat_id', - 'expires_at', - 'agent_id', - 'volume_host_path', - 'endpoint_port', -) +SANDBOX_LABELS = ('session_id', 'chat_id', 'expires_at') class DockerSandboxRuntime(SandboxRuntime): @@ -40,8 +33,6 @@ class DockerSandboxRuntime(SandboxRuntime): *, session_id: UUID, chat_id: UUID, - agent_id: str, - volume_host_path: str, created_at: datetime, expires_at: datetime, ) -> SandboxSession: @@ -58,7 +49,6 @@ class DockerSandboxRuntime(SandboxRuntime): try: try: chat_path = self._chat_path(chat_id) - volume_path = self._request_host_path(volume_host_path) dependencies_path = self._readonly_host_path( self._config.dependencies_host_path ) @@ -69,42 +59,22 @@ class DockerSandboxRuntime(SandboxRuntime): container = self._client.containers.run( self._config.image, detach=True, - environment={**self._config.extra_env, 'AGENT_ID': agent_id}, - labels=self._labels( - session_id, - chat_id, - expires_at, - agent_id, - str(volume_path), - ), + labels=self._labels(session_id, chat_id, expires_at), mounts=self._mounts( chat_path, - volume_path, dependencies_path, lambda_tools_path, ), - network=self._config.network_name, ) - - try: - container_id = str(getattr(container, 'id', '')).strip() - if not container_id: - raise ValueError('invalid container id') - - endpoint = self._endpoint_from_container(container) - except (DockerException, OSError, ValueError) as exc: - self._remove_created_container(container, str(chat_id), exc) - raise SandboxStartError(str(chat_id), str(exc)) from exc - except SandboxStartError: - raise except (DockerException, OSError, ValueError) as exc: - raise SandboxStartError(str(chat_id), str(exc)) from exc + raise SandboxStartError(str(chat_id)) from exc + + container_id = str(getattr(container, 'id', '')).strip() + if not container_id: + raise SandboxStartError(str(chat_id)) result = 'created' span.set_attribute('container.id', container_id) - span.set_attribute('agent.id', agent_id) - span.set_attribute('sandbox.endpoint.ip', endpoint.ip) - span.set_attribute('sandbox.endpoint.port', endpoint.port) span.set_attribute('sandbox.result', result) return SandboxSession( session_id=session_id, @@ -113,9 +83,6 @@ class DockerSandboxRuntime(SandboxRuntime): status=SandboxStatus.RUNNING, created_at=created_at, expires_at=expires_at, - agent_id=agent_id, - volume_host_path=str(volume_path), - endpoint=endpoint, ) except Exception as exc: span.set_attribute('sandbox.result', result) @@ -165,39 +132,6 @@ class DockerSandboxRuntime(SandboxRuntime): attrs=_runtime_metric_attrs('stop', result), ) - def delete(self, container_id: str) -> None: - started_at = time.perf_counter() - result = 'error' - - with self._tracer.start_span( - 'adapter.docker.delete_sandbox', - attrs={'container.id': container_id}, - ) as span: - try: - container = self._client.containers.get(container_id) - _set_span_container_attrs(span, container) - container.remove(force=True) - result = 'deleted' - span.set_attribute('sandbox.result', result) - except NotFound: - result = 'not_found' - span.set_attribute('sandbox.result', result) - return - except DockerException as exc: - span.set_attribute('sandbox.result', result) - span.record_error(exc) - self._metrics.increment( - 'sandbox.runtime.error.total', - attrs=_runtime_error_metric_attrs('delete', type(exc).__name__), - ) - raise SandboxError('sandbox_delete_failed') from exc - finally: - self._metrics.record( - 'sandbox.runtime.delete.duration_ms', - _duration_ms(started_at), - attrs=_runtime_metric_attrs('delete', result), - ) - def list_active_sessions(self) -> list[SandboxSession]: started_at = time.perf_counter() result = 'error' @@ -245,22 +179,16 @@ class DockerSandboxRuntime(SandboxRuntime): session_id: UUID, chat_id: UUID, expires_at: datetime, - agent_id: str, - volume_host_path: str, ) -> dict[str, str]: return { 'session_id': str(session_id), 'chat_id': str(chat_id), 'expires_at': expires_at.isoformat(), - 'agent_id': agent_id, - 'volume_host_path': volume_host_path, - 'endpoint_port': str(self._config.agent_service_port), } def _mounts( self, chat_path: Path, - volume_path: Path, dependencies_path: Path, lambda_tools_path: Path, ) -> list[Mount]: @@ -282,11 +210,6 @@ class DockerSandboxRuntime(SandboxRuntime): type='bind', read_only=True, ), - Mount( - target=self._config.volume_mount_path, - source=str(volume_path), - type='bind', - ), ] def _chat_path(self, chat_id: UUID) -> Path: @@ -302,29 +225,6 @@ class DockerSandboxRuntime(SandboxRuntime): raise ValueError('invalid host path') return host_path - def _request_host_path(self, path_value: str) -> Path: - host_path = Path(path_value).expanduser() - if not host_path.is_absolute(): - raise ValueError('invalid host path') - return host_path.resolve(strict=False) - - def _remove_created_container( - self, - container: object, - chat_id: str, - error: Exception, - ) -> None: - remove = getattr(container, 'remove', None) - if not callable(remove): - raise SandboxStartError(chat_id) from error - - try: - remove(force=True) - except NotFound: - return - except Exception as exc: - raise SandboxStartError(chat_id) from exc - def _session_from_container(self, container: object) -> SandboxSession | None: container_id = str(getattr(container, 'id', '')).strip() labels = getattr(container, 'labels', None) @@ -334,14 +234,6 @@ class DockerSandboxRuntime(SandboxRuntime): try: session_id = UUID(labels['session_id']) chat_id = UUID(labels['chat_id']) - agent_id = labels['agent_id'] - volume_host_path = labels['volume_host_path'] - endpoint_port = int(labels['endpoint_port']) - if not isinstance(agent_id, str) or not isinstance(volume_host_path, str): - raise ValueError('invalid sandbox labels') - if not Path(volume_host_path).is_absolute() or endpoint_port <= 0: - raise ValueError('invalid sandbox labels') - endpoint = self._endpoint_from_container(container, endpoint_port) created_at = self._container_created_at(container) expires_at = _parse_datetime(labels['expires_at']) except (KeyError, TypeError, ValueError): @@ -354,13 +246,18 @@ class DockerSandboxRuntime(SandboxRuntime): status=SandboxStatus.RUNNING, created_at=created_at, expires_at=expires_at, - agent_id=agent_id, - volume_host_path=volume_host_path, - endpoint=endpoint, ) def _container_created_at(self, container: object) -> datetime: - attrs = self._container_attrs(container) + attrs = getattr(container, 'attrs', None) + if not isinstance(attrs, dict): + reload_container = getattr(container, 'reload', None) + if callable(reload_container): + reload_container() + attrs = getattr(container, 'attrs', None) + + if not isinstance(attrs, dict): + raise ValueError('invalid container attrs') raw_created_at = attrs.get('Created') if not isinstance(raw_created_at, str): @@ -368,42 +265,6 @@ class DockerSandboxRuntime(SandboxRuntime): return _parse_datetime(raw_created_at) - def _endpoint_from_container( - self, - container: object, - port: int | None = None, - ) -> SandboxEndpoint: - attrs = self._container_attrs(container) - network_settings = attrs.get('NetworkSettings') - if not isinstance(network_settings, dict): - raise ValueError('invalid endpoint') - - networks = network_settings.get('Networks') - if not isinstance(networks, dict): - raise ValueError('invalid endpoint') - - network = networks.get(self._config.network_name) - if not isinstance(network, dict): - raise ValueError('invalid endpoint') - - ip = network.get('IPAddress') - if not isinstance(ip, str) or not ip: - raise ValueError('invalid endpoint') - - endpoint_port = self._config.agent_service_port if port is None else port - return SandboxEndpoint(ip=ip, port=endpoint_port) - - def _container_attrs(self, container: object) -> dict[str, object]: - reload_container = getattr(container, 'reload', None) - if callable(reload_container): - reload_container() - - attrs = getattr(container, 'attrs', None) - if not isinstance(attrs, dict): - raise ValueError('invalid container attrs') - - return attrs - def _host_path(self, path_value: str) -> Path: return Path(path_value).expanduser().resolve(strict=False) diff --git a/adapter/http/fastapi/app.py b/adapter/http/fastapi/app.py index 7d63d1e..e9ba18f 100644 --- a/adapter/http/fastapi/app.py +++ b/adapter/http/fastapi/app.py @@ -1,8 +1,6 @@ import asyncio from collections.abc import Awaitable, Callable -from pathlib import Path -from docker.errors import NotFound from fastapi import FastAPI from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor @@ -49,25 +47,6 @@ def create_app(config: AppConfig | None = None) -> FastAPI: raise -def _ensure_sandbox_network(container: AppContainer) -> None: - client = container._docker_client - network_name = container.config.sandbox.network_name - try: - client.networks.get(network_name) - except NotFound: - client.networks.create(network_name) - container.observability.logger.info( - 'sandbox_network_created', - attrs={'network': network_name}, - ) - - -def _ensure_sandbox_dirs(container: AppContainer) -> None: - cfg = container.config.sandbox - for path_str in (cfg.dependencies_host_path, cfg.lambda_tools_host_path): - Path(path_str).mkdir(parents=True, exist_ok=True) - - def _build_startup_handler( app: FastAPI, container: AppContainer, @@ -77,8 +56,6 @@ def _build_startup_handler( if task is not None and not task.done(): return - await asyncio.to_thread(_ensure_sandbox_network, container) - await asyncio.to_thread(_ensure_sandbox_dirs, container) await asyncio.to_thread(container.sandbox_reconciler.execute) stop_event = asyncio.Event() diff --git a/adapter/http/fastapi/dependencies.py b/adapter/http/fastapi/dependencies.py index 222fd84..57af579 100644 --- a/adapter/http/fastapi/dependencies.py +++ b/adapter/http/fastapi/dependencies.py @@ -1,7 +1,7 @@ from fastapi import Depends, Request from adapter.di.container import AppContainer -from usecase.sandbox import CreateSandbox, DeleteSandbox +from usecase.sandbox import CreateSandbox APP_CONTAINER_STATE = 'container' APP_CONFIG_STATE = 'config' @@ -18,9 +18,3 @@ def get_create_sandbox( container: AppContainer = Depends(get_container), ) -> CreateSandbox: return container.usecases.create_sandbox - - -def get_delete_sandbox( - container: AppContainer = Depends(get_container), -) -> DeleteSandbox: - return container.usecases.delete_sandbox diff --git a/adapter/http/fastapi/routers/v1/router.py b/adapter/http/fastapi/routers/v1/router.py index cb71748..df713b1 100644 --- a/adapter/http/fastapi/routers/v1/router.py +++ b/adapter/http/fastapi/routers/v1/router.py @@ -1,30 +1,19 @@ -from uuid import UUID - from fastapi import APIRouter, Depends, HTTPException, status from adapter.di.container import AppContainer from adapter.http.fastapi.dependencies import ( get_container, get_create_sandbox, - get_delete_sandbox, ) from adapter.http.fastapi.schemas import ( CreateSandboxRequest, - DeleteSandboxResponse, ErrorResponse, HealthResponse, - SandboxEndpointResponse, SandboxSessionResponse, ) -from domain.error import SandboxConflictError, SandboxError, SandboxStartError +from domain.error import SandboxError, SandboxStartError from domain.sandbox import SandboxSession -from usecase.sandbox import ( - CreateSandbox, - CreateSandboxCommand, - DeleteSandbox, - DeleteSandboxCommand, - DeleteSandboxResult, -) +from usecase.sandbox import CreateSandbox, CreateSandboxCommand router = APIRouter() @@ -46,7 +35,6 @@ def health(container: AppContainer = Depends(get_container)) -> HealthResponse: '/create', response_model=SandboxSessionResponse, responses={ - status.HTTP_409_CONFLICT: {'model': ErrorResponse}, status.HTTP_503_SERVICE_UNAVAILABLE: {'model': ErrorResponse}, status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse}, }, @@ -57,23 +45,11 @@ def create_sandbox( usecase: CreateSandbox = Depends(get_create_sandbox), ) -> SandboxSessionResponse: try: - session = usecase.execute( - CreateSandboxCommand( - chat_id=request.chat_id, - agent_id=request.agent_id, - volume_host_path=request.volume_host_path, - ) - ) - except SandboxConflictError as exc: - raise HTTPException( - status_code=status.HTTP_409_CONFLICT, - detail=str(exc), - ) from exc + session = usecase.execute(CreateSandboxCommand(chat_id=request.chat_id)) except SandboxStartError as exc: - detail = f'{exc}: {exc.reason}' if exc.reason else str(exc) raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail=detail, + detail=str(exc), ) from exc except SandboxError as exc: raise HTTPException( @@ -84,55 +60,11 @@ def create_sandbox( return _to_sandbox_session_response(session) -@router.delete( - '/sandboxes/{chat_id}', - response_model=DeleteSandboxResponse, - responses={ - status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse}, - }, - status_code=status.HTTP_200_OK, -) -def delete_sandbox( - chat_id: UUID, - usecase: DeleteSandbox = Depends(get_delete_sandbox), -) -> DeleteSandboxResponse: - try: - result = usecase.execute(DeleteSandboxCommand(chat_id=chat_id)) - except SandboxError as exc: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=str(exc), - ) from exc - - return _to_delete_sandbox_response(result) - - def _to_sandbox_session_response(session: SandboxSession) -> SandboxSessionResponse: - if session.endpoint is None: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail='sandbox_endpoint_unavailable', - ) - return SandboxSessionResponse( session_id=session.session_id, chat_id=session.chat_id, - agent_id=session.agent_id, - volume_host_path=session.volume_host_path, container_id=session.container_id, - endpoint=SandboxEndpointResponse( - ip=session.endpoint.ip, - port=session.endpoint.port, - ), status=session.status.value, expires_at=session.expires_at, ) - - -def _to_delete_sandbox_response(result: DeleteSandboxResult) -> DeleteSandboxResponse: - return DeleteSandboxResponse( - chat_id=result.chat_id, - result=result.result, - session_id=result.session_id, - container_id=result.container_id, - ) diff --git a/adapter/http/fastapi/schemas.py b/adapter/http/fastapi/schemas.py index 98e7129..35992ee 100644 --- a/adapter/http/fastapi/schemas.py +++ b/adapter/http/fastapi/schemas.py @@ -1,8 +1,7 @@ from datetime import datetime -from pathlib import Path from uuid import UUID -from pydantic import BaseModel, ConfigDict, field_validator +from pydantic import BaseModel, ConfigDict class HealthResponse(BaseModel): @@ -15,47 +14,15 @@ class CreateSandboxRequest(BaseModel): model_config = ConfigDict(extra='forbid') chat_id: UUID - agent_id: str - volume_host_path: str - - @field_validator('agent_id') - @classmethod - def validate_agent_id(cls, value: str) -> str: - if not value.strip(): - raise ValueError('invalid agent_id') - return value - - @field_validator('volume_host_path') - @classmethod - def validate_volume_host_path(cls, value: str) -> str: - path = Path(value).expanduser() - if not path.is_absolute(): - raise ValueError('invalid volume_host_path') - return str(path.resolve(strict=False)) - - -class SandboxEndpointResponse(BaseModel): - ip: str - port: int class SandboxSessionResponse(BaseModel): session_id: UUID chat_id: UUID - agent_id: str - volume_host_path: str container_id: str - endpoint: SandboxEndpointResponse status: str expires_at: datetime -class DeleteSandboxResponse(BaseModel): - chat_id: UUID - result: str - session_id: UUID | None = None - container_id: str | None = None - - class ErrorResponse(BaseModel): detail: str diff --git a/config/app.yaml b/config/app.yaml index 77058af..0e729db 100644 --- a/config/app.yaml +++ b/config/app.yaml @@ -29,8 +29,6 @@ docker: sandbox: image: ai-agent:latest - network_name: sandbox - agent_service_port: 8000 ttl_seconds: 300 cleanup_interval_seconds: 60 chats_root: var/sandbox/chats @@ -39,8 +37,6 @@ sandbox: chat_mount_path: /workspace/chat dependencies_mount_path: /opt/dependencies lambda_tools_mount_path: /opt/lambda-tools - volume_mount_path: /workspace/volume - env: {} security: token_header: X-API-Token diff --git a/config/docker-compose.yml b/config/docker-compose.yml index 94b9e75..5ddb745 100644 --- a/config/docker-compose.yml +++ b/config/docker-compose.yml @@ -7,9 +7,9 @@ http: port: 8123 logging: - level: DEBUG - output: stdout - format: text + level: INFO + output: otel + format: json metrics: enabled: true @@ -25,29 +25,18 @@ otel: metric_export_interval: 1000 docker: - base_url: unix:///var/run/docker.sock + base_url: tcp://docker-engine:2375 sandbox: - image: mrkan0/lambda-agent:arm - network_name: sandbox - agent_service_port: 8000 - ttl_seconds: 3000 + image: nginx:1.27-alpine + ttl_seconds: 300 cleanup_interval_seconds: 60 - chats_root: /tmp/master-sandbox/chats - dependencies_host_path: /tmp/master-sandbox/dependencies - lambda_tools_host_path: /tmp/master-sandbox/lambda-tools + chats_root: /var/lib/master-sandbox/chats + dependencies_host_path: /var/lib/master-dependencies + lambda_tools_host_path: /var/lib/master-lambda-tools chat_mount_path: /workspace/chat dependencies_mount_path: /opt/dependencies lambda_tools_mount_path: /opt/lambda-tools - volume_mount_path: /workspace/volume - env: - LANGFUSE_PUBLIC_KEY: pk-lf- - LANGFUSE_SECRET_KEY: sk-lf- - LANGFUSE_HOST: http://localhost:5000 - PROVIDER_URL: http://host.docker.internal:8080/v1 - PROVIDER_API_KEY: "1234" - PROVIDER_MODEL: gemini-3.1-pro-preview - COMPOSIO_API_KEY: ck_r-3c8ClArmcHuSzK5TYu security: token_header: X-API-Token diff --git a/docker-compose.yml b/docker-compose.yml index f14b43a..24d5bab 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,8 @@ services: target: run user: root depends_on: + docker-engine: + condition: service_healthy otel-collector: condition: service_started environment: @@ -15,9 +17,30 @@ services: - '127.0.0.1:8123:8123' volumes: - ./config/docker-compose.yml:/app/config/app.yaml:ro - - /var/run/docker.sock:/var/run/docker.sock - - /tmp/master-sandbox:/tmp/master-sandbox - - /tmp/master-volume:/tmp/master-volume + - sandbox-data:/var/lib/master-sandbox + - sandbox-dependencies:/var/lib/master-dependencies:ro + - sandbox-tools:/var/lib/master-lambda-tools:ro + + docker-engine: + image: docker:28-dind + privileged: true + environment: + DOCKER_TLS_CERTDIR: '' + command: + - --host=tcp://0.0.0.0:2375 + healthcheck: + test: + - CMD + - docker + - info + interval: 5s + timeout: 5s + retries: 12 + volumes: + - docker-data:/var/lib/docker + - sandbox-data:/var/lib/master-sandbox + - sandbox-dependencies:/var/lib/master-dependencies + - sandbox-tools:/var/lib/master-lambda-tools otel-collector: image: grafana/otel-lgtm:latest @@ -27,4 +50,8 @@ services: - lgtm-data:/data volumes: + docker-data: lgtm-data: + sandbox-data: + sandbox-dependencies: + sandbox-tools: diff --git a/docs/009-sandbox-http-control-and-runtime-params.md b/docs/009-sandbox-http-control-and-runtime-params.md deleted file mode 100644 index 5f63e18..0000000 --- a/docs/009-sandbox-http-control-and-runtime-params.md +++ /dev/null @@ -1,20 +0,0 @@ -# 009 Sandbox HTTP control and runtime params - -## Context -- Sandbox API must support explicit delete and richer create params -- Clients need an internal Docker-network endpoint for agent traffic -- MVP accepts trusted internal callers and does not enforce auth yet - -## Decision -- `POST /api/v1/create` accepts `chat_id`, `agent_id`, and absolute `volume_host_path` -- `AGENT_ID` is passed to the sandbox container environment -- The request volume is bind-mounted read-write at configured `volume_mount_path` -- Sandbox containers join configured Docker network `network_name` -- Create returns endpoint `ip:agent_service_port` from that Docker network -- Reuse is allowed only when `agent_id` and `volume_host_path` match -- Mismatch returns sandbox conflict without starting a new container -- `DELETE /api/v1/sandboxes/{chat_id}` deletes the active sandbox without auth - -## Consequences -- Absolute host path is accepted as an MVP risk -- External clients must run in or join the configured Docker network diff --git a/docs/009-storage-foundation.md b/docs/009-storage-foundation.md new file mode 100644 index 0000000..4aadf58 --- /dev/null +++ b/docs/009-storage-foundation.md @@ -0,0 +1,17 @@ +# 009 Storage foundation + +## Context +- v1 storage slice needs workspace, chat and file flows before durable DB +- trusted caller passes `user_id`, and one workspace belongs to one user +- chat content must live outside sandbox lifecycle and survive sandbox restart + +## Decision +- metadata repositories are in-memory for the first storage slice +- `Workspace`, `Chat` and `ChatFile` are first-class domain entities +- filesystem access stays behind storage ports in outer layers +- sandbox later integrates through chat metadata and storage ports, not raw path math in usecases + +## Consequences +- metadata is lost on restart in this phase +- storage usecases and HTTP API can be built before durable persistence +- later durable metadata can replace in-memory adapters behind the same ports diff --git a/docs/010-chat-history-policy.md b/docs/010-chat-history-policy.md new file mode 100644 index 0000000..39c2125 --- /dev/null +++ b/docs/010-chat-history-policy.md @@ -0,0 +1,17 @@ +# 010 Chat history policy + +## Context +- v1 keeps chat history in filesystem, not in central DB +- chat metadata must not depend on parsing history content +- each chat already maps to an isolated working directory + +## Decision +- each chat owns one `history.md` inside its chat directory +- `history.md` is created with chat layout initialization +- chat metadata stores identity and lifecycle fields separately from history content +- history read and write stay behind storage ports in outer layers + +## Consequences +- history survives sandbox restart with chat storage +- metadata and content evolve independently +- later migration to another history backend can keep the same chat identity model diff --git a/domain/chat.py b/domain/chat.py new file mode 100644 index 0000000..3dbd7cb --- /dev/null +++ b/domain/chat.py @@ -0,0 +1,35 @@ +from dataclasses import dataclass +from datetime import datetime +from uuid import UUID + +HISTORY_FILE_NAME = 'history.md' + + +@dataclass(frozen=True, slots=True) +class ChatAttachmentName: + value: str + + def __post_init__(self) -> None: + if not self.value or self.value in {'.', '..'}: + raise ValueError('invalid attachment name') + if '/' in self.value or '\\' in self.value: + raise ValueError('invalid attachment name') + if self.value == HISTORY_FILE_NAME: + raise ValueError('reserved attachment name') + + +@dataclass(frozen=True, slots=True) +class Chat: + chat_id: UUID + workspace_id: UUID + created_at: datetime + + +@dataclass(frozen=True, slots=True) +class ChatFile: + file_id: UUID + chat_id: UUID + name: ChatAttachmentName + content_type: str | None + size_bytes: int + created_at: datetime diff --git a/domain/error.py b/domain/error.py index ca3a143..ff3486e 100644 --- a/domain/error.py +++ b/domain/error.py @@ -1,3 +1,6 @@ +from uuid import UUID + + class DomainError(Exception): pass @@ -18,24 +21,59 @@ class UserConflictError(UserError): self.email = email +class WorkspaceError(DomainError): + pass + + +class WorkspaceNotFoundError(WorkspaceError): + def __init__(self, workspace_id: UUID) -> None: + super().__init__('workspace_not_found') + self.workspace_id = workspace_id + + +class WorkspaceQuotaExceededError(WorkspaceError): + def __init__(self, workspace_id: UUID) -> None: + super().__init__('workspace_quota_exceeded') + self.workspace_id = workspace_id + + +class ChatError(DomainError): + pass + + +class ChatNotFoundError(ChatError): + def __init__(self, chat_id: UUID) -> None: + super().__init__('chat_not_found') + self.chat_id = chat_id + + +class ChatHasActiveSandboxError(ChatError): + def __init__(self, chat_id: UUID) -> None: + super().__init__('chat_has_active_sandbox') + self.chat_id = chat_id + + +class ChatFileError(DomainError): + pass + + +class ChatFileNotFoundError(ChatFileError): + def __init__(self, file_id: UUID) -> None: + super().__init__('chat_file_not_found') + self.file_id = file_id + + class SandboxError(DomainError): pass class SandboxStartError(SandboxError): - def __init__(self, chat_id: str, reason: str = '') -> None: + def __init__(self, chat_id: str) -> None: super().__init__('sandbox_start_failed') self.chat_id = chat_id - self.reason = reason class SandboxAlreadyRunningError(SandboxError): def __init__(self, chat_id: str) -> None: super().__init__('sandbox_already_running') self.chat_id = chat_id - - -class SandboxConflictError(SandboxError): - def __init__(self, chat_id: str) -> None: - super().__init__('sandbox_conflict') - self.chat_id = chat_id diff --git a/domain/sandbox.py b/domain/sandbox.py index 324bbe4..a9b0f40 100644 --- a/domain/sandbox.py +++ b/domain/sandbox.py @@ -12,12 +12,6 @@ class SandboxStatus(str, Enum): FAILED = 'failed' -@dataclass(frozen=True, slots=True) -class SandboxEndpoint: - ip: str - port: int - - @dataclass(frozen=True, slots=True) class SandboxSession: session_id: UUID @@ -26,6 +20,3 @@ class SandboxSession: status: SandboxStatus created_at: datetime expires_at: datetime - agent_id: str = '' - volume_host_path: str = '' - endpoint: SandboxEndpoint | None = None diff --git a/domain/workspace.py b/domain/workspace.py new file mode 100644 index 0000000..3526203 --- /dev/null +++ b/domain/workspace.py @@ -0,0 +1,17 @@ +from dataclasses import dataclass +from datetime import datetime +from uuid import UUID + + +@dataclass(frozen=True, slots=True) +class Workspace: + workspace_id: UUID + user_id: UUID + created_at: datetime + + +@dataclass(frozen=True, slots=True) +class WorkspaceUsage: + workspace_id: UUID + used_bytes: int + quota_bytes: int diff --git a/tasks.md b/tasks.md index 3fbdfe9..d5009d7 100644 --- a/tasks.md +++ b/tasks.md @@ -359,124 +359,3 @@ - Scope: подтвердить, что M27-M28 закрыли remaining M26 замечания - Файлы: весь измененный код после `M27`-`M28` - Критерии приемки: нет замечаний по rollback gap и startup failure observability coverage; sandbox observability slice приемлем as-is - -## Follow-up: sandbox HTTP delete, runtime endpoint, agent env и request volume - -### M30. ADR и контракты sandbox runtime params/control - -- Исполнитель: `primary-agent` -- Статус: completed -- Зависимости: нет -- Commit required: no -- Scope: зафиксировать решение в ADR-lite и подготовить минимальные domain/usecase контракты для `agent_id`, host volume, endpoint и delete sandbox -- Файлы: `docs/009-sandbox-http-control-and-runtime-params.md`, `domain/sandbox.py`, `domain/error.py`, `usecase/interface.py`, `usecase/sandbox.py` -- Решение: create принимает `agent_id` и absolute `volume_host_path`; reuse разрешен только при совпадении параметров; delete выполняется по `chat_id`; response содержит Docker-network endpoint `ip:port` -- Критерии приемки: ADR занимает 10-20 строк; в domain есть endpoint/session metadata; usecase contracts не импортируют Docker/FastAPI; есть stubs для `DeleteSandbox` без бизнес-логики - -### M31. Typed config для sandbox network и runtime port - -- Субагент: `feature-developer` -- Статус: completed -- Зависимости: `M30` -- Commit required: no -- Scope: добавить typed-config настройки Docker network, agent service port и mount target для request volume -- Файлы: `adapter/config/model.py`, `adapter/config/loader.py`, `config/app.yaml`, при необходимости `config/docker-compose.yml`, tests config loader -- Решение: добавить `sandbox.network_name: sandbox`, `sandbox.agent_service_port: 8000`, `sandbox.volume_mount_path: /workspace/volume`; network должен существовать заранее -- Критерии приемки: config собирается в dataclass tree; env overrides поддержаны; inner layers не читают env/YAML; defaults отражены в локальном config - -### M32. Docker runtime для env, volume, network endpoint и delete - -- Субагент: `feature-developer` -- Статус: completed -- Зависимости: `M30`, `M31` -- Commit required: no -- Scope: обновить Docker adapter create/delete и reconciliation под новые sandbox runtime параметры -- Файлы: `adapter/docker/runtime.py`, `adapter/sandbox/reconciliation.py`, при необходимости focused tests helpers -- Решение: `containers.run` получает `environment={'AGENT_ID': agent_id}`, `network=config.sandbox.network_name`, extra bind mount `volume_host_path -> volume_mount_path`; после start runtime достает IP из configured network и возвращает endpoint; delete делает `remove(force=True)` и NotFound считает идемпотентным успехом; labels содержат `agent_id`, `volume_host_path` и endpoint port metadata для reconciliation -- Критерии приемки: Docker details остаются в adapter; mount policy сохраняет chat `rw`, deps/tools `ro`, request volume `rw`; create возвращает endpoint; startup reconciliation восстанавливает новые session fields; observability duration/error metrics не регрессируют - -### M33. CreateSandbox параметры и conflict semantics - -- Субагент: `feature-developer` -- Статус: completed -- Зависимости: `M30`, `M32` -- Commit required: no -- Scope: обновить create usecase под `agent_id`, `volume_host_path`, endpoint response и reuse conflict rules -- Файлы: `usecase/sandbox.py`, `domain/error.py`, `adapter/di/container.py`, при необходимости `repository/sandbox_session.py` -- Решение: активная сессия переиспользуется только если `agent_id` и `volume_host_path` совпадают; при mismatch usecase поднимает sandbox conflict error; replace/cleanup rollback paths сохраняют прежнюю консистентность `sandbox.active.count` -- Критерии приемки: одна active sandbox на `chat_id`; mismatch не стартует новый контейнер; endpoint возвращается через domain session; usecase не импортирует Docker/FastAPI - -### M34. DeleteSandbox usecase и DI wiring - -- Субагент: `feature-developer` -- Статус: completed -- Зависимости: `M30`, `M32` -- Commit required: no -- Scope: реализовать usecase удаления sandbox по `chat_id` и подключить его в container -- Файлы: `usecase/sandbox.py`, `usecase/interface.py`, `adapter/di/container.py` -- Решение: под per-chat lock найти active session, вызвать `runtime.delete(container_id)`, удалить registry entry и обновить `sandbox.active.count`; missing session возвращает idempotent `not_found` result -- Критерии приемки: delete-vs-create сериализован тем же locker; NotFound runtime path не ломает идемпотентность; lifecycle metrics/logs/traces отражают deleted/not_found/error outcomes - -### M35. HTTP schemas/routes для create params и delete endpoint - -- Субагент: `feature-developer` -- Статус: completed -- Зависимости: `M33`, `M34` -- Commit required: no -- Scope: обновить FastAPI adapter под расширенный create request/response и добавить delete endpoint без auth -- Файлы: `adapter/http/fastapi/schemas.py`, `adapter/http/fastapi/dependencies.py`, `adapter/http/fastapi/routers/v1/router.py` -- Решение: `POST /api/v1/create` принимает `chat_id`, `agent_id`, `volume_host_path`; response содержит `agent_id`, `volume_host_path`, `endpoint`; `DELETE /api/v1/sandboxes/{chat_id}` возвращает `deleted` или `not_found`; conflict мапится в `409` -- Критерии приемки: router остается тонким; HTTP models остаются в FastAPI adapter; path/request validation не переносится во внутренние слои; auth не добавляется - -### M36. Тесты для delete, endpoint, env и volume mapping - -- Субагент: `test-engineer` -- Статус: completed -- Зависимости: `M31`, `M32`, `M33`, `M34`, `M35` -- Commit required: no -- Scope: покрыть новую sandbox control surface без реального production Docker stack -- Файлы: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, `test/test_create_http.py`, при необходимости новые focused tests -- Критерии приемки: есть tests на create request params, reuse match, reuse mismatch `409`, delete `deleted/not_found`, Docker env `AGENT_ID`, request volume bind `rw`, configured network, endpoint IP/port extraction и reconciliation новых labels; `make typecheck` и relevant pytest проходят - -### M37. Boundary review для sandbox HTTP control changes - -- Субагент: `code-reviewer` -- Статус: completed -- Зависимости: `M36` -- Commit required: no -- Scope: проверить clean architecture, boundary rules и соответствие согласованному MVP scope -- Файлы: весь измененный код после `M30`-`M36` -- Критерии приемки: Docker/FastAPI не протекают во внутренние слои; absolute host path явно ограничен как MVP-риск; dependency direction сохранен; delete/create race не нарушает one-sandbox-per-chat; замечания сведены к minor или отсутствуют - -## Follow-up после M37 boundary review - -### M38. Исправить rollback endpoint failure и canonical volume path - -- Субагент: `feature-developer` -- Статус: completed -- Зависимости: `M37` -- Commit required: no -- Scope: закрыть must/should-fix замечания M37 без смены архитектуры -- Файлы: `adapter/docker/runtime.py`, `adapter/http/fastapi/schemas.py`, `usecase/sandbox.py`, при необходимости focused tests helpers -- Решение: при ошибке после успешного `containers.run` удалить новый container до `SandboxStartError`; canonicalize `volume_host_path` на HTTP boundary; сделать `agent_id` и `volume_host_path` обязательными в `CreateSandboxCommand` -- Критерии приемки: endpoint extraction failure не оставляет untracked running container; повторный create с эквивалентным path не конфликтует из-за raw/canonical mismatch; inner create command больше не допускает пустые default params - -### M39. Регрессии для M37 review fixes - -- Субагент: `test-engineer` -- Статус: completed -- Зависимости: `M38` -- Commit required: no -- Scope: добавить tests на rollback после endpoint failure и canonical volume path reuse, обновить тесты под required command params -- Файлы: `test/test_docker_runtime.py`, `test/test_create_http.py`, `test/test_sandbox_usecase.py` -- Критерии приемки: endpoint failure удаляет созданный container; HTTP canonicalizes volume path до usecase command; `CreateSandboxCommand` без params больше не используется; `make lint`, `make typecheck`, `make test` проходят - -### M40. Повторный boundary review для sandbox HTTP control changes - -- Субагент: `code-reviewer` -- Статус: completed -- Зависимости: `M39` -- Commit required: no -- Scope: подтвердить, что M38-M39 закрыли M37 findings без новых boundary нарушений -- Файлы: весь измененный код после `M38`-`M39` -- Критерии приемки: M37 must/should-fix закрыты; clean architecture соблюдена; замечания сведены к minor или отсутствуют diff --git a/test/test_create_http.py b/test/test_create_http.py index f9d6382..ae302c2 100644 --- a/test/test_create_http.py +++ b/test/test_create_http.py @@ -1,7 +1,6 @@ import asyncio import json from datetime import UTC, datetime, timedelta -from typing import Any from uuid import UUID import pytest @@ -28,26 +27,16 @@ from adapter.http.fastapi import app as app_module from adapter.observability.noop import NoopMetrics, NoopTracer from adapter.observability.runtime import ObservabilityRuntime from adapter.sandbox.reconciliation import SandboxSessionReconciler -from domain.error import SandboxConflictError, SandboxError, SandboxStartError -from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus +from domain.error import SandboxError, SandboxStartError +from domain.sandbox import SandboxSession, SandboxStatus from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository from usecase.interface import Attrs -from usecase.sandbox import ( - CleanupExpiredSandboxes, - CreateSandbox, - CreateSandboxCommand, - DeleteSandbox, - DeleteSandboxCommand, - DeleteSandboxResult, -) +from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000') NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000' SESSION_ID = UUID('00000000-0000-0000-0000-000000000011') -AGENT_ID = 'agent-alpha' -VOLUME_HOST_PATH = '/srv/sandbox/request-volume' -ENDPOINT = SandboxEndpoint(ip='172.20.0.8', port=8000) class FakeLogger: @@ -93,39 +82,15 @@ class FakeCleanupExpiredSandboxes(CleanupExpiredSandboxes): return [] -class FakeDeleteSandboxUsecase(DeleteSandbox): - def __init__(self, result: DeleteSandboxResult | None = None) -> None: - self._result = result - self.commands: list[DeleteSandboxCommand] = [] - - def execute(self, command: DeleteSandboxCommand) -> DeleteSandboxResult: - self.commands.append(command) - if self._result is None: - return DeleteSandboxResult(chat_id=command.chat_id, result='not_found') - return self._result - - class FakeDockerClient(DockerClient): def __init__(self, base_url: str | None = None) -> None: self.base_url = base_url self.close_calls = 0 - @property # type: ignore[override] - def networks(self) -> Any: - return _FakeNetworks() - def close(self) -> None: self.close_calls += 1 -class _FakeNetworks: - def get(self, name: str) -> None: - return None - - def create(self, name: str) -> None: - return None - - class EmptySandboxState: def __init__(self) -> None: self.calls = 0 @@ -232,18 +197,10 @@ class FakeLifecycleRuntime: *, session_id: UUID, chat_id: UUID, - agent_id: str, - volume_host_path: str, created_at: datetime, expires_at: datetime, ) -> SandboxSession: - self.create_calls.append( - CreateSandboxCommand( - chat_id=chat_id, - agent_id=agent_id, - volume_host_path=volume_host_path, - ) - ) + self.create_calls.append(CreateSandboxCommand(chat_id=chat_id)) session = SandboxSession( session_id=session_id, chat_id=chat_id, @@ -251,9 +208,6 @@ class FakeLifecycleRuntime: status=SandboxStatus.RUNNING, created_at=created_at, expires_at=expires_at, - agent_id=agent_id, - volume_host_path=volume_host_path, - endpoint=ENDPOINT, ) self._sessions = [ existing for existing in self._sessions if existing.chat_id != chat_id @@ -264,9 +218,6 @@ class FakeLifecycleRuntime: def stop(self, container_id: str) -> None: self.stop_calls.append(container_id) - def delete(self, container_id: str) -> None: - self.stop_calls.append(container_id) - class FixedSandboxState: def __init__(self, sessions: list[SandboxSession]) -> None: @@ -336,8 +287,6 @@ def build_config() -> AppConfig: docker=DockerConfig(base_url='unix:///var/run/docker.sock'), sandbox=SandboxConfig( image='sandbox:latest', - network_name='sandbox', - agent_service_port=8000, ttl_seconds=300, cleanup_interval_seconds=60, chats_root='/tmp/chats', @@ -346,8 +295,6 @@ def build_config() -> AppConfig: chat_mount_path='/workspace/chat', dependencies_mount_path='/workspace/dependencies', lambda_tools_mount_path='/workspace/lambda-tools', - volume_mount_path='/workspace/volume', - extra_env={}, ), security=SecurityConfig( token_header='Authorization', @@ -363,7 +310,6 @@ def build_container( cleanup_usecase: CleanupExpiredSandboxes, logger: FakeLogger, docker_client: FakeDockerClient, - delete_sandbox_usecase: DeleteSandbox | None = None, sandbox_reconciler: SandboxSessionReconciler | None = None, ) -> AppContainer: observability = ObservabilityRuntime( @@ -384,7 +330,6 @@ def build_container( usecases = AppUsecases( create_sandbox=create_sandbox_usecase, cleanup_expired_sandboxes=cleanup_usecase, - delete_sandbox=delete_sandbox_usecase or FakeDeleteSandboxUsecase(), ) return AppContainer( config=config, @@ -474,10 +419,6 @@ async def get_json(app: FastAPI, path: str) -> tuple[int, dict[str, object]]: return await request_json(app, 'GET', path) -async def delete_json(app: FastAPI, path: str) -> tuple[int, dict[str, object]]: - return await request_json(app, 'DELETE', path) - - async def exercise_create_request( app: FastAPI, payload: dict[str, str], @@ -504,19 +445,6 @@ async def exercise_get_request( await app.router.shutdown() -async def exercise_delete_request( - app: FastAPI, - path: str, -) -> tuple[int, dict[str, object]]: - await app.router.startup() - try: - status, response = await delete_json(app, path) - await asyncio.sleep(0) - return status, response - finally: - await app.router.shutdown() - - def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None: config = build_config() expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC) @@ -527,9 +455,6 @@ def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None status=SandboxStatus.RUNNING, created_at=expires_at - timedelta(minutes=5), expires_at=expires_at, - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - endpoint=ENDPOINT, ) logger = FakeLogger() create_usecase = FakeCreateSandboxUsecase(session=session) @@ -550,31 +475,19 @@ def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None app = app_module.create_app(config=config) status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': NON_CANONICAL_CHAT_ID, - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, - }, - ) + exercise_create_request(app, {'chat_id': NON_CANONICAL_CHAT_ID}) ) assert status_code == 200 assert response == { 'session_id': str(SESSION_ID), 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, 'container_id': 'container-123', - 'endpoint': {'ip': '172.20.0.8', 'port': 8000}, 'status': 'running', 'expires_at': '2026-04-02T12:05:00Z', } assert len(create_usecase.commands) == 1 assert create_usecase.commands[0].chat_id == CHAT_ID - assert create_usecase.commands[0].agent_id == AGENT_ID - assert create_usecase.commands[0].volume_host_path == VOLUME_HOST_PATH assert cleanup_usecase.calls >= 1 assert any( message == 'http_request' @@ -585,55 +498,6 @@ def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None assert docker_client.close_calls == 1 -def test_post_create_canonicalizes_volume_path_before_usecase(monkeypatch) -> None: - config = build_config() - expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC) - session = SandboxSession( - session_id=SESSION_ID, - chat_id=CHAT_ID, - container_id='container-123', - status=SandboxStatus.RUNNING, - created_at=expires_at - timedelta(minutes=5), - expires_at=expires_at, - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - endpoint=ENDPOINT, - ) - logger = FakeLogger() - create_usecase = FakeCreateSandboxUsecase(session=session) - cleanup_usecase = FakeCleanupExpiredSandboxes() - docker_client = FakeDockerClient() - container = build_container( - config, - create_usecase, - cleanup_usecase, - logger, - docker_client, - ) - monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container) - monkeypatch.setattr( - app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None - ) - app = app_module.create_app(config=config) - - status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': '/srv/sandbox/a/../request-volume', - }, - ) - ) - - assert status_code == 200 - assert response['volume_host_path'] == VOLUME_HOST_PATH - assert len(create_usecase.commands) == 1 - assert create_usecase.commands[0].volume_host_path == VOLUME_HOST_PATH - assert docker_client.close_calls == 1 - - def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None: config = build_config() expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC) @@ -664,14 +528,7 @@ def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None: app = app_module.create_app(config=config) status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': 'x/../y', - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, - }, - ) + exercise_create_request(app, {'chat_id': 'x/../y'}) ) assert status_code == 422 @@ -680,94 +537,6 @@ def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None: assert docker_client.close_calls == 1 -@pytest.mark.parametrize( - 'payload', - [ - {'chat_id': str(CHAT_ID), 'volume_host_path': VOLUME_HOST_PATH}, - {'chat_id': str(CHAT_ID), 'agent_id': AGENT_ID, 'volume_host_path': 'relative'}, - ], -) -def test_post_create_rejects_missing_or_invalid_runtime_params( - monkeypatch, - payload: dict[str, str], -) -> None: - config = build_config() - logger = FakeLogger() - create_usecase = FakeCreateSandboxUsecase( - session=SandboxSession( - session_id=SESSION_ID, - chat_id=CHAT_ID, - container_id='container-123', - status=SandboxStatus.RUNNING, - created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), - expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - endpoint=ENDPOINT, - ) - ) - cleanup_usecase = FakeCleanupExpiredSandboxes() - docker_client = FakeDockerClient() - container = build_container( - config, - create_usecase, - cleanup_usecase, - logger, - docker_client, - ) - monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container) - monkeypatch.setattr( - app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None - ) - - app = app_module.create_app(config=config) - - status_code, response = asyncio.run(exercise_create_request(app, payload)) - - assert status_code == 422 - assert 'detail' in response - assert create_usecase.commands == [] - assert docker_client.close_calls == 1 - - -def test_post_create_maps_conflict_to_conflict_response(monkeypatch) -> None: - config = build_config() - logger = FakeLogger() - create_usecase = FakeCreateSandboxUsecase( - error=SandboxConflictError(str(CHAT_ID)) - ) - cleanup_usecase = FakeCleanupExpiredSandboxes() - docker_client = FakeDockerClient() - container = build_container( - config, - create_usecase, - cleanup_usecase, - logger, - docker_client, - ) - monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container) - monkeypatch.setattr( - app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None - ) - - app = app_module.create_app(config=config) - - status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, - }, - ) - ) - - assert status_code == 409 - assert response == {'detail': 'sandbox_conflict'} - assert docker_client.close_calls == 1 - - def test_post_create_maps_start_errors_to_service_unavailable(monkeypatch) -> None: config = build_config() logger = FakeLogger() @@ -789,14 +558,7 @@ def test_post_create_maps_start_errors_to_service_unavailable(monkeypatch) -> No app = app_module.create_app(config=config) status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, - }, - ) + exercise_create_request(app, {'chat_id': str(CHAT_ID)}) ) assert status_code == 503 @@ -825,14 +587,7 @@ def test_post_create_maps_generic_sandbox_errors_to_internal_error(monkeypatch) app = app_module.create_app(config=config) status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, - }, - ) + exercise_create_request(app, {'chat_id': str(CHAT_ID)}) ) assert status_code == 500 @@ -840,89 +595,6 @@ def test_post_create_maps_generic_sandbox_errors_to_internal_error(monkeypatch) assert docker_client.close_calls == 1 -def test_delete_sandbox_endpoint_returns_deleted(monkeypatch) -> None: - config = build_config() - logger = FakeLogger() - create_usecase = FakeCreateSandboxUsecase(error=AssertionError('unused')) - cleanup_usecase = FakeCleanupExpiredSandboxes() - delete_usecase = FakeDeleteSandboxUsecase( - DeleteSandboxResult( - chat_id=CHAT_ID, - result='deleted', - session_id=SESSION_ID, - container_id='container-123', - ) - ) - docker_client = FakeDockerClient() - container = build_container( - config, - create_usecase, - cleanup_usecase, - logger, - docker_client, - delete_sandbox_usecase=delete_usecase, - ) - monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container) - monkeypatch.setattr( - app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None - ) - - app = app_module.create_app(config=config) - - status_code, response = asyncio.run( - exercise_delete_request(app, f'/api/v1/sandboxes/{CHAT_ID}') - ) - - assert status_code == 200 - assert response == { - 'chat_id': str(CHAT_ID), - 'result': 'deleted', - 'session_id': str(SESSION_ID), - 'container_id': 'container-123', - } - assert delete_usecase.commands == [DeleteSandboxCommand(chat_id=CHAT_ID)] - assert docker_client.close_calls == 1 - - -def test_delete_sandbox_endpoint_returns_not_found(monkeypatch) -> None: - config = build_config() - logger = FakeLogger() - create_usecase = FakeCreateSandboxUsecase(error=AssertionError('unused')) - cleanup_usecase = FakeCleanupExpiredSandboxes() - delete_usecase = FakeDeleteSandboxUsecase( - DeleteSandboxResult(chat_id=CHAT_ID, result='not_found') - ) - docker_client = FakeDockerClient() - container = build_container( - config, - create_usecase, - cleanup_usecase, - logger, - docker_client, - delete_sandbox_usecase=delete_usecase, - ) - monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container) - monkeypatch.setattr( - app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None - ) - - app = app_module.create_app(config=config) - - status_code, response = asyncio.run( - exercise_delete_request(app, f'/api/v1/sandboxes/{CHAT_ID}') - ) - - assert status_code == 200 - assert response == { - 'chat_id': str(CHAT_ID), - 'result': 'not_found', - 'session_id': None, - 'container_id': None, - } - assert delete_usecase.commands == [DeleteSandboxCommand(chat_id=CHAT_ID)] - assert docker_client.close_calls == 1 - - def test_startup_reconciliation_reuses_existing_container_after_restart( monkeypatch, ) -> None: @@ -935,9 +607,6 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( status=SandboxStatus.RUNNING, created_at=created_at, expires_at=created_at + timedelta(minutes=5), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - endpoint=ENDPOINT, ) logger = FakeLogger() docker_client = FakeDockerClient() @@ -949,7 +618,6 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( tracer=NoopTracer(), ) repositories = AppRepositories(sandbox_session=repository) - locker = ProcessLocalSandboxLifecycleLocker() reconciler = SandboxSessionReconciler( state_source=runtime, registry=repository, @@ -960,7 +628,7 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( usecases = AppUsecases( create_sandbox=CreateSandbox( repository=repository, - locker=locker, + locker=ProcessLocalSandboxLifecycleLocker(), runtime=runtime, clock=FakeClock(created_at), logger=logger, @@ -970,21 +638,13 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( ), cleanup_expired_sandboxes=CleanupExpiredSandboxes( repository=repository, - locker=locker, + locker=ProcessLocalSandboxLifecycleLocker(), runtime=runtime, clock=FakeClock(created_at), logger=logger, metrics=NoopMetrics(), tracer=NoopTracer(), ), - delete_sandbox=DeleteSandbox( - repository=repository, - locker=locker, - runtime=runtime, - logger=logger, - metrics=NoopMetrics(), - tracer=NoopTracer(), - ), ) container = AppContainer( config=config, @@ -1002,24 +662,14 @@ def test_startup_reconciliation_reuses_existing_container_after_restart( app = app_module.create_app(config=config) status_code, response = asyncio.run( - exercise_create_request( - app, - { - 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, - }, - ) + exercise_create_request(app, {'chat_id': str(CHAT_ID)}) ) assert status_code == 200 assert response == { 'session_id': str(SESSION_ID), 'chat_id': str(CHAT_ID), - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, 'container_id': 'container-123', - 'endpoint': {'ip': '172.20.0.8', 'port': 8000}, 'status': 'running', 'expires_at': '2026-04-02T12:05:00Z', } diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py index 4eaff61..7f71275 100644 --- a/test/test_docker_runtime.py +++ b/test/test_docker_runtime.py @@ -1,4 +1,3 @@ -from dataclasses import replace from datetime import UTC, datetime, timedelta from pathlib import Path from types import TracebackType @@ -14,51 +13,22 @@ from adapter.config.model import SandboxConfig from adapter.docker.runtime import DockerSandboxRuntime from adapter.observability.noop import NoopMetrics, NoopTracer from domain.error import SandboxError, SandboxStartError -from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus +from domain.sandbox import SandboxSession, SandboxStatus from usecase.interface import Attrs, AttrValue CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000') NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000' SESSION_ID = UUID('00000000-0000-0000-0000-000000000010') -AGENT_ID = 'agent-alpha' - - -def _network_attrs(network_name: str = 'sandbox', ip: str = '172.20.0.8') -> dict[str, object]: - return { - 'NetworkSettings': { - 'Networks': { - network_name: { - 'IPAddress': ip, - } - } - } - } class FakeContainer: - def __init__( - self, - container_id: str, - *, - network_name: str = 'sandbox', - ip: str = '172.20.0.8', - ) -> None: + def __init__(self, container_id: str) -> None: self.id = container_id self.stop_calls = 0 - self.remove_calls: list[dict[str, bool]] = [] - self.reload_calls = 0 - self.attrs = _network_attrs(network_name, ip) - self.labels: dict[str, str] = {} def stop(self) -> None: self.stop_calls += 1 - def reload(self) -> None: - self.reload_calls += 1 - - def remove(self, *, force: bool) -> None: - self.remove_calls.append({'force': force}) - class FakeListedContainer(FakeContainer): def __init__( @@ -67,12 +37,10 @@ class FakeListedContainer(FakeContainer): *, labels: dict[str, str], created_at: str, - network_name: str = 'sandbox', - ip: str = '172.20.0.8', ) -> None: - super().__init__(container_id, network_name=network_name, ip=ip) + super().__init__(container_id) self.labels = labels - self.attrs['Created'] = created_at + self.attrs = {'Created': created_at} class FailingStopContainer(FakeListedContainer): @@ -98,10 +66,8 @@ class FailingStopContainer(FakeListedContainer): class RunKwargs(TypedDict): detach: bool - environment: dict[str, str] labels: dict[str, str] mounts: list[Mount] - network: str class RunCall(TypedDict): @@ -124,20 +90,16 @@ class FakeContainers: image: str, *, detach: bool, - environment: dict[str, str], labels: dict[str, str], mounts: list[Mount], - network: str, ) -> FakeContainer: self.run_calls.append( { 'args': (image,), 'kwargs': { 'detach': detach, - 'environment': environment, 'labels': labels, 'mounts': mounts, - 'network': network, }, } ) @@ -304,8 +266,6 @@ def _find_record_call( def build_config(tmp_path: Path) -> SandboxConfig: return SandboxConfig( image='sandbox:latest', - network_name='sandbox', - agent_service_port=8000, ttl_seconds=300, cleanup_interval_seconds=60, chats_root=str(tmp_path / 'chats'), @@ -314,8 +274,6 @@ def build_config(tmp_path: Path) -> SandboxConfig: chat_mount_path='/workspace/chat', dependencies_mount_path='/workspace/dependencies', lambda_tools_mount_path='/workspace/lambda-tools', - volume_mount_path='/workspace/volume', - extra_env={}, ) @@ -345,8 +303,6 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( session = runtime.create( session_id=SESSION_ID, chat_id=UUID(NON_CANONICAL_CHAT_ID), - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), created_at=created_at, expires_at=expires_at, ) @@ -357,25 +313,15 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( assert session.status is SandboxStatus.RUNNING assert session.created_at == created_at assert session.expires_at == expires_at - assert session.agent_id == AGENT_ID - assert session.volume_host_path == str( - (tmp_path / 'request-volume').resolve(strict=False) - ) - assert session.endpoint == SandboxEndpoint(ip='172.20.0.8', port=8000) assert (tmp_path / 'chats' / str(CHAT_ID)).is_dir() call = containers.run_calls[0] assert call['args'] == ('sandbox:latest',) assert call['kwargs']['detach'] is True - assert call['kwargs']['environment'] == {'AGENT_ID': AGENT_ID} - assert call['kwargs']['network'] == 'sandbox' assert call['kwargs']['labels'] == { 'session_id': str(SESSION_ID), 'chat_id': str(CHAT_ID), 'expires_at': expires_at.isoformat(), - 'agent_id': AGENT_ID, - 'volume_host_path': str((tmp_path / 'request-volume').resolve(strict=False)), - 'endpoint_port': '8000', } mounts = call['kwargs']['mounts'] @@ -398,103 +344,9 @@ def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id( 'Type': 'bind', 'ReadOnly': True, }, - { - 'Target': '/workspace/volume', - 'Source': str((tmp_path / 'request-volume').resolve(strict=False)), - 'Type': 'bind', - 'ReadOnly': False, - }, ] -def test_runtime_create_uses_configured_network_for_endpoint(tmp_path: Path) -> None: - config = replace( - build_config(tmp_path), - network_name='agent-net', - agent_service_port=9000, - ) - (tmp_path / 'dependencies').mkdir() - (tmp_path / 'lambda-tools').mkdir() - containers = FakeContainers( - run_result=FakeContainer( - 'container-456', - network_name='agent-net', - ip='10.42.0.7', - ) - ) - runtime = build_runtime(config, containers) - created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) - expires_at = created_at + timedelta(minutes=5) - - session = runtime.create( - session_id=SESSION_ID, - chat_id=CHAT_ID, - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), - created_at=created_at, - expires_at=expires_at, - ) - - assert containers.run_calls[0]['kwargs']['network'] == 'agent-net' - assert session.endpoint == SandboxEndpoint(ip='10.42.0.7', port=9000) - - -def test_runtime_create_removes_container_when_endpoint_extraction_fails( - tmp_path: Path, -) -> None: - config = build_config(tmp_path) - (tmp_path / 'dependencies').mkdir() - (tmp_path / 'lambda-tools').mkdir() - created_container = FakeContainer( - 'container-789', - network_name='unexpected-net', - ) - containers = FakeContainers(run_result=created_container) - runtime = build_runtime(config, containers) - - with pytest.raises(SandboxStartError) as excinfo: - runtime.create( - session_id=SESSION_ID, - chat_id=CHAT_ID, - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), - created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), - expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC), - ) - - assert str(excinfo.value) == 'sandbox_start_failed' - assert containers.run_calls - assert created_container.remove_calls == [{'force': True}] - - -def test_runtime_create_applies_request_volume_bind_as_rw(tmp_path: Path) -> None: - config = build_config(tmp_path) - (tmp_path / 'dependencies').mkdir() - (tmp_path / 'lambda-tools').mkdir() - containers = FakeContainers() - runtime = build_runtime(config, containers) - created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) - expires_at = created_at + timedelta(minutes=5) - volume_host_path = str(tmp_path / 'request-volume') - - runtime.create( - session_id=SESSION_ID, - chat_id=CHAT_ID, - agent_id=AGENT_ID, - volume_host_path=volume_host_path, - created_at=created_at, - expires_at=expires_at, - ) - - mounts = [dict(mount) for mount in containers.run_calls[0]['kwargs']['mounts']] - assert { - 'Target': '/workspace/volume', - 'Source': str((tmp_path / 'request-volume').resolve(strict=False)), - 'Type': 'bind', - 'ReadOnly': False, - } in mounts - - def test_runtime_create_records_observability(tmp_path: Path) -> None: config = build_config(tmp_path) (tmp_path / 'dependencies').mkdir() @@ -514,8 +366,6 @@ def test_runtime_create_records_observability(tmp_path: Path) -> None: session = runtime.create( session_id=SESSION_ID, chat_id=CHAT_ID, - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), created_at=created_at, expires_at=expires_at, ) @@ -552,8 +402,6 @@ def test_runtime_create_raises_start_error_when_container_id_is_missing( runtime.create( session_id=SESSION_ID, chat_id=CHAT_ID, - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC), ) @@ -582,8 +430,6 @@ def test_runtime_create_error_records_observability_when_container_id_missing( runtime.create( session_id=SESSION_ID, chat_id=CHAT_ID, - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC), ) @@ -592,7 +438,7 @@ def test_runtime_create_error_records_observability_when_container_id_missing( _find_increment_call( metrics, 'sandbox.runtime.error.total', - attrs={'operation': 'create', 'error.type': 'ValueError'}, + attrs={'operation': 'create', 'error.type': 'SandboxStartError'}, ) duration_call = _find_record_call( metrics, @@ -752,38 +598,6 @@ def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None: assert stop_error_calls == [] -def test_runtime_delete_removes_container_with_force(tmp_path: Path) -> None: - config = build_config(tmp_path) - containers = FakeContainers() - container = FakeListedContainer( - 'container-123', - labels={ - 'session_id': str(SESSION_ID), - 'chat_id': str(CHAT_ID), - 'expires_at': '2026-04-02T12:05:00+00:00', - }, - created_at='2026-04-02T12:00:00Z', - ) - containers.get_result = container - runtime = build_runtime(config, containers) - - runtime.delete('container-123') - - assert containers.get_calls == ['container-123'] - assert container.remove_calls == [{'force': True}] - - -def test_runtime_delete_ignores_missing_container(tmp_path: Path) -> None: - config = build_config(tmp_path) - containers = FakeContainers() - containers.get_result = NotFound('missing') - runtime = build_runtime(config, containers) - - runtime.delete('container-123') - - assert containers.get_calls == ['container-123'] - - def test_runtime_list_active_sessions_reads_valid_labeled_containers( tmp_path: Path, ) -> None: @@ -797,9 +611,6 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers( 'session_id': str(SESSION_ID), 'chat_id': str(CHAT_ID), 'expires_at': expires_at.isoformat(), - 'agent_id': AGENT_ID, - 'volume_host_path': str(tmp_path / 'request-volume'), - 'endpoint_port': '8000', }, created_at='2026-04-02T12:00:00Z', ), @@ -824,24 +635,10 @@ def test_runtime_list_active_sessions_reads_valid_labeled_containers( status=SandboxStatus.RUNNING, created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC), expires_at=expires_at, - agent_id=AGENT_ID, - volume_host_path=str(tmp_path / 'request-volume'), - endpoint=SandboxEndpoint(ip='172.20.0.8', port=8000), ) ] assert containers.list_calls == [ - { - 'filters': { - 'label': [ - 'session_id', - 'chat_id', - 'expires_at', - 'agent_id', - 'volume_host_path', - 'endpoint_port', - ] - } - } + {'filters': {'label': ['session_id', 'chat_id', 'expires_at']}} ] @@ -856,9 +653,6 @@ def test_runtime_list_active_records_observability(tmp_path: Path) -> None: 'session_id': str(SESSION_ID), 'chat_id': str(CHAT_ID), 'expires_at': expires_at.isoformat(), - 'agent_id': AGENT_ID, - 'volume_host_path': str(tmp_path / 'request-volume'), - 'endpoint_port': '8000', }, created_at='2026-04-02T12:00:00Z', ), diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py index ba16620..b2e3dcb 100644 --- a/test/test_sandbox_usecase.py +++ b/test/test_sandbox_usecase.py @@ -6,23 +6,14 @@ from uuid import UUID import pytest from adapter.observability.noop import NoopMetrics, NoopTracer -from domain.error import SandboxConflictError from domain.sandbox import SandboxSession, SandboxStatus from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker from repository.sandbox_session import InMemorySandboxSessionRepository from usecase.interface import Attrs, AttrValue -from usecase.sandbox import ( - CleanupExpiredSandboxes, - CreateSandbox, - CreateSandboxCommand, - DeleteSandbox, - DeleteSandboxCommand, -) +from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand CHAT_ID = UUID('11111111-1111-1111-1111-111111111111') NON_CANONICAL_CHAT_ID = '11111111111111111111111111111111' -AGENT_ID = 'agent-alpha' -VOLUME_HOST_PATH = '/srv/sandbox/request-volume' EXPIRED_CHAT_ID = UUID('22222222-2222-2222-2222-222222222222') BOUNDARY_CHAT_ID = UUID('33333333-3333-3333-3333-333333333333') ACTIVE_CHAT_ID = UUID('44444444-4444-4444-4444-444444444444') @@ -39,19 +30,6 @@ SESSION_CLEAN_ID = UUID('00000000-0000-0000-0000-000000000008') SESSION_REPLACEMENT_ID = UUID('00000000-0000-0000-0000-000000000009') -def _create_command( - chat_id: UUID = CHAT_ID, - *, - agent_id: str = AGENT_ID, - volume_host_path: str = VOLUME_HOST_PATH, -) -> CreateSandboxCommand: - return CreateSandboxCommand( - chat_id=chat_id, - agent_id=agent_id, - volume_host_path=volume_host_path, - ) - - class FakeClock: def __init__(self, now: datetime) -> None: self._now = now @@ -260,7 +238,6 @@ class BlockingCreateRuntime: def __init__(self) -> None: self.create_calls: list[dict[str, object]] = [] self.stop_calls: list[str] = [] - self.delete_calls: list[str] = [] self.create_started = threading.Event() self.allow_create = threading.Event() @@ -269,8 +246,6 @@ class BlockingCreateRuntime: *, session_id: UUID, chat_id: UUID, - agent_id: str, - volume_host_path: str, created_at: datetime, expires_at: datetime, ) -> SandboxSession: @@ -278,8 +253,6 @@ class BlockingCreateRuntime: { 'session_id': session_id, 'chat_id': chat_id, - 'agent_id': agent_id, - 'volume_host_path': volume_host_path, 'created_at': created_at, 'expires_at': expires_at, } @@ -293,16 +266,11 @@ class BlockingCreateRuntime: status=SandboxStatus.RUNNING, created_at=created_at, expires_at=expires_at, - agent_id=agent_id, - volume_host_path=volume_host_path, ) def stop(self, container_id: str) -> None: self.stop_calls.append(container_id) - def delete(self, container_id: str) -> None: - self.delete_calls.append(container_id) - class StaleSnapshotRepository(InMemorySandboxSessionRepository): def __init__(self, snapshot: SandboxSession) -> None: @@ -333,15 +301,12 @@ class FakeRuntime: def __init__(self) -> None: self.create_calls: list[dict[str, object]] = [] self.stop_calls: list[str] = [] - self.delete_calls: list[str] = [] def create( self, *, session_id: UUID, chat_id: UUID, - agent_id: str, - volume_host_path: str, created_at: datetime, expires_at: datetime, ) -> SandboxSession: @@ -349,8 +314,6 @@ class FakeRuntime: { 'session_id': session_id, 'chat_id': chat_id, - 'agent_id': agent_id, - 'volume_host_path': volume_host_path, 'created_at': created_at, 'expires_at': expires_at, } @@ -362,16 +325,11 @@ class FakeRuntime: status=SandboxStatus.RUNNING, created_at=created_at, expires_at=expires_at, - agent_id=agent_id, - volume_host_path=volume_host_path, ) def stop(self, container_id: str) -> None: self.stop_calls.append(container_id) - def delete(self, container_id: str) -> None: - self.delete_calls.append(container_id) - class FailingStopRuntime(FakeRuntime): def __init__(self, failing_container_id: str) -> None: @@ -394,8 +352,6 @@ class FailingCreateRuntime(FakeRuntime): *, session_id: UUID, chat_id: UUID, - agent_id: str, - volume_host_path: str, created_at: datetime, expires_at: datetime, ) -> SandboxSession: @@ -403,8 +359,6 @@ class FailingCreateRuntime(FakeRuntime): { 'session_id': session_id, 'chat_id': chat_id, - 'agent_id': agent_id, - 'volume_host_path': volume_host_path, 'created_at': created_at, 'expires_at': expires_at, } @@ -421,8 +375,6 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None: status=SandboxStatus.RUNNING, created_at=now - timedelta(minutes=1), expires_at=now + timedelta(minutes=4), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, ) repository = InMemorySandboxSessionRepository() repository.save(session) @@ -440,7 +392,7 @@ def test_create_sandbox_reuses_active_session_when_not_expired() -> None: ttl=timedelta(minutes=5), ) - result = usecase.execute(_create_command()) + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) assert result == session assert runtime.create_calls == [] @@ -469,8 +421,6 @@ def test_create_sandbox_reuse_records_observability() -> None: status=SandboxStatus.RUNNING, created_at=now - timedelta(minutes=1), expires_at=now + timedelta(minutes=4), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, ) repository = InMemorySandboxSessionRepository() repository.save(session) @@ -487,7 +437,7 @@ def test_create_sandbox_reuse_records_observability() -> None: ttl=timedelta(minutes=5), ) - result = usecase.execute(_create_command()) + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) assert result == session _assert_increment_metric_present( @@ -536,7 +486,7 @@ def test_create_sandbox_replace_records_observability_and_final_active_count( ) monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) - result = usecase.execute(_create_command()) + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) assert result.session_id == SESSION_NEW_ID assert repository.count_active() == 1 @@ -593,15 +543,13 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one( ) monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) - result = usecase.execute(_create_command()) + result = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) assert runtime.stop_calls == ['container-old'] assert runtime.create_calls == [ { 'session_id': SESSION_NEW_ID, 'chat_id': CHAT_ID, - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, 'created_at': now, 'expires_at': now + timedelta(minutes=5), } @@ -613,8 +561,6 @@ def test_create_sandbox_replaces_expired_session_and_creates_new_one( status=SandboxStatus.RUNNING, created_at=now, expires_at=now + timedelta(minutes=5), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, ) assert repository.get_active_by_chat_id(CHAT_ID) == result assert locker.chat_ids == [CHAT_ID] @@ -657,7 +603,7 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None: ttl=timedelta(minutes=5), ) - result = usecase.execute(_create_command(UUID(NON_CANONICAL_CHAT_ID))) + result = usecase.execute(CreateSandboxCommand(chat_id=UUID(NON_CANONICAL_CHAT_ID))) assert result.chat_id == CHAT_ID assert result.container_id == f'container-{result.session_id}' @@ -668,8 +614,6 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None: assert runtime.create_calls[0] == { 'session_id': result.session_id, 'chat_id': CHAT_ID, - 'agent_id': AGENT_ID, - 'volume_host_path': VOLUME_HOST_PATH, 'created_at': now, 'expires_at': now + timedelta(minutes=5), } @@ -689,105 +633,6 @@ def test_create_sandbox_creates_new_session_when_none_exists() -> None: ] -def test_create_sandbox_passes_agent_and_volume_params_to_runtime() -> None: - now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) - repository = InMemorySandboxSessionRepository() - runtime = FakeRuntime() - usecase = CreateSandbox( - repository=repository, - locker=FakeLocker(), - runtime=runtime, - clock=FakeClock(now), - logger=FakeLogger(), - metrics=NoopMetrics(), - tracer=NoopTracer(), - ttl=timedelta(minutes=5), - ) - - result = usecase.execute(_create_command()) - - assert len(runtime.create_calls) == 1 - assert runtime.create_calls[0]['agent_id'] == AGENT_ID - assert runtime.create_calls[0]['volume_host_path'] == VOLUME_HOST_PATH - assert result.agent_id == AGENT_ID - assert result.volume_host_path == VOLUME_HOST_PATH - assert repository.get_active_by_chat_id(CHAT_ID) == result - - -def test_create_sandbox_reuses_active_session_when_params_match() -> None: - now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) - session = SandboxSession( - session_id=SESSION_REUSED_ID, - chat_id=CHAT_ID, - container_id='container-1', - status=SandboxStatus.RUNNING, - created_at=now - timedelta(minutes=1), - expires_at=now + timedelta(minutes=4), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - ) - repository = InMemorySandboxSessionRepository() - repository.save(session) - runtime = FakeRuntime() - usecase = CreateSandbox( - repository=repository, - locker=FakeLocker(), - runtime=runtime, - clock=FakeClock(now), - logger=FakeLogger(), - metrics=NoopMetrics(), - tracer=NoopTracer(), - ttl=timedelta(minutes=5), - ) - - result = usecase.execute(_create_command()) - - assert result == session - assert runtime.create_calls == [] - assert runtime.stop_calls == [] - assert runtime.delete_calls == [] - - -def test_create_sandbox_reuse_mismatch_raises_conflict_without_runtime_calls() -> None: - now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) - session = SandboxSession( - session_id=SESSION_REUSED_ID, - chat_id=CHAT_ID, - container_id='container-1', - status=SandboxStatus.RUNNING, - created_at=now - timedelta(minutes=1), - expires_at=now + timedelta(minutes=4), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - ) - repository = InMemorySandboxSessionRepository() - repository.save(session) - runtime = FakeRuntime() - usecase = CreateSandbox( - repository=repository, - locker=FakeLocker(), - runtime=runtime, - clock=FakeClock(now), - logger=FakeLogger(), - metrics=NoopMetrics(), - tracer=NoopTracer(), - ttl=timedelta(minutes=5), - ) - - with pytest.raises(SandboxConflictError): - usecase.execute( - _create_command( - agent_id='agent-beta', - volume_host_path='/srv/sandbox/other-volume', - ) - ) - - assert runtime.create_calls == [] - assert runtime.stop_calls == [] - assert runtime.delete_calls == [] - assert repository.get_active_by_chat_id(CHAT_ID) == session - - def test_create_sandbox_error_records_observability(monkeypatch) -> None: now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) metrics = RecordingMetrics() @@ -805,7 +650,7 @@ def test_create_sandbox_error_records_observability(monkeypatch) -> None: monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) with pytest.raises(RuntimeError, match='create_failed') as excinfo: - usecase.execute(_create_command()) + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) _assert_increment_metric_present( metrics, @@ -843,7 +688,7 @@ def test_create_sandbox_save_failure_stops_untracked_container(monkeypatch) -> N monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) with pytest.raises(RuntimeError, match='save_failed'): - usecase.execute(_create_command()) + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) assert len(runtime.create_calls) == 1 assert runtime.stop_calls == [f'container-{SESSION_NEW_ID}'] @@ -886,7 +731,7 @@ def test_create_sandbox_replace_stop_failure_preserves_separate_identities( monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) with pytest.raises(RuntimeError, match='stop_failed') as excinfo: - usecase.execute(_create_command()) + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) _assert_increment_metric_present( metrics, @@ -941,7 +786,7 @@ def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids( monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID) with pytest.raises(RuntimeError, match='save_failed') as excinfo: - usecase.execute(_create_command()) + usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) assert runtime.stop_calls == ['container-old', f'container-{SESSION_NEW_ID}'] assert len(runtime.create_calls) == 1 @@ -995,7 +840,7 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( def run_create(index: int) -> None: try: - results[index] = usecase.execute(_create_command()) + results[index] = usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID)) except Exception as exc: errors.append(exc) @@ -1023,8 +868,6 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( status=SandboxStatus.RUNNING, created_at=now, expires_at=now + timedelta(minutes=5), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, ) assert len(runtime.create_calls) == 1 assert runtime.stop_calls == [] @@ -1052,67 +895,6 @@ def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id( ] -def test_delete_sandbox_deletes_session_and_removes_registry_entry() -> None: - now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) - session = SandboxSession( - session_id=SESSION_ACTIVE_ID, - chat_id=CHAT_ID, - container_id='container-active', - status=SandboxStatus.RUNNING, - created_at=now - timedelta(minutes=1), - expires_at=now + timedelta(minutes=4), - agent_id=AGENT_ID, - volume_host_path=VOLUME_HOST_PATH, - ) - repository = InMemorySandboxSessionRepository() - repository.save(session) - runtime = FakeRuntime() - locker = FakeLocker() - usecase = DeleteSandbox( - repository=repository, - locker=locker, - runtime=runtime, - logger=FakeLogger(), - metrics=NoopMetrics(), - tracer=NoopTracer(), - ) - - result = usecase.execute(DeleteSandboxCommand(chat_id=CHAT_ID)) - - assert result.chat_id == CHAT_ID - assert result.result == 'deleted' - assert result.session_id == SESSION_ACTIVE_ID - assert result.container_id == 'container-active' - assert runtime.delete_calls == ['container-active'] - assert runtime.stop_calls == [] - assert repository.get_active_by_chat_id(CHAT_ID) is None - assert locker.chat_ids == [CHAT_ID] - - -def test_delete_sandbox_returns_idempotent_not_found_without_runtime_calls() -> None: - runtime = FakeRuntime() - locker = FakeLocker() - usecase = DeleteSandbox( - repository=InMemorySandboxSessionRepository(), - locker=locker, - runtime=runtime, - logger=FakeLogger(), - metrics=NoopMetrics(), - tracer=NoopTracer(), - ) - - result = usecase.execute(DeleteSandboxCommand(chat_id=CHAT_ID)) - - assert result.chat_id == CHAT_ID - assert result.result == 'not_found' - assert result.session_id is None - assert result.container_id is None - assert runtime.create_calls == [] - assert runtime.stop_calls == [] - assert runtime.delete_calls == [] - assert locker.chat_ids == [CHAT_ID] - - def test_cleanup_expired_sandboxes_stops_and_deletes_only_expired_sessions() -> None: now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC) expired_session = SandboxSession( diff --git a/usecase/interface.py b/usecase/interface.py index f345056..de681d6 100644 --- a/usecase/interface.py +++ b/usecase/interface.py @@ -4,8 +4,10 @@ from types import TracebackType from typing import Protocol, TypeAlias from uuid import UUID +from domain.chat import Chat, ChatAttachmentName, ChatFile from domain.sandbox import SandboxSession from domain.user import User +from domain.workspace import Workspace, WorkspaceUsage AttrValue: TypeAlias = str | int | float | bool Attrs: TypeAlias = Mapping[str, AttrValue] @@ -19,6 +21,81 @@ class UserRepository(Protocol): def save(self, user: User) -> None: ... +class WorkspaceRepository(Protocol): + def get(self, workspace_id: UUID) -> Workspace | None: ... + + def get_by_user_id(self, user_id: UUID) -> Workspace | None: ... + + def save(self, workspace: Workspace) -> None: ... + + +class ChatRepository(Protocol): + def get(self, chat_id: UUID) -> Chat | None: ... + + def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]: ... + + def save(self, chat: Chat) -> None: ... + + def delete(self, chat_id: UUID) -> None: ... + + +class ChatFileRepository(Protocol): + def get(self, file_id: UUID) -> ChatFile | None: ... + + def get_by_chat_id_and_name( + self, + chat_id: UUID, + name: ChatAttachmentName, + ) -> ChatFile | None: ... + + def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]: ... + + def save(self, chat_file: ChatFile) -> None: ... + + def delete(self, file_id: UUID) -> None: ... + + def delete_by_chat_id(self, chat_id: UUID) -> None: ... + + +class ChatStorage(Protocol): + def ensure_chat(self, chat: Chat) -> None: ... + + def read_history(self, chat: Chat) -> str: ... + + def write_history(self, chat: Chat, content: str) -> None: ... + + def delete_chat(self, chat: Chat) -> None: ... + + def write_attachment( + self, + chat: Chat, + file_name: ChatAttachmentName, + content: bytes, + ) -> int: ... + + def read_attachment(self, chat: Chat, file_name: ChatAttachmentName) -> bytes: ... + + def delete_attachment( + self, + chat: Chat, + file_name: ChatAttachmentName, + ) -> None: ... + + def clear_attachments(self, chat: Chat) -> None: ... + + +class StorageUsageReader(Protocol): + def get_workspace_usage( + self, + workspace: Workspace, + chats: list[Chat], + ) -> WorkspaceUsage: ... + + +class IdGenerator(Protocol): + def new(self) -> UUID: ... + + class SandboxSessionRepository(Protocol): def get_active_by_chat_id(self, chat_id: UUID) -> SandboxSession | None: ... @@ -52,16 +129,12 @@ class SandboxRuntime(Protocol): *, session_id: UUID, chat_id: UUID, - agent_id: str, - volume_host_path: str, created_at: datetime, expires_at: datetime, ) -> SandboxSession: ... def stop(self, container_id: str) -> None: ... - def delete(self, container_id: str) -> None: ... - class Clock(Protocol): def now(self) -> datetime: ... diff --git a/usecase/sandbox.py b/usecase/sandbox.py index 9574ca5..59f1584 100644 --- a/usecase/sandbox.py +++ b/usecase/sandbox.py @@ -2,7 +2,6 @@ from dataclasses import dataclass from datetime import timedelta from uuid import UUID, uuid4 -from domain.error import SandboxConflictError from domain.sandbox import SandboxSession from usecase.interface import ( Clock, @@ -11,7 +10,6 @@ from usecase.interface import ( SandboxLifecycleLocker, SandboxRuntime, SandboxSessionRepository, - Span, Tracer, ) @@ -19,97 +17,6 @@ from usecase.interface import ( @dataclass(frozen=True, slots=True) class CreateSandboxCommand: chat_id: UUID - agent_id: str - volume_host_path: str - - -@dataclass(frozen=True, slots=True) -class DeleteSandboxCommand: - chat_id: UUID - - -@dataclass(frozen=True, slots=True) -class DeleteSandboxResult: - chat_id: UUID - result: str - session_id: UUID | None = None - container_id: str | None = None - - -class DeleteSandbox: - def __init__( - self, - repository: SandboxSessionRepository, - locker: SandboxLifecycleLocker, - runtime: SandboxRuntime, - logger: Logger, - metrics: Metrics, - tracer: Tracer, - ) -> None: - self._repository = repository - self._locker = locker - self._runtime = runtime - self._logger = logger - self._metrics = metrics - self._tracer = tracer - - def execute(self, command: DeleteSandboxCommand) -> DeleteSandboxResult: - chat_id = command.chat_id - - with self._tracer.start_span( - 'usecase.delete_sandbox', - attrs={'chat.id': str(chat_id)}, - ) as span: - session: SandboxSession | None = None - try: - with self._locker.lock(chat_id): - session = self._repository.get_active_by_chat_id(chat_id) - if session is None: - span.set_attribute('sandbox.result', 'not_found') - self._metrics.increment( - 'sandbox.delete.total', - attrs=_result_metric_attrs('not_found'), - ) - self._logger.info( - 'sandbox_delete_not_found', - attrs={'chat_id': str(chat_id)}, - ) - return DeleteSandboxResult( - chat_id=chat_id, - result='not_found', - ) - - _set_session_span_attrs(span, session) - self._runtime.delete(session.container_id) - self._repository.delete(session.session_id) - _set_active_count(self._metrics, self._repository) - span.set_attribute('sandbox.result', 'deleted') - self._metrics.increment( - 'sandbox.delete.total', - attrs=_result_metric_attrs('deleted'), - ) - self._logger.info( - 'sandbox_deleted', - attrs=_sandbox_attrs(session), - ) - return DeleteSandboxResult( - chat_id=chat_id, - result='deleted', - session_id=session.session_id, - container_id=session.container_id, - ) - except Exception as exc: - span.set_attribute('sandbox.result', 'error') - self._metrics.increment( - 'sandbox.delete.total', - attrs=_result_metric_attrs('error'), - ) - span.record_error(exc) - self._logger.error( - 'sandbox_delete_failed', - attrs=_delete_error_attrs(chat_id, session, exc), - ) - raise class CreateSandbox: @@ -138,11 +45,7 @@ class CreateSandbox: with self._tracer.start_span( 'usecase.create_sandbox', - attrs={ - 'chat.id': str(chat_id), - 'agent.id': command.agent_id, - 'volume.host_path': command.volume_host_path, - }, + attrs={'chat.id': str(chat_id)}, ) as span: try: with self._locker.lock(chat_id): @@ -150,11 +53,75 @@ class CreateSandbox: now = self._clock.now() if session is not None and session.expires_at > now: - return self._reuse_or_conflict(command, session, span) + span.set_attribute('session.id', str(session.session_id)) + span.set_attribute('container.id', session.container_id) + span.set_attribute('sandbox.result', 'reused') + self._metrics.increment( + 'sandbox.create.total', + attrs=_result_metric_attrs('reused'), + ) + self._logger.info( + 'sandbox_reused', + attrs=_sandbox_attrs(session), + ) + return session - return self._create_or_replace(command, session, span) - except SandboxConflictError: - raise + result = 'created' + new_session_id: UUID | None = None + if session is not None: + result = 'replaced' + new_session_id = _new_session_id() + span.set_attribute( + 'sandbox.previous_session.id', + str(session.session_id), + ) + span.set_attribute( + 'sandbox.previous_container.id', + session.container_id, + ) + span.set_attribute( + 'sandbox.new_session.id', + str(new_session_id), + ) + self._logger.info( + 'sandbox_replaced', + attrs=_sandbox_attrs(session), + ) + self._runtime.stop(session.container_id) + self._repository.delete(session.session_id) + _set_active_count(self._metrics, self._repository) + + created_at = self._clock.now() + expires_at = created_at + self._ttl + if new_session_id is None: + new_session_id = _new_session_id() + span.set_attribute('session.id', str(new_session_id)) + new_session = self._runtime.create( + session_id=new_session_id, + chat_id=chat_id, + created_at=created_at, + expires_at=expires_at, + ) + if result == 'replaced': + span.set_attribute( + 'sandbox.new_container.id', + new_session.container_id, + ) + self._save_created_session(new_session) + _set_active_count(self._metrics, self._repository) + if result == 'replaced': + span.set_attribute('session.id', str(new_session.session_id)) + span.set_attribute('container.id', new_session.container_id) + span.set_attribute('sandbox.result', result) + self._metrics.increment( + 'sandbox.create.total', + attrs=_result_metric_attrs(result), + ) + self._logger.info( + 'sandbox_created', + attrs=_sandbox_attrs(new_session), + ) + return new_session except Exception as exc: span.set_attribute('sandbox.result', 'error') self._metrics.increment( @@ -164,98 +131,6 @@ class CreateSandbox: span.record_error(exc) raise - def _reuse_or_conflict( - self, - command: CreateSandboxCommand, - session: SandboxSession, - span: Span, - ) -> SandboxSession: - _set_session_span_attrs(span, session) - if not _session_matches_command(session, command): - reason = _conflict_reason(session, command) - span.set_attribute('sandbox.result', 'conflict') - span.set_attribute('sandbox.conflict.reason', reason) - self._metrics.increment( - 'sandbox.create.total', - attrs=_conflict_metric_attrs(reason), - ) - self._logger.warning( - 'sandbox_conflict', - attrs=_conflict_attrs(session, command, reason), - ) - raise SandboxConflictError(str(command.chat_id)) - - span.set_attribute('sandbox.result', 'reused') - self._metrics.increment( - 'sandbox.create.total', - attrs=_result_metric_attrs('reused'), - ) - self._logger.info( - 'sandbox_reused', - attrs=_sandbox_attrs(session), - ) - return session - - def _create_or_replace( - self, - command: CreateSandboxCommand, - session: SandboxSession | None, - span: Span, - ) -> SandboxSession: - result = 'created' - new_session_id: UUID | None = None - if session is not None: - result = 'replaced' - new_session_id = self._replace_expired_session(session, span) - - created_at = self._clock.now() - expires_at = created_at + self._ttl - if new_session_id is None: - new_session_id = _new_session_id() - span.set_attribute('session.id', str(new_session_id)) - new_session = self._runtime.create( - session_id=new_session_id, - chat_id=command.chat_id, - agent_id=command.agent_id, - volume_host_path=command.volume_host_path, - created_at=created_at, - expires_at=expires_at, - ) - if result == 'replaced': - _set_new_session_span_attrs(span, new_session) - self._save_created_session(new_session) - _set_active_count(self._metrics, self._repository) - if result == 'replaced': - span.set_attribute('session.id', str(new_session.session_id)) - _set_session_span_attrs(span, new_session) - span.set_attribute('sandbox.result', result) - self._metrics.increment( - 'sandbox.create.total', - attrs=_result_metric_attrs(result), - ) - self._logger.info( - 'sandbox_created', - attrs=_sandbox_attrs(new_session), - ) - return new_session - - def _replace_expired_session( - self, - session: SandboxSession, - span: Span, - ) -> UUID: - new_session_id = _new_session_id() - _set_previous_session_span_attrs(span, session) - span.set_attribute('sandbox.new_session.id', str(new_session_id)) - self._logger.info( - 'sandbox_replaced', - attrs=_sandbox_attrs(session), - ) - self._runtime.stop(session.container_id) - self._repository.delete(session.session_id) - _set_active_count(self._metrics, self._repository) - return new_session_id - def _save_created_session(self, session: SandboxSession) -> None: try: self._repository.save(session) @@ -387,52 +262,6 @@ def _new_session_id() -> UUID: return uuid4() -def _session_matches_command( - session: SandboxSession, - command: CreateSandboxCommand, -) -> bool: - return ( - session.agent_id == command.agent_id - and session.volume_host_path == command.volume_host_path - ) - - -def _conflict_reason( - session: SandboxSession, - command: CreateSandboxCommand, -) -> str: - reasons: list[str] = [] - if session.agent_id != command.agent_id: - reasons.append('agent_id') - if session.volume_host_path != command.volume_host_path: - reasons.append('volume_host_path') - - return ','.join(reasons) - - -def _set_session_span_attrs(span: Span, session: SandboxSession) -> None: - span.set_attribute('session.id', str(session.session_id)) - span.set_attribute('container.id', session.container_id) - span.set_attribute('sandbox.session_agent.id', session.agent_id) - span.set_attribute('sandbox.session_volume.host_path', session.volume_host_path) - if session.endpoint is not None: - span.set_attribute('sandbox.endpoint.ip', session.endpoint.ip) - span.set_attribute('sandbox.endpoint.port', session.endpoint.port) - - -def _set_previous_session_span_attrs(span: Span, session: SandboxSession) -> None: - span.set_attribute('sandbox.previous_session.id', str(session.session_id)) - span.set_attribute('sandbox.previous_container.id', session.container_id) - span.set_attribute('sandbox.previous_agent.id', session.agent_id) - span.set_attribute('sandbox.previous_volume.host_path', session.volume_host_path) - - -def _set_new_session_span_attrs(span: Span, session: SandboxSession) -> None: - span.set_attribute('sandbox.new_container.id', session.container_id) - span.set_attribute('sandbox.new_agent.id', session.agent_id) - span.set_attribute('sandbox.new_volume.host_path', session.volume_host_path) - - def _sandbox_attrs(session: SandboxSession) -> dict[str, str]: return { 'chat_id': str(session.chat_id), @@ -453,42 +282,6 @@ def _result_metric_attrs(result: str) -> dict[str, str]: return {'result': result} -def _conflict_metric_attrs(reason: str) -> dict[str, str]: - return { - 'result': 'conflict', - 'reason': reason, - } - - -def _conflict_attrs( - session: SandboxSession, - command: CreateSandboxCommand, - reason: str, -) -> dict[str, str]: - return { - 'chat_id': str(command.chat_id), - 'session_id': str(session.session_id), - 'container_id': session.container_id, - 'requested_agent_id': command.agent_id, - 'session_agent_id': session.agent_id, - 'requested_volume_host_path': command.volume_host_path, - 'session_volume_host_path': session.volume_host_path, - 'reason': reason, - } - - -def _delete_error_attrs( - chat_id: UUID, - session: SandboxSession | None, - error: Exception, -) -> dict[str, str]: - attrs = {'chat_id': str(chat_id), 'error': type(error).__name__} - if session is not None: - attrs.update(_sandbox_attrs(session)) - - return attrs - - def _error_metric_attrs(error_type: str) -> dict[str, str]: return {'error.type': error_type}