add sandbox runtime control endpoints

This commit is contained in:
Азамат Нураев 2026-04-28 21:53:26 +03:00
parent 0ca0bac9bf
commit 1b38bcfeab
17 changed files with 1408 additions and 119 deletions

View file

@ -247,6 +247,20 @@ def _load_sandbox_config(
env,
'APP_SANDBOX_IMAGE',
),
network_name=_yaml_or_env_str(
section,
'network_name',
'sandbox.network_name',
env,
'APP_SANDBOX_NETWORK_NAME',
),
agent_service_port=_yaml_or_env_int(
section,
'agent_service_port',
'sandbox.agent_service_port',
env,
'APP_SANDBOX_AGENT_SERVICE_PORT',
),
ttl_seconds=_yaml_or_env_int(
section,
'ttl_seconds',
@ -303,6 +317,13 @@ def _load_sandbox_config(
env,
'APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH',
),
volume_mount_path=_yaml_or_env_str(
section,
'volume_mount_path',
'sandbox.volume_mount_path',
env,
'APP_SANDBOX_VOLUME_MOUNT_PATH',
),
)

View file

@ -48,6 +48,8 @@ class DockerConfig:
@dataclass(frozen=True, slots=True)
class SandboxConfig:
image: str
network_name: str
agent_service_port: int
ttl_seconds: int
cleanup_interval_seconds: int
chats_root: str
@ -56,6 +58,7 @@ class SandboxConfig:
chat_mount_path: str
dependencies_mount_path: str
lambda_tools_mount_path: str
volume_mount_path: str
@dataclass(frozen=True, slots=True)

View file

@ -15,7 +15,7 @@ from adapter.sandbox.reconciliation import SandboxSessionReconciler
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import Clock
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, DeleteSandbox
@dataclass(frozen=True, slots=True)
@ -27,6 +27,7 @@ class AppRepositories:
class AppUsecases:
create_sandbox: CreateSandbox
cleanup_expired_sandboxes: CleanupExpiredSandboxes
delete_sandbox: DeleteSandbox
@dataclass(slots=True)
@ -116,6 +117,14 @@ def build_container(
metrics=observability.metrics,
tracer=observability.tracer,
),
delete_sandbox=DeleteSandbox(
repository=sandbox_repository,
locker=sandbox_locker,
runtime=sandbox_runtime,
logger=observability.logger,
metrics=observability.metrics,
tracer=observability.tracer,
),
)
return AppContainer(

View file

@ -9,10 +9,17 @@ from docker.types import Mount
from adapter.config.model import SandboxConfig
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus
from usecase.interface import Metrics, SandboxRuntime, Span, Tracer
SANDBOX_LABELS = ('session_id', 'chat_id', 'expires_at')
SANDBOX_LABELS = (
'session_id',
'chat_id',
'expires_at',
'agent_id',
'volume_host_path',
'endpoint_port',
)
class DockerSandboxRuntime(SandboxRuntime):
@ -33,6 +40,8 @@ class DockerSandboxRuntime(SandboxRuntime):
*,
session_id: UUID,
chat_id: UUID,
agent_id: str,
volume_host_path: str,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
@ -49,6 +58,7 @@ class DockerSandboxRuntime(SandboxRuntime):
try:
try:
chat_path = self._chat_path(chat_id)
volume_path = self._request_host_path(volume_host_path)
dependencies_path = self._readonly_host_path(
self._config.dependencies_host_path
)
@ -59,22 +69,42 @@ class DockerSandboxRuntime(SandboxRuntime):
container = self._client.containers.run(
self._config.image,
detach=True,
labels=self._labels(session_id, chat_id, expires_at),
environment={'AGENT_ID': agent_id},
labels=self._labels(
session_id,
chat_id,
expires_at,
agent_id,
str(volume_path),
),
mounts=self._mounts(
chat_path,
volume_path,
dependencies_path,
lambda_tools_path,
),
network=self._config.network_name,
)
try:
container_id = str(getattr(container, 'id', '')).strip()
if not container_id:
raise ValueError('invalid container id')
endpoint = self._endpoint_from_container(container)
except (DockerException, OSError, ValueError) as exc:
self._remove_created_container(container, str(chat_id), exc)
raise SandboxStartError(str(chat_id)) from exc
except SandboxStartError:
raise
except (DockerException, OSError, ValueError) as exc:
raise SandboxStartError(str(chat_id)) from exc
container_id = str(getattr(container, 'id', '')).strip()
if not container_id:
raise SandboxStartError(str(chat_id))
result = 'created'
span.set_attribute('container.id', container_id)
span.set_attribute('agent.id', agent_id)
span.set_attribute('sandbox.endpoint.ip', endpoint.ip)
span.set_attribute('sandbox.endpoint.port', endpoint.port)
span.set_attribute('sandbox.result', result)
return SandboxSession(
session_id=session_id,
@ -83,6 +113,9 @@ class DockerSandboxRuntime(SandboxRuntime):
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=str(volume_path),
endpoint=endpoint,
)
except Exception as exc:
span.set_attribute('sandbox.result', result)
@ -132,6 +165,39 @@ class DockerSandboxRuntime(SandboxRuntime):
attrs=_runtime_metric_attrs('stop', result),
)
def delete(self, container_id: str) -> None:
    """Force-remove the container identified by ``container_id``.

    A container that Docker no longer knows about is not an error: the
    outcome is recorded as ``not_found`` and the method returns normally.

    Raises:
        SandboxError: with code ``sandbox_delete_failed`` when Docker
            reports any other failure while looking up or removing the
            container.
    """
    t0 = time.perf_counter()
    outcome = 'error'
    span_attrs = {'container.id': container_id}
    with self._tracer.start_span(
        'adapter.docker.delete_sandbox',
        attrs=span_attrs,
    ) as span:
        try:
            target = self._client.containers.get(container_id)
            _set_span_container_attrs(span, target)
            target.remove(force=True)
            outcome = 'deleted'
            span.set_attribute('sandbox.result', outcome)
        except NotFound:
            # Nothing left to remove; fall through to the duration metric.
            outcome = 'not_found'
            span.set_attribute('sandbox.result', outcome)
        except DockerException as docker_error:
            span.set_attribute('sandbox.result', outcome)
            span.record_error(docker_error)
            error_attrs = _runtime_error_metric_attrs(
                'delete',
                type(docker_error).__name__,
            )
            self._metrics.increment(
                'sandbox.runtime.error.total',
                attrs=error_attrs,
            )
            raise SandboxError('sandbox_delete_failed') from docker_error
        finally:
            # The duration is recorded for every outcome, including errors.
            self._metrics.record(
                'sandbox.runtime.delete.duration_ms',
                _duration_ms(t0),
                attrs=_runtime_metric_attrs('delete', outcome),
            )
def list_active_sessions(self) -> list[SandboxSession]:
started_at = time.perf_counter()
result = 'error'
@ -179,16 +245,22 @@ class DockerSandboxRuntime(SandboxRuntime):
session_id: UUID,
chat_id: UUID,
expires_at: datetime,
agent_id: str,
volume_host_path: str,
) -> dict[str, str]:
return {
'session_id': str(session_id),
'chat_id': str(chat_id),
'expires_at': expires_at.isoformat(),
'agent_id': agent_id,
'volume_host_path': volume_host_path,
'endpoint_port': str(self._config.agent_service_port),
}
def _mounts(
self,
chat_path: Path,
volume_path: Path,
dependencies_path: Path,
lambda_tools_path: Path,
) -> list[Mount]:
@ -210,6 +282,11 @@ class DockerSandboxRuntime(SandboxRuntime):
type='bind',
read_only=True,
),
Mount(
target=self._config.volume_mount_path,
source=str(volume_path),
type='bind',
),
]
def _chat_path(self, chat_id: UUID) -> Path:
@ -225,6 +302,29 @@ class DockerSandboxRuntime(SandboxRuntime):
raise ValueError('invalid host path')
return host_path
def _request_host_path(self, path_value: str) -> Path:
host_path = Path(path_value).expanduser()
if not host_path.is_absolute():
raise ValueError('invalid host path')
return host_path.resolve(strict=False)
def _remove_created_container(
self,
container: object,
chat_id: str,
error: Exception,
) -> None:
remove = getattr(container, 'remove', None)
if not callable(remove):
raise SandboxStartError(chat_id) from error
try:
remove(force=True)
except NotFound:
return
except Exception as exc:
raise SandboxStartError(chat_id) from exc
def _session_from_container(self, container: object) -> SandboxSession | None:
container_id = str(getattr(container, 'id', '')).strip()
labels = getattr(container, 'labels', None)
@ -234,6 +334,14 @@ class DockerSandboxRuntime(SandboxRuntime):
try:
session_id = UUID(labels['session_id'])
chat_id = UUID(labels['chat_id'])
agent_id = labels['agent_id']
volume_host_path = labels['volume_host_path']
endpoint_port = int(labels['endpoint_port'])
if not isinstance(agent_id, str) or not isinstance(volume_host_path, str):
raise ValueError('invalid sandbox labels')
if not Path(volume_host_path).is_absolute() or endpoint_port <= 0:
raise ValueError('invalid sandbox labels')
endpoint = self._endpoint_from_container(container, endpoint_port)
created_at = self._container_created_at(container)
expires_at = _parse_datetime(labels['expires_at'])
except (KeyError, TypeError, ValueError):
@ -246,18 +354,13 @@ class DockerSandboxRuntime(SandboxRuntime):
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
agent_id=agent_id,
volume_host_path=volume_host_path,
endpoint=endpoint,
)
def _container_created_at(self, container: object) -> datetime:
attrs = getattr(container, 'attrs', None)
if not isinstance(attrs, dict):
reload_container = getattr(container, 'reload', None)
if callable(reload_container):
reload_container()
attrs = getattr(container, 'attrs', None)
if not isinstance(attrs, dict):
raise ValueError('invalid container attrs')
attrs = self._container_attrs(container)
raw_created_at = attrs.get('Created')
if not isinstance(raw_created_at, str):
@ -265,6 +368,42 @@ class DockerSandboxRuntime(SandboxRuntime):
return _parse_datetime(raw_created_at)
def _endpoint_from_container(
    self,
    container: object,
    port: int | None = None,
) -> SandboxEndpoint:
    """Build the sandbox endpoint from a container's network settings.

    The IP address is read from
    ``attrs['NetworkSettings']['Networks'][<configured network>]['IPAddress']``.

    Args:
        container: Docker container object to inspect.
        port: Port to report; when ``None`` the configured
            ``agent_service_port`` is used.

    Raises:
        ValueError: if any level of the network settings is missing or not
            a dict, or the IP address is missing, empty, or not a string.
    """
    node: object = self._container_attrs(container)
    # Walk down the nested inspect payload one level at a time.
    for key in ('NetworkSettings', 'Networks', self._config.network_name):
        node = node.get(key)
        if not isinstance(node, dict):
            raise ValueError('invalid endpoint')
    address = node.get('IPAddress')
    if not (isinstance(address, str) and address):
        raise ValueError('invalid endpoint')
    if port is None:
        resolved_port = self._config.agent_service_port
    else:
        resolved_port = port
    return SandboxEndpoint(ip=address, port=resolved_port)
def _container_attrs(self, container: object) -> dict[str, object]:
reload_container = getattr(container, 'reload', None)
if callable(reload_container):
reload_container()
attrs = getattr(container, 'attrs', None)
if not isinstance(attrs, dict):
raise ValueError('invalid container attrs')
return attrs
def _host_path(self, path_value: str) -> Path:
return Path(path_value).expanduser().resolve(strict=False)

View file

@ -1,7 +1,7 @@
from fastapi import Depends, Request
from adapter.di.container import AppContainer
from usecase.sandbox import CreateSandbox
from usecase.sandbox import CreateSandbox, DeleteSandbox
APP_CONTAINER_STATE = 'container'
APP_CONFIG_STATE = 'config'
@ -18,3 +18,9 @@ def get_create_sandbox(
container: AppContainer = Depends(get_container),
) -> CreateSandbox:
return container.usecases.create_sandbox
def get_delete_sandbox(
    container: AppContainer = Depends(get_container),
) -> DeleteSandbox:
    """Provide the delete-sandbox use case held by the application container."""
    usecases = container.usecases
    return usecases.delete_sandbox

View file

@ -1,19 +1,30 @@
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, status
from adapter.di.container import AppContainer
from adapter.http.fastapi.dependencies import (
get_container,
get_create_sandbox,
get_delete_sandbox,
)
from adapter.http.fastapi.schemas import (
CreateSandboxRequest,
DeleteSandboxResponse,
ErrorResponse,
HealthResponse,
SandboxEndpointResponse,
SandboxSessionResponse,
)
from domain.error import SandboxError, SandboxStartError
from domain.error import SandboxConflictError, SandboxError, SandboxStartError
from domain.sandbox import SandboxSession
from usecase.sandbox import CreateSandbox, CreateSandboxCommand
from usecase.sandbox import (
CreateSandbox,
CreateSandboxCommand,
DeleteSandbox,
DeleteSandboxCommand,
DeleteSandboxResult,
)
router = APIRouter()
@ -35,6 +46,7 @@ def health(container: AppContainer = Depends(get_container)) -> HealthResponse:
'/create',
response_model=SandboxSessionResponse,
responses={
status.HTTP_409_CONFLICT: {'model': ErrorResponse},
status.HTTP_503_SERVICE_UNAVAILABLE: {'model': ErrorResponse},
status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse},
},
@ -45,7 +57,18 @@ def create_sandbox(
usecase: CreateSandbox = Depends(get_create_sandbox),
) -> SandboxSessionResponse:
try:
session = usecase.execute(CreateSandboxCommand(chat_id=request.chat_id))
session = usecase.execute(
CreateSandboxCommand(
chat_id=request.chat_id,
agent_id=request.agent_id,
volume_host_path=request.volume_host_path,
)
)
except SandboxConflictError as exc:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=str(exc),
) from exc
except SandboxStartError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
@ -60,11 +83,55 @@ def create_sandbox(
return _to_sandbox_session_response(session)
@router.delete(
    '/sandboxes/{chat_id}',
    response_model=DeleteSandboxResponse,
    status_code=status.HTTP_200_OK,
    responses={
        status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse},
    },
)
def delete_sandbox(
    chat_id: UUID,
    usecase: DeleteSandbox = Depends(get_delete_sandbox),
) -> DeleteSandboxResponse:
    """Delete the sandbox that belongs to ``chat_id``.

    Runs the delete use case and maps its result to the response schema.
    Any ``SandboxError`` raised by the use case is reported as HTTP 500
    with the error text as ``detail``.
    """
    try:
        outcome = usecase.execute(DeleteSandboxCommand(chat_id=chat_id))
    except SandboxError as sandbox_error:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(sandbox_error),
        ) from sandbox_error
    else:
        return _to_delete_sandbox_response(outcome)
def _to_sandbox_session_response(session: SandboxSession) -> SandboxSessionResponse:
    """Convert a sandbox session into its HTTP response schema.

    Raises:
        HTTPException: 500 with detail ``sandbox_endpoint_unavailable`` when
            the session carries no endpoint.
    """
    endpoint = session.endpoint
    if endpoint is None:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail='sandbox_endpoint_unavailable',
        )
    endpoint_response = SandboxEndpointResponse(
        ip=endpoint.ip,
        port=endpoint.port,
    )
    return SandboxSessionResponse(
        session_id=session.session_id,
        chat_id=session.chat_id,
        agent_id=session.agent_id,
        volume_host_path=session.volume_host_path,
        container_id=session.container_id,
        endpoint=endpoint_response,
        status=session.status.value,
        expires_at=session.expires_at,
    )
def _to_delete_sandbox_response(result: DeleteSandboxResult) -> DeleteSandboxResponse:
    """Copy the delete use case result field-by-field into the response schema."""
    response = DeleteSandboxResponse(
        chat_id=result.chat_id,
        result=result.result,
        session_id=result.session_id,
        container_id=result.container_id,
    )
    return response

View file

@ -1,7 +1,8 @@
from datetime import datetime
from pathlib import Path
from uuid import UUID
from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, field_validator
class HealthResponse(BaseModel):
@ -14,15 +15,47 @@ class CreateSandboxRequest(BaseModel):
model_config = ConfigDict(extra='forbid')
chat_id: UUID
agent_id: str
volume_host_path: str
@field_validator('agent_id')
@classmethod
def validate_agent_id(cls, value: str) -> str:
    """Reject an ``agent_id`` that is empty or whitespace-only.

    A valid value is returned unchanged (it is not stripped).
    """
    if value.strip():
        return value
    raise ValueError('invalid agent_id')
@field_validator('volume_host_path')
@classmethod
def validate_volume_host_path(cls, value: str) -> str:
    """Validate ``volume_host_path`` and return it in normalised form.

    A leading ``~`` is expanded, the result must be absolute, and the path
    is resolved without requiring it to exist.

    Raises:
        ValueError: if the path is not absolute after ``~`` expansion, or
            if the ``~user`` prefix cannot be expanded.
    """
    try:
        path = Path(value).expanduser()
    except RuntimeError as exc:
        # expanduser() raises RuntimeError for an unknown '~user'. Convert it
        # to ValueError so it is reported as a validation failure instead of
        # escaping as an unhandled exception.
        raise ValueError('invalid volume_host_path') from exc
    if not path.is_absolute():
        raise ValueError('invalid volume_host_path')
    return str(path.resolve(strict=False))
# Response schema for a sandbox network endpoint: an IP address and a port.
# Nested inside SandboxSessionResponse as its `endpoint` field.
class SandboxEndpointResponse(BaseModel):
ip: str
port: int
# Response schema describing a sandbox session; used as the response model
# of the '/create' route.
class SandboxSessionResponse(BaseModel):
session_id: UUID
chat_id: UUID
agent_id: str
# Host path of the volume associated with the session.
volume_host_path: str
container_id: str
# IP and port where the sandbox is reachable.
endpoint: SandboxEndpointResponse
# Session status as a plain string (filled from the status enum's value).
status: str
expires_at: datetime
# Response schema of the DELETE '/sandboxes/{chat_id}' route.
class DeleteSandboxResponse(BaseModel):
chat_id: UUID
# Outcome string copied from the delete use case result.
result: str
# Optional; both default to None when the use case result carries no value.
session_id: UUID | None = None
container_id: str | None = None
# Error body schema: a single `detail` string. Declared as the model for the
# error status codes listed in the routes' `responses` mappings.
class ErrorResponse(BaseModel):
detail: str