add sandbox runtime control endpoints
This commit is contained in:
parent
0ca0bac9bf
commit
1b38bcfeab
17 changed files with 1408 additions and 119 deletions
|
|
@ -247,6 +247,20 @@ def _load_sandbox_config(
|
|||
env,
|
||||
'APP_SANDBOX_IMAGE',
|
||||
),
|
||||
network_name=_yaml_or_env_str(
|
||||
section,
|
||||
'network_name',
|
||||
'sandbox.network_name',
|
||||
env,
|
||||
'APP_SANDBOX_NETWORK_NAME',
|
||||
),
|
||||
agent_service_port=_yaml_or_env_int(
|
||||
section,
|
||||
'agent_service_port',
|
||||
'sandbox.agent_service_port',
|
||||
env,
|
||||
'APP_SANDBOX_AGENT_SERVICE_PORT',
|
||||
),
|
||||
ttl_seconds=_yaml_or_env_int(
|
||||
section,
|
||||
'ttl_seconds',
|
||||
|
|
@ -303,6 +317,13 @@ def _load_sandbox_config(
|
|||
env,
|
||||
'APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH',
|
||||
),
|
||||
volume_mount_path=_yaml_or_env_str(
|
||||
section,
|
||||
'volume_mount_path',
|
||||
'sandbox.volume_mount_path',
|
||||
env,
|
||||
'APP_SANDBOX_VOLUME_MOUNT_PATH',
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -48,6 +48,8 @@ class DockerConfig:
|
|||
@dataclass(frozen=True, slots=True)
|
||||
class SandboxConfig:
|
||||
image: str
|
||||
network_name: str
|
||||
agent_service_port: int
|
||||
ttl_seconds: int
|
||||
cleanup_interval_seconds: int
|
||||
chats_root: str
|
||||
|
|
@ -56,6 +58,7 @@ class SandboxConfig:
|
|||
chat_mount_path: str
|
||||
dependencies_mount_path: str
|
||||
lambda_tools_mount_path: str
|
||||
volume_mount_path: str
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ from adapter.sandbox.reconciliation import SandboxSessionReconciler
|
|||
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
|
||||
from repository.sandbox_session import InMemorySandboxSessionRepository
|
||||
from usecase.interface import Clock
|
||||
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox
|
||||
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, DeleteSandbox
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
|
|
@ -27,6 +27,7 @@ class AppRepositories:
|
|||
class AppUsecases:
|
||||
create_sandbox: CreateSandbox
|
||||
cleanup_expired_sandboxes: CleanupExpiredSandboxes
|
||||
delete_sandbox: DeleteSandbox
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
|
|
@ -116,6 +117,14 @@ def build_container(
|
|||
metrics=observability.metrics,
|
||||
tracer=observability.tracer,
|
||||
),
|
||||
delete_sandbox=DeleteSandbox(
|
||||
repository=sandbox_repository,
|
||||
locker=sandbox_locker,
|
||||
runtime=sandbox_runtime,
|
||||
logger=observability.logger,
|
||||
metrics=observability.metrics,
|
||||
tracer=observability.tracer,
|
||||
),
|
||||
)
|
||||
|
||||
return AppContainer(
|
||||
|
|
|
|||
|
|
@ -9,10 +9,17 @@ from docker.types import Mount
|
|||
|
||||
from adapter.config.model import SandboxConfig
|
||||
from domain.error import SandboxError, SandboxStartError
|
||||
from domain.sandbox import SandboxSession, SandboxStatus
|
||||
from domain.sandbox import SandboxEndpoint, SandboxSession, SandboxStatus
|
||||
from usecase.interface import Metrics, SandboxRuntime, Span, Tracer
|
||||
|
||||
SANDBOX_LABELS = ('session_id', 'chat_id', 'expires_at')
|
||||
SANDBOX_LABELS = (
|
||||
'session_id',
|
||||
'chat_id',
|
||||
'expires_at',
|
||||
'agent_id',
|
||||
'volume_host_path',
|
||||
'endpoint_port',
|
||||
)
|
||||
|
||||
|
||||
class DockerSandboxRuntime(SandboxRuntime):
|
||||
|
|
@ -33,6 +40,8 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
*,
|
||||
session_id: UUID,
|
||||
chat_id: UUID,
|
||||
agent_id: str,
|
||||
volume_host_path: str,
|
||||
created_at: datetime,
|
||||
expires_at: datetime,
|
||||
) -> SandboxSession:
|
||||
|
|
@ -49,6 +58,7 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
try:
|
||||
try:
|
||||
chat_path = self._chat_path(chat_id)
|
||||
volume_path = self._request_host_path(volume_host_path)
|
||||
dependencies_path = self._readonly_host_path(
|
||||
self._config.dependencies_host_path
|
||||
)
|
||||
|
|
@ -59,22 +69,42 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
container = self._client.containers.run(
|
||||
self._config.image,
|
||||
detach=True,
|
||||
labels=self._labels(session_id, chat_id, expires_at),
|
||||
environment={'AGENT_ID': agent_id},
|
||||
labels=self._labels(
|
||||
session_id,
|
||||
chat_id,
|
||||
expires_at,
|
||||
agent_id,
|
||||
str(volume_path),
|
||||
),
|
||||
mounts=self._mounts(
|
||||
chat_path,
|
||||
volume_path,
|
||||
dependencies_path,
|
||||
lambda_tools_path,
|
||||
),
|
||||
network=self._config.network_name,
|
||||
)
|
||||
|
||||
try:
|
||||
container_id = str(getattr(container, 'id', '')).strip()
|
||||
if not container_id:
|
||||
raise ValueError('invalid container id')
|
||||
|
||||
endpoint = self._endpoint_from_container(container)
|
||||
except (DockerException, OSError, ValueError) as exc:
|
||||
self._remove_created_container(container, str(chat_id), exc)
|
||||
raise SandboxStartError(str(chat_id)) from exc
|
||||
except SandboxStartError:
|
||||
raise
|
||||
except (DockerException, OSError, ValueError) as exc:
|
||||
raise SandboxStartError(str(chat_id)) from exc
|
||||
|
||||
container_id = str(getattr(container, 'id', '')).strip()
|
||||
if not container_id:
|
||||
raise SandboxStartError(str(chat_id))
|
||||
|
||||
result = 'created'
|
||||
span.set_attribute('container.id', container_id)
|
||||
span.set_attribute('agent.id', agent_id)
|
||||
span.set_attribute('sandbox.endpoint.ip', endpoint.ip)
|
||||
span.set_attribute('sandbox.endpoint.port', endpoint.port)
|
||||
span.set_attribute('sandbox.result', result)
|
||||
return SandboxSession(
|
||||
session_id=session_id,
|
||||
|
|
@ -83,6 +113,9 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
status=SandboxStatus.RUNNING,
|
||||
created_at=created_at,
|
||||
expires_at=expires_at,
|
||||
agent_id=agent_id,
|
||||
volume_host_path=str(volume_path),
|
||||
endpoint=endpoint,
|
||||
)
|
||||
except Exception as exc:
|
||||
span.set_attribute('sandbox.result', result)
|
||||
|
|
@ -132,6 +165,39 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
attrs=_runtime_metric_attrs('stop', result),
|
||||
)
|
||||
|
||||
def delete(self, container_id: str) -> None:
    """Force-remove the container with the given id.

    A container that no longer exists is treated as success (the span and
    the duration metric are tagged ``not_found``). Any other Docker failure
    is recorded on the span, counted in ``sandbox.runtime.error.total`` and
    re-raised as ``SandboxError('sandbox_delete_failed')``. The duration
    metric is recorded on every path.
    """
    begin = time.perf_counter()
    outcome = 'error'
    span_attrs = {'container.id': container_id}

    with self._tracer.start_span(
        'adapter.docker.delete_sandbox',
        attrs=span_attrs,
    ) as span:
        try:
            target = self._client.containers.get(container_id)
            _set_span_container_attrs(span, target)
            target.remove(force=True)
            outcome = 'deleted'
            span.set_attribute('sandbox.result', outcome)
        except NotFound:
            # Must be handled before DockerException: already gone counts
            # as a successful delete.
            outcome = 'not_found'
            span.set_attribute('sandbox.result', outcome)
            return
        except DockerException as exc:
            span.set_attribute('sandbox.result', outcome)
            span.record_error(exc)
            error_attrs = _runtime_error_metric_attrs(
                'delete',
                type(exc).__name__,
            )
            self._metrics.increment(
                'sandbox.runtime.error.total',
                attrs=error_attrs,
            )
            raise SandboxError('sandbox_delete_failed') from exc
        finally:
            # Runs for success, not-found and error alike.
            self._metrics.record(
                'sandbox.runtime.delete.duration_ms',
                _duration_ms(begin),
                attrs=_runtime_metric_attrs('delete', outcome),
            )
|
||||
|
||||
def list_active_sessions(self) -> list[SandboxSession]:
|
||||
started_at = time.perf_counter()
|
||||
result = 'error'
|
||||
|
|
@ -179,16 +245,22 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
session_id: UUID,
|
||||
chat_id: UUID,
|
||||
expires_at: datetime,
|
||||
agent_id: str,
|
||||
volume_host_path: str,
|
||||
) -> dict[str, str]:
|
||||
return {
|
||||
'session_id': str(session_id),
|
||||
'chat_id': str(chat_id),
|
||||
'expires_at': expires_at.isoformat(),
|
||||
'agent_id': agent_id,
|
||||
'volume_host_path': volume_host_path,
|
||||
'endpoint_port': str(self._config.agent_service_port),
|
||||
}
|
||||
|
||||
def _mounts(
|
||||
self,
|
||||
chat_path: Path,
|
||||
volume_path: Path,
|
||||
dependencies_path: Path,
|
||||
lambda_tools_path: Path,
|
||||
) -> list[Mount]:
|
||||
|
|
@ -210,6 +282,11 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
type='bind',
|
||||
read_only=True,
|
||||
),
|
||||
Mount(
|
||||
target=self._config.volume_mount_path,
|
||||
source=str(volume_path),
|
||||
type='bind',
|
||||
),
|
||||
]
|
||||
|
||||
def _chat_path(self, chat_id: UUID) -> Path:
|
||||
|
|
@ -225,6 +302,29 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
raise ValueError('invalid host path')
|
||||
return host_path
|
||||
|
||||
def _request_host_path(self, path_value: str) -> Path:
|
||||
host_path = Path(path_value).expanduser()
|
||||
if not host_path.is_absolute():
|
||||
raise ValueError('invalid host path')
|
||||
return host_path.resolve(strict=False)
|
||||
|
||||
def _remove_created_container(
|
||||
self,
|
||||
container: object,
|
||||
chat_id: str,
|
||||
error: Exception,
|
||||
) -> None:
|
||||
remove = getattr(container, 'remove', None)
|
||||
if not callable(remove):
|
||||
raise SandboxStartError(chat_id) from error
|
||||
|
||||
try:
|
||||
remove(force=True)
|
||||
except NotFound:
|
||||
return
|
||||
except Exception as exc:
|
||||
raise SandboxStartError(chat_id) from exc
|
||||
|
||||
def _session_from_container(self, container: object) -> SandboxSession | None:
|
||||
container_id = str(getattr(container, 'id', '')).strip()
|
||||
labels = getattr(container, 'labels', None)
|
||||
|
|
@ -234,6 +334,14 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
try:
|
||||
session_id = UUID(labels['session_id'])
|
||||
chat_id = UUID(labels['chat_id'])
|
||||
agent_id = labels['agent_id']
|
||||
volume_host_path = labels['volume_host_path']
|
||||
endpoint_port = int(labels['endpoint_port'])
|
||||
if not isinstance(agent_id, str) or not isinstance(volume_host_path, str):
|
||||
raise ValueError('invalid sandbox labels')
|
||||
if not Path(volume_host_path).is_absolute() or endpoint_port <= 0:
|
||||
raise ValueError('invalid sandbox labels')
|
||||
endpoint = self._endpoint_from_container(container, endpoint_port)
|
||||
created_at = self._container_created_at(container)
|
||||
expires_at = _parse_datetime(labels['expires_at'])
|
||||
except (KeyError, TypeError, ValueError):
|
||||
|
|
@ -246,18 +354,13 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
status=SandboxStatus.RUNNING,
|
||||
created_at=created_at,
|
||||
expires_at=expires_at,
|
||||
agent_id=agent_id,
|
||||
volume_host_path=volume_host_path,
|
||||
endpoint=endpoint,
|
||||
)
|
||||
|
||||
def _container_created_at(self, container: object) -> datetime:
|
||||
attrs = getattr(container, 'attrs', None)
|
||||
if not isinstance(attrs, dict):
|
||||
reload_container = getattr(container, 'reload', None)
|
||||
if callable(reload_container):
|
||||
reload_container()
|
||||
attrs = getattr(container, 'attrs', None)
|
||||
|
||||
if not isinstance(attrs, dict):
|
||||
raise ValueError('invalid container attrs')
|
||||
attrs = self._container_attrs(container)
|
||||
|
||||
raw_created_at = attrs.get('Created')
|
||||
if not isinstance(raw_created_at, str):
|
||||
|
|
@ -265,6 +368,42 @@ class DockerSandboxRuntime(SandboxRuntime):
|
|||
|
||||
return _parse_datetime(raw_created_at)
|
||||
|
||||
def _endpoint_from_container(
|
||||
self,
|
||||
container: object,
|
||||
port: int | None = None,
|
||||
) -> SandboxEndpoint:
|
||||
attrs = self._container_attrs(container)
|
||||
network_settings = attrs.get('NetworkSettings')
|
||||
if not isinstance(network_settings, dict):
|
||||
raise ValueError('invalid endpoint')
|
||||
|
||||
networks = network_settings.get('Networks')
|
||||
if not isinstance(networks, dict):
|
||||
raise ValueError('invalid endpoint')
|
||||
|
||||
network = networks.get(self._config.network_name)
|
||||
if not isinstance(network, dict):
|
||||
raise ValueError('invalid endpoint')
|
||||
|
||||
ip = network.get('IPAddress')
|
||||
if not isinstance(ip, str) or not ip:
|
||||
raise ValueError('invalid endpoint')
|
||||
|
||||
endpoint_port = self._config.agent_service_port if port is None else port
|
||||
return SandboxEndpoint(ip=ip, port=endpoint_port)
|
||||
|
||||
def _container_attrs(self, container: object) -> dict[str, object]:
|
||||
reload_container = getattr(container, 'reload', None)
|
||||
if callable(reload_container):
|
||||
reload_container()
|
||||
|
||||
attrs = getattr(container, 'attrs', None)
|
||||
if not isinstance(attrs, dict):
|
||||
raise ValueError('invalid container attrs')
|
||||
|
||||
return attrs
|
||||
|
||||
def _host_path(self, path_value: str) -> Path:
|
||||
return Path(path_value).expanduser().resolve(strict=False)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
from fastapi import Depends, Request
|
||||
|
||||
from adapter.di.container import AppContainer
|
||||
from usecase.sandbox import CreateSandbox
|
||||
from usecase.sandbox import CreateSandbox, DeleteSandbox
|
||||
|
||||
APP_CONTAINER_STATE = 'container'
|
||||
APP_CONFIG_STATE = 'config'
|
||||
|
|
@ -18,3 +18,9 @@ def get_create_sandbox(
|
|||
container: AppContainer = Depends(get_container),
|
||||
) -> CreateSandbox:
|
||||
return container.usecases.create_sandbox
|
||||
|
||||
|
||||
def get_delete_sandbox(
    container: AppContainer = Depends(get_container),
) -> DeleteSandbox:
    """Dependency provider: return the delete-sandbox use case from the app container."""
    usecases = container.usecases
    return usecases.delete_sandbox
|
||||
|
|
|
|||
|
|
@ -1,19 +1,30 @@
|
|||
from uuid import UUID
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
|
||||
from adapter.di.container import AppContainer
|
||||
from adapter.http.fastapi.dependencies import (
|
||||
get_container,
|
||||
get_create_sandbox,
|
||||
get_delete_sandbox,
|
||||
)
|
||||
from adapter.http.fastapi.schemas import (
|
||||
CreateSandboxRequest,
|
||||
DeleteSandboxResponse,
|
||||
ErrorResponse,
|
||||
HealthResponse,
|
||||
SandboxEndpointResponse,
|
||||
SandboxSessionResponse,
|
||||
)
|
||||
from domain.error import SandboxError, SandboxStartError
|
||||
from domain.error import SandboxConflictError, SandboxError, SandboxStartError
|
||||
from domain.sandbox import SandboxSession
|
||||
from usecase.sandbox import CreateSandbox, CreateSandboxCommand
|
||||
from usecase.sandbox import (
|
||||
CreateSandbox,
|
||||
CreateSandboxCommand,
|
||||
DeleteSandbox,
|
||||
DeleteSandboxCommand,
|
||||
DeleteSandboxResult,
|
||||
)
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
|
@ -35,6 +46,7 @@ def health(container: AppContainer = Depends(get_container)) -> HealthResponse:
|
|||
'/create',
|
||||
response_model=SandboxSessionResponse,
|
||||
responses={
|
||||
status.HTTP_409_CONFLICT: {'model': ErrorResponse},
|
||||
status.HTTP_503_SERVICE_UNAVAILABLE: {'model': ErrorResponse},
|
||||
status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse},
|
||||
},
|
||||
|
|
@ -45,7 +57,18 @@ def create_sandbox(
|
|||
usecase: CreateSandbox = Depends(get_create_sandbox),
|
||||
) -> SandboxSessionResponse:
|
||||
try:
|
||||
session = usecase.execute(CreateSandboxCommand(chat_id=request.chat_id))
|
||||
session = usecase.execute(
|
||||
CreateSandboxCommand(
|
||||
chat_id=request.chat_id,
|
||||
agent_id=request.agent_id,
|
||||
volume_host_path=request.volume_host_path,
|
||||
)
|
||||
)
|
||||
except SandboxConflictError as exc:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_409_CONFLICT,
|
||||
detail=str(exc),
|
||||
) from exc
|
||||
except SandboxStartError as exc:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
|
|
@ -60,11 +83,55 @@ def create_sandbox(
|
|||
return _to_sandbox_session_response(session)
|
||||
|
||||
|
||||
# DELETE /sandboxes/{chat_id}: runs the delete-sandbox use case for the chat
# and returns its result. Any SandboxError is reported as HTTP 500 with the
# error text as detail.
@router.delete(
    '/sandboxes/{chat_id}',
    response_model=DeleteSandboxResponse,
    responses={
        status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse},
    },
    status_code=status.HTTP_200_OK,
)
def delete_sandbox(
    chat_id: UUID,
    usecase: DeleteSandbox = Depends(get_delete_sandbox),
) -> DeleteSandboxResponse:
    try:
        outcome = usecase.execute(DeleteSandboxCommand(chat_id=chat_id))
    except SandboxError as exc:
        failure = HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(exc),
        )
        raise failure from exc
    else:
        return _to_delete_sandbox_response(outcome)
|
||||
|
||||
|
||||
def _to_sandbox_session_response(session: SandboxSession) -> SandboxSessionResponse:
    """Map a sandbox session onto its HTTP response model.

    Raises:
        HTTPException: 500 ``sandbox_endpoint_unavailable`` when the session
            carries no endpoint.
    """
    endpoint = session.endpoint
    if endpoint is None:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail='sandbox_endpoint_unavailable',
        )

    endpoint_response = SandboxEndpointResponse(ip=endpoint.ip, port=endpoint.port)
    return SandboxSessionResponse(
        session_id=session.session_id,
        chat_id=session.chat_id,
        agent_id=session.agent_id,
        volume_host_path=session.volume_host_path,
        container_id=session.container_id,
        endpoint=endpoint_response,
        status=session.status.value,
        expires_at=session.expires_at,
    )
|
||||
|
||||
|
||||
def _to_delete_sandbox_response(result: DeleteSandboxResult) -> DeleteSandboxResponse:
    """Copy the use-case result fields onto the HTTP response model."""
    payload = {
        'chat_id': result.chat_id,
        'result': result.result,
        'session_id': result.session_id,
        'container_id': result.container_id,
    }
    return DeleteSandboxResponse(**payload)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from uuid import UUID
|
||||
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
from pydantic import BaseModel, ConfigDict, field_validator
|
||||
|
||||
|
||||
class HealthResponse(BaseModel):
|
||||
|
|
@ -14,15 +15,47 @@ class CreateSandboxRequest(BaseModel):
|
|||
model_config = ConfigDict(extra='forbid')
|
||||
|
||||
chat_id: UUID
|
||||
agent_id: str
|
||||
volume_host_path: str
|
||||
|
||||
@field_validator('agent_id')
@classmethod
def validate_agent_id(cls, value: str) -> str:
    """Reject empty or whitespace-only agent ids.

    The accepted value is returned unchanged (it is not stripped).
    """
    if value.strip():
        return value
    raise ValueError('invalid agent_id')
|
||||
|
||||
@field_validator('volume_host_path')
@classmethod
def validate_volume_host_path(cls, value: str) -> str:
    """Require an absolute host path and return it in resolved string form.

    ``~`` / ``~user`` prefixes are expanded before the absoluteness check;
    the path is resolved non-strictly, so it does not have to exist.

    Raises:
        ValueError: if the path is relative or its ``~`` prefix cannot be
            expanded to a home directory.
    """
    try:
        path = Path(value).expanduser()
    except RuntimeError as exc:
        # pathlib raises RuntimeError for an unresolvable home directory
        # (e.g. ``~no_such_user/x``). Validators must raise ValueError for
        # the failure to be reported as a validation error rather than
        # escaping as an unhandled exception.
        raise ValueError('invalid volume_host_path') from exc

    if not path.is_absolute():
        raise ValueError('invalid volume_host_path')
    return str(path.resolve(strict=False))
|
||||
|
||||
|
||||
# Response fragment: network address of a sandbox endpoint.
class SandboxEndpointResponse(BaseModel):
    ip: str  # endpoint IP address
    port: int  # endpoint port
|
||||
|
||||
|
||||
# Response body describing a sandbox session.
class SandboxSessionResponse(BaseModel):
    # Identity of the session and the chat/agent it belongs to.
    session_id: UUID
    chat_id: UUID
    agent_id: str

    # Host path of the volume and the id of the backing container.
    volume_host_path: str
    container_id: str

    # Where the sandbox can be reached, and its lifecycle data.
    endpoint: SandboxEndpointResponse
    status: str
    expires_at: datetime
|
||||
|
||||
|
||||
# Response body for the sandbox delete endpoint.
class DeleteSandboxResponse(BaseModel):
    chat_id: UUID
    result: str  # outcome string copied from the use-case result

    # Optional; both default to None.
    session_id: UUID | None = None
    container_id: str | None = None
|
||||
|
||||
|
||||
# Error body used as the documented model for non-success responses.
class ErrorResponse(BaseModel):
    detail: str  # error description
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue