master/adapter/docker/runtime.py
2026-04-03 01:15:23 +03:00

315 lines
11 KiB
Python

import time
from datetime import datetime
from pathlib import Path
from uuid import UUID
from docker import DockerClient
from docker.errors import DockerException, NotFound
from docker.types import Mount
from adapter.config.model import SandboxConfig
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from usecase.interface import Metrics, SandboxRuntime, Span, Tracer
# Label keys written onto every sandbox container (see ``_labels``) and used
# as the label filter when listing active sandboxes.
SANDBOX_LABELS = ('session_id', 'chat_id', 'expires_at')
class DockerSandboxRuntime(SandboxRuntime):
    """Sandbox runtime backed by Docker containers.

    Each public operation runs inside a tracer span, records its duration as a
    ``sandbox.runtime.<operation>.duration_ms`` metric tagged with the outcome,
    and increments ``sandbox.runtime.error.total`` when it fails.
    """

    def __init__(
        self,
        config: SandboxConfig,
        client: DockerClient,
        metrics: Metrics,
        tracer: Tracer,
    ) -> None:
        """Store the collaborators; no Docker call is made here.

        Args:
            config: Sandbox settings (image, host paths, mount targets).
            client: Docker client used for all container operations.
            metrics: Sink for duration and error metrics.
            tracer: Tracer used to open one span per operation.
        """
        self._config = config
        self._client = client
        self._metrics = metrics
        self._tracer = tracer

    def create(
        self,
        *,
        session_id: UUID,
        chat_id: UUID,
        created_at: datetime,
        expires_at: datetime,
    ) -> SandboxSession:
        """Start a detached sandbox container for a chat.

        The chat directory is created on the host if missing and bind-mounted
        into the container together with the read-only dependencies and
        lambda-tools directories. The container is labelled with the session
        id, chat id and expiry time.

        Args:
            session_id: Identifier stored in the ``session_id`` label.
            chat_id: Identifier stored in the ``chat_id`` label; also names the
                chat directory under the configured chats root.
            created_at: Creation time copied into the returned session.
            expires_at: Expiry time stored in the ``expires_at`` label.

        Returns:
            A session in ``RUNNING`` status pointing at the new container.

        Raises:
            SandboxStartError: If a host path is invalid, the chat directory
                cannot be created, Docker fails to start the container, or the
                started container exposes no id.
        """
        started_at = time.perf_counter()
        # Stays 'error' unless the success path below overwrites it, so the
        # duration metric in ``finally`` always carries the right outcome.
        result = 'error'
        with self._tracer.start_span(
            'adapter.docker.create_sandbox',
            attrs={
                'chat.id': str(chat_id),
                'session.id': str(session_id),
            },
        ) as span:
            try:
                try:
                    chat_path = self._chat_path(chat_id)
                    dependencies_path = self._readonly_host_path(
                        self._config.dependencies_host_path
                    )
                    lambda_tools_path = self._readonly_host_path(
                        self._config.lambda_tools_host_path
                    )
                    chat_path.mkdir(parents=True, exist_ok=True)
                    container = self._client.containers.run(
                        self._config.image,
                        detach=True,
                        labels=self._labels(session_id, chat_id, expires_at),
                        mounts=self._mounts(
                            chat_path,
                            dependencies_path,
                            lambda_tools_path,
                        ),
                    )
                except (DockerException, OSError, ValueError) as exc:
                    # Translate infrastructure failures into the domain error;
                    # the original exception is kept as ``__cause__``.
                    raise SandboxStartError(str(chat_id)) from exc
                container_id = str(getattr(container, 'id', '')).strip()
                if not container_id:
                    raise SandboxStartError(str(chat_id))
                result = 'created'
                span.set_attribute('container.id', container_id)
                span.set_attribute('sandbox.result', result)
                return SandboxSession(
                    session_id=session_id,
                    chat_id=chat_id,
                    container_id=container_id,
                    status=SandboxStatus.RUNNING,
                    created_at=created_at,
                    expires_at=expires_at,
                )
            except Exception as exc:
                span.set_attribute('sandbox.result', result)
                span.record_error(exc)
                self._metrics.increment(
                    'sandbox.runtime.error.total',
                    attrs=_runtime_error_metric_attrs('create', _error_type(exc)),
                )
                raise
            finally:
                self._metrics.record(
                    'sandbox.runtime.create.duration_ms',
                    _duration_ms(started_at),
                    attrs=_runtime_metric_attrs('create', result),
                )

    def stop(self, container_id: str) -> None:
        """Stop the sandbox container with the given id.

        A container that Docker no longer knows about is treated as a normal
        outcome (result ``not_found``): nothing is raised and no error metric
        is emitted.

        Args:
            container_id: Docker id of the container to stop.

        Raises:
            SandboxError: With code ``sandbox_stop_failed`` when Docker fails
                for any reason other than the container being absent.
        """
        started_at = time.perf_counter()
        result = 'error'
        with self._tracer.start_span(
            'adapter.docker.stop_sandbox',
            attrs={'container.id': container_id},
        ) as span:
            try:
                container = self._client.containers.get(container_id)
                _set_span_container_attrs(span, container)
                container.stop()
                result = 'stopped'
                span.set_attribute('sandbox.result', result)
            except NotFound:
                # Must precede the DockerException handler so that a missing
                # container is not reported as a failure.
                result = 'not_found'
                span.set_attribute('sandbox.result', result)
                return
            except DockerException as exc:
                span.set_attribute('sandbox.result', result)
                span.record_error(exc)
                self._metrics.increment(
                    'sandbox.runtime.error.total',
                    attrs=_runtime_error_metric_attrs('stop', type(exc).__name__),
                )
                raise SandboxError('sandbox_stop_failed') from exc
            finally:
                self._metrics.record(
                    'sandbox.runtime.stop.duration_ms',
                    _duration_ms(started_at),
                    attrs=_runtime_metric_attrs('stop', result),
                )

    def list_active_sessions(self) -> list[SandboxSession]:
        """Rebuild sandbox sessions from the labelled containers Docker lists.

        Containers are requested with a label filter on ``SANDBOX_LABELS``.
        Containers whose labels or creation time cannot be interpreted, or
        which disappear while being inspected, are skipped.

        Returns:
            One ``RUNNING`` session per container that could be interpreted.

        Raises:
            SandboxError: With code ``sandbox_list_failed`` when Docker fails
                while listing or while reloading a container's attributes.
        """
        started_at = time.perf_counter()
        result = 'error'
        with self._tracer.start_span(
            'adapter.docker.list_active_sandboxes',
        ) as span:
            try:
                try:
                    containers = self._client.containers.list(
                        filters={'label': list(SANDBOX_LABELS)}
                    )
                    # Session building stays inside this ``try`` because it may
                    # reload a container (a Docker API call); such failures
                    # must surface as the domain error, not a raw Docker one.
                    sessions: list[SandboxSession] = []
                    for container in containers:
                        session = self._session_from_container(container)
                        if session is None:
                            continue
                        sessions.append(session)
                except DockerException as exc:
                    raise SandboxError('sandbox_list_failed') from exc
                result = 'listed'
                span.set_attribute('sandbox.container_count', len(containers))
                span.set_attribute('sandbox.active_count', len(sessions))
                span.set_attribute('sandbox.result', result)
                return sessions
            except Exception as exc:
                span.set_attribute('sandbox.result', result)
                span.record_error(exc)
                self._metrics.increment(
                    'sandbox.runtime.error.total',
                    attrs=_runtime_error_metric_attrs('list_active', _error_type(exc)),
                )
                raise
            finally:
                self._metrics.record(
                    'sandbox.runtime.list_active.duration_ms',
                    _duration_ms(started_at),
                    attrs=_runtime_metric_attrs('list_active', result),
                )

    def _labels(
        self,
        session_id: UUID,
        chat_id: UUID,
        expires_at: datetime,
    ) -> dict[str, str]:
        """Return the container labels that encode a session's identity.

        The expiry is stored in ISO format so it can be parsed back when
        sessions are rebuilt from containers.
        """
        return {
            'session_id': str(session_id),
            'chat_id': str(chat_id),
            'expires_at': expires_at.isoformat(),
        }

    def _mounts(
        self,
        chat_path: Path,
        dependencies_path: Path,
        lambda_tools_path: Path,
    ) -> list[Mount]:
        """Return the bind mounts for a sandbox container.

        The chat directory is mounted without ``read_only``; the dependencies
        and lambda-tools directories are mounted read-only. Mount targets come
        from the configuration.
        """
        return [
            Mount(
                target=self._config.chat_mount_path,
                source=str(chat_path),
                type='bind',
            ),
            Mount(
                target=self._config.dependencies_mount_path,
                source=str(dependencies_path),
                type='bind',
                read_only=True,
            ),
            Mount(
                target=self._config.lambda_tools_mount_path,
                source=str(lambda_tools_path),
                type='bind',
                read_only=True,
            ),
        ]

    def _chat_path(self, chat_id: UUID) -> Path:
        """Return the resolved host directory for a chat.

        Raises:
            ValueError: If the resolved path is not located under the
                configured chats root.
        """
        chats_root = self._host_path(self._config.chats_root)
        chat_path = (chats_root / str(chat_id)).resolve(strict=False)
        # Guard against the resolved path escaping the root (e.g. via links).
        if not chat_path.is_relative_to(chats_root):
            raise ValueError('invalid chat path')
        return chat_path

    def _readonly_host_path(self, path_value: str) -> Path:
        """Resolve a host path and require that it exists.

        Raises:
            ValueError: If nothing exists at the resolved path.
        """
        host_path = self._host_path(path_value)
        if not host_path.exists():
            raise ValueError('invalid host path')
        return host_path

    def _session_from_container(self, container: object) -> SandboxSession | None:
        """Build a session from a container's id, labels and creation time.

        Returns:
            The session, or ``None`` when the container has no id, its labels
            are not a dict, a required label is missing or malformed, its
            creation time cannot be determined, or it no longer exists when
            its attributes are reloaded.
        """
        container_id = str(getattr(container, 'id', '')).strip()
        labels = getattr(container, 'labels', None)
        if not container_id or not isinstance(labels, dict):
            return None
        try:
            session_id = UUID(labels['session_id'])
            chat_id = UUID(labels['chat_id'])
            created_at = self._container_created_at(container)
            expires_at = _parse_datetime(labels['expires_at'])
        except (KeyError, TypeError, ValueError, NotFound):
            # NotFound: the container vanished between listing and reload, so
            # it is no longer an active sandbox.
            return None
        return SandboxSession(
            session_id=session_id,
            chat_id=chat_id,
            container_id=container_id,
            status=SandboxStatus.RUNNING,
            created_at=created_at,
            expires_at=expires_at,
        )

    def _container_created_at(self, container: object) -> datetime:
        """Read the container's ``Created`` attribute as a datetime.

        If the container exposes no ``attrs`` dict yet, its ``reload`` method
        (when present) is called once to fetch them.

        Raises:
            ValueError: If ``attrs`` is still not a dict, ``Created`` is not a
                string, or the value cannot be parsed.
        """
        attrs = getattr(container, 'attrs', None)
        if not isinstance(attrs, dict):
            reload_container = getattr(container, 'reload', None)
            if callable(reload_container):
                reload_container()
                attrs = getattr(container, 'attrs', None)
        if not isinstance(attrs, dict):
            raise ValueError('invalid container attrs')
        raw_created_at = attrs.get('Created')
        if not isinstance(raw_created_at, str):
            raise ValueError('invalid created_at')
        return _parse_datetime(raw_created_at)

    def _host_path(self, path_value: str) -> Path:
        """Expand ``~`` and resolve the path without requiring it to exist."""
        return Path(path_value).expanduser().resolve(strict=False)
def _parse_datetime(value: str) -> datetime:
    """Parse an ISO 8601 timestamp, tolerating Docker-style formatting.

    Two normalizations are applied before ``datetime.fromisoformat``:

    * a trailing ``Z`` is rewritten to ``+00:00``;
    * fractional seconds are truncated or zero-padded to exactly six digits.
      Docker timestamps such as a container's ``Created`` field can carry
      nanosecond precision, which ``fromisoformat`` rejects on Python
      versions before 3.11.

    Args:
        value: Timestamp text, e.g. ``2024-01-01T12:00:00.123456789Z``.

    Returns:
        The parsed datetime; timezone-aware when ``value`` ends in ``Z`` or
        carries a UTC offset, naive otherwise.

    Raises:
        ValueError: If the normalized text is not a valid ISO timestamp.
    """
    import re  # local import keeps this helper self-contained

    normalized = f'{value[:-1]}+00:00' if value.endswith('Z') else value
    # Only the first fraction that directly follows an ``:SS`` field is
    # touched; digits beyond microseconds are dropped, not rounded.
    normalized = re.sub(
        r'(?<=:\d\d)\.(\d+)',
        lambda match: '.' + match.group(1)[:6].ljust(6, '0'),
        normalized,
        count=1,
    )
    return datetime.fromisoformat(normalized)
def _duration_ms(started_at: float) -> float:
    """Return the milliseconds elapsed since a ``time.perf_counter()`` reading."""
    elapsed_seconds = time.perf_counter() - started_at
    return elapsed_seconds * 1000
def _runtime_metric_attrs(operation: str, result: str) -> dict[str, str]:
    """Build the attributes attached to a runtime duration metric."""
    return dict(operation=operation, result=result)
def _runtime_error_metric_attrs(
    operation: str,
    error_type: str,
) -> dict[str, str]:
    """Build the attributes attached to a runtime error counter."""
    keys = ('operation', 'error.type')
    return dict(zip(keys, (operation, error_type)))
def _error_type(error: Exception) -> str:
    """Return the class name of the error's explicit cause, else of the error.

    Only a cause that is an ``Exception`` instance is considered; any other
    ``__cause__`` value (including ``None``) falls back to ``error`` itself.
    """
    cause = error.__cause__
    source = cause if isinstance(cause, Exception) else error
    return type(source).__name__
def _set_span_container_attrs(span: Span, container: object) -> None:
    """Copy a container's session and chat labels onto the span.

    Nothing is set when the container's ``labels`` is not a dict. A label is
    copied only when its value is a non-empty string: ``session_id`` becomes
    the ``session.id`` attribute and ``chat_id`` becomes ``chat.id``.
    """
    labels = getattr(container, 'labels', None)
    if not isinstance(labels, dict):
        return
    # (label key, span attribute) pairs, processed in this order.
    label_to_attr = (
        ('session_id', 'session.id'),
        ('chat_id', 'chat.id'),
    )
    for label_key, span_attr in label_to_attr:
        label_value = labels.get(label_key)
        if isinstance(label_value, str) and label_value:
            span.set_attribute(span_attr, label_value)