add sandbox network auto-create and restore dind compose

This commit is contained in:
Азамат Нураев 2026-05-05 09:42:22 +03:00
parent 43bd4bcbff
commit 06271db003
4 changed files with 65 additions and 8 deletions

View file

@ -1,6 +1,7 @@
import asyncio
from collections.abc import Awaitable, Callable
from docker.errors import NotFound
from fastapi import FastAPI
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
@ -47,6 +48,19 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
raise
def _ensure_sandbox_network(container: AppContainer) -> None:
    """Make sure the configured sandbox Docker network exists.

    The network named by ``container.config.sandbox.network_name`` is looked
    up through the container's Docker client. When the lookup raises
    ``NotFound`` the network is created and an info event is logged; any
    other exception propagates to the caller.
    """
    docker = container._docker_client
    wanted = container.config.sandbox.network_name

    try:
        docker.networks.get(wanted)
        return
    except NotFound:
        # The lookup failed, so the network is missing: create it and record
        # that this startup was the one that created it.
        docker.networks.create(wanted)
        container.observability.logger.info(
            'sandbox_network_created',
            attrs={'network': wanted},
        )
def _build_startup_handler(
app: FastAPI,
container: AppContainer,
@ -56,6 +70,7 @@ def _build_startup_handler(
if task is not None and not task.done():
return
await asyncio.to_thread(_ensure_sandbox_network, container)
await asyncio.to_thread(container.sandbox_reconciler.execute)
stop_event = asyncio.Event()

View file

@ -7,9 +7,9 @@ http:
port: 8123
logging:
level: INFO
output: otel
format: json
level: DEBUG
output: stdout
format: text
metrics:
enabled: true
@ -25,7 +25,7 @@ otel:
metric_export_interval: 1000
docker:
base_url: unix:///var/run/docker.sock
base_url: tcp://docker-engine:2375
sandbox:
image: nginx:1.27-alpine
@ -33,9 +33,9 @@ sandbox:
agent_service_port: 8000
ttl_seconds: 300
cleanup_interval_seconds: 60
chats_root: /tmp/master-sandbox/chats
dependencies_host_path: /tmp/master-sandbox/dependencies
lambda_tools_host_path: /tmp/master-sandbox/lambda-tools
chats_root: /var/lib/master-sandbox/chats
dependencies_host_path: /var/lib/master-dependencies
lambda_tools_host_path: /var/lib/master-lambda-tools
chat_mount_path: /workspace/chat
dependencies_mount_path: /opt/dependencies
lambda_tools_mount_path: /opt/lambda-tools

View file

@ -6,6 +6,8 @@ services:
target: run
user: root
depends_on:
docker-engine:
condition: service_healthy
otel-collector:
condition: service_started
environment:
@ -15,7 +17,30 @@ services:
- '127.0.0.1:8123:8123'
volumes:
- ./config/docker-compose.yml:/app/config/app.yaml:ro
- /var/run/docker.sock:/var/run/docker.sock
- sandbox-data:/var/lib/master-sandbox
- sandbox-dependencies:/var/lib/master-dependencies:ro
- sandbox-tools:/var/lib/master-lambda-tools:ro
# Docker-in-Docker engine service. The app config changed in this same commit
# points the Docker client at tcp://docker-engine:2375, i.e. at this service.
docker-engine:
image: docker:28-dind
# Runs with extended privileges (the dind image documents this as required).
privileged: true
environment:
# Empty value: per the official docker image docs this turns off automatic
# TLS certificate generation, so the daemon serves plain TCP.
# NOTE(review): confirm this is acceptable outside local development.
DOCKER_TLS_CERTDIR: ''
command:
# Listen on all interfaces on port 2375; no `ports:` entry is declared for
# this service, so the port is not published to the host from here.
- --host=tcp://0.0.0.0:2375
# The app service waits on this check (`condition: service_healthy`).
# `docker info` is tried every 5s with a 5s timeout, up to 12 retries.
healthcheck:
test:
- CMD
- docker
- info
interval: 5s
timeout: 5s
retries: 12
volumes:
# Engine state is kept in its own named volume.
- docker-data:/var/lib/docker
# The same named volumes are mounted at the same paths in the app service
# (dependencies and tools read-only there, writable here) — presumably so
# that paths the app hands to this engine for sandbox mounts resolve inside
# this container; verify against the sandbox launcher.
- sandbox-data:/var/lib/master-sandbox
- sandbox-dependencies:/var/lib/master-dependencies
- sandbox-tools:/var/lib/master-lambda-tools
otel-collector:
image: grafana/otel-lgtm:latest
@ -25,4 +50,8 @@ services:
- lgtm-data:/data
volumes:
docker-data:
lgtm-data:
sandbox-data:
sandbox-dependencies:
sandbox-tools:

View file

@ -1,6 +1,7 @@
import asyncio
import json
from datetime import UTC, datetime, timedelta
from typing import Any
from uuid import UUID
import pytest
@ -109,10 +110,22 @@ class FakeDockerClient(DockerClient):
self.base_url = base_url
self.close_calls = 0
@property  # type: ignore[override]
def networks(self) -> Any:
    """Expose a no-op replacement for the Docker client's networks collection.

    A new ``_FakeNetworks`` instance is handed out on every access.
    """
    fake_collection = _FakeNetworks()
    return fake_collection
def close(self) -> None:
    """Record one more ``close()`` invocation instead of closing anything."""
    self.close_calls = self.close_calls + 1
class _FakeNetworks:
def get(self, name: str) -> None:
return None
def create(self, name: str) -> None:
return None
class EmptySandboxState:
def __init__(self) -> None:
self.calls = 0