add sandbox network auto-create and restore dind compose

This commit is contained in:
Азамат Нураев 2026-05-05 09:42:22 +03:00
parent 43bd4bcbff
commit 06271db003
4 changed files with 65 additions and 8 deletions

View file

@ -1,6 +1,7 @@
import asyncio import asyncio
from collections.abc import Awaitable, Callable from collections.abc import Awaitable, Callable
from docker.errors import NotFound
from fastapi import FastAPI from fastapi import FastAPI
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
@ -47,6 +48,19 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
raise raise
def _ensure_sandbox_network(container: AppContainer) -> None:
    """Make sure the configured sandbox Docker network exists.

    Looks the network up by the name found in
    ``container.config.sandbox.network_name``. When the lookup raises
    ``NotFound`` the network is created and a ``sandbox_network_created``
    event is logged; when the lookup succeeds nothing else happens.

    Args:
        container: Application container providing the Docker client,
            the sandbox configuration and the observability logger.
    """
    sandbox_network = container.config.sandbox.network_name
    docker = container._docker_client
    try:
        docker.networks.get(sandbox_network)
    except NotFound:
        # The lookup failed, so create the network and record that we did.
        docker.networks.create(sandbox_network)
        log = container.observability.logger
        log.info(
            'sandbox_network_created',
            attrs={'network': sandbox_network},
        )
def _build_startup_handler( def _build_startup_handler(
app: FastAPI, app: FastAPI,
container: AppContainer, container: AppContainer,
@ -56,6 +70,7 @@ def _build_startup_handler(
if task is not None and not task.done(): if task is not None and not task.done():
return return
await asyncio.to_thread(_ensure_sandbox_network, container)
await asyncio.to_thread(container.sandbox_reconciler.execute) await asyncio.to_thread(container.sandbox_reconciler.execute)
stop_event = asyncio.Event() stop_event = asyncio.Event()

View file

@ -7,9 +7,9 @@ http:
port: 8123 port: 8123
logging: logging:
level: INFO level: DEBUG
output: otel output: stdout
format: json format: text
metrics: metrics:
enabled: true enabled: true
@ -25,7 +25,7 @@ otel:
metric_export_interval: 1000 metric_export_interval: 1000
docker: docker:
base_url: unix:///var/run/docker.sock base_url: tcp://docker-engine:2375
sandbox: sandbox:
image: nginx:1.27-alpine image: nginx:1.27-alpine
@ -33,9 +33,9 @@ sandbox:
agent_service_port: 8000 agent_service_port: 8000
ttl_seconds: 300 ttl_seconds: 300
cleanup_interval_seconds: 60 cleanup_interval_seconds: 60
chats_root: /tmp/master-sandbox/chats chats_root: /var/lib/master-sandbox/chats
dependencies_host_path: /tmp/master-sandbox/dependencies dependencies_host_path: /var/lib/master-dependencies
lambda_tools_host_path: /tmp/master-sandbox/lambda-tools lambda_tools_host_path: /var/lib/master-lambda-tools
chat_mount_path: /workspace/chat chat_mount_path: /workspace/chat
dependencies_mount_path: /opt/dependencies dependencies_mount_path: /opt/dependencies
lambda_tools_mount_path: /opt/lambda-tools lambda_tools_mount_path: /opt/lambda-tools

View file

@ -6,6 +6,8 @@ services:
target: run target: run
user: root user: root
depends_on: depends_on:
docker-engine:
condition: service_healthy
otel-collector: otel-collector:
condition: service_started condition: service_started
environment: environment:
@ -15,7 +17,30 @@ services:
- '127.0.0.1:8123:8123' - '127.0.0.1:8123:8123'
volumes: volumes:
- ./config/docker-compose.yml:/app/config/app.yaml:ro - ./config/docker-compose.yml:/app/config/app.yaml:ro
- /var/run/docker.sock:/var/run/docker.sock - sandbox-data:/var/lib/master-sandbox
- sandbox-dependencies:/var/lib/master-dependencies:ro
- sandbox-tools:/var/lib/master-lambda-tools:ro
# Docker-in-Docker engine service (docker:28-dind image), run in privileged mode.
docker-engine:
image: docker:28-dind
privileged: true
environment:
# NOTE(review): an empty DOCKER_TLS_CERTDIR presumably turns off the image's
# TLS certificate setup, leaving the TCP endpoint below unencrypted — confirm
# against the docker image documentation.
DOCKER_TLS_CERTDIR: ''
command:
# The daemon listens on TCP port 2375 on all interfaces inside this container.
- --host=tcp://0.0.0.0:2375
healthcheck:
# The check runs `docker info` every 5s with a 5s timeout and up to 12 retries.
test:
- CMD
- docker
- info
interval: 5s
timeout: 5s
retries: 12
volumes:
# docker-data backs /var/lib/docker; the three sandbox-* named volumes are
# mounted under /var/lib/master-*.
# NOTE(review): presumably these paths must match the mounts of the service
# that talks to this daemon so both see the same files — confirm.
- docker-data:/var/lib/docker
- sandbox-data:/var/lib/master-sandbox
- sandbox-dependencies:/var/lib/master-dependencies
- sandbox-tools:/var/lib/master-lambda-tools
otel-collector: otel-collector:
image: grafana/otel-lgtm:latest image: grafana/otel-lgtm:latest
@ -25,4 +50,8 @@ services:
- lgtm-data:/data - lgtm-data:/data
volumes: volumes:
docker-data:
lgtm-data: lgtm-data:
sandbox-data:
sandbox-dependencies:
sandbox-tools:

View file

@ -1,6 +1,7 @@
import asyncio import asyncio
import json import json
from datetime import UTC, datetime, timedelta from datetime import UTC, datetime, timedelta
from typing import Any
from uuid import UUID from uuid import UUID
import pytest import pytest
@ -109,10 +110,22 @@ class FakeDockerClient(DockerClient):
self.base_url = base_url self.base_url = base_url
self.close_calls = 0 self.close_calls = 0
@property  # type: ignore[override]
def networks(self) -> Any:
    """Return a new ``_FakeNetworks`` stand-in on every access."""
    fake_networks = _FakeNetworks()
    return fake_networks
def close(self) -> None: def close(self) -> None:
self.close_calls += 1 self.close_calls += 1
class _FakeNetworks:
def get(self, name: str) -> None:
return None
def create(self, name: str) -> None:
return None
class EmptySandboxState: class EmptySandboxState:
def __init__(self) -> None: def __init__(self) -> None:
self.calls = 0 self.calls = 0