fix sandbox create rollback gap
This commit is contained in:
parent
b4a2a9ceea
commit
9b6c7908ad
3 changed files with 94 additions and 3 deletions
38
tasks.md
38
tasks.md
|
|
@ -317,9 +317,45 @@
|
|||
### M26. Повторный boundary review для sandbox observability
|
||||
|
||||
- Субагент: `code-reviewer`
|
||||
- Статус: pending
|
||||
- Статус: in_progress
|
||||
- Зависимости: `M25`
|
||||
- Commit required: no
|
||||
- Scope: подтвердить, что follow-up fixes закрыли M23 замечания без новых boundary нарушений
|
||||
- Файлы: весь измененный код после `M24`-`M25`
|
||||
- Критерии приемки: нет замечаний по replace tracing identity и missing failure-path observability coverage; clean architecture по-прежнему соблюдена
|
||||
|
||||
## Follow-up после M26 boundary review
|
||||
|
||||
### M27. Компенсация save failure после runtime.create
|
||||
|
||||
- Субагент: `feature-developer`
|
||||
- Статус: completed
|
||||
- Зависимости: `M26`
|
||||
- Commit required: yes
|
||||
- Commit message: `fix sandbox create rollback gap`
|
||||
- Scope: не оставлять untracked running container и неконсистентный `sandbox.active.count` при падении `repository.save()` после успешного `runtime.create()`
|
||||
- Файлы: `usecase/sandbox.py`, при необходимости точечные тесты в `test/*`
|
||||
- Решение: сделать create/replace path registry-safe через rollback или другой явный compensation path без нарушения clean architecture
|
||||
- Критерии приемки: save failure не оставляет новый container в runtime без registry state; `sandbox.active.count` отражает финальное committed state; replace и fresh-create failure paths консистентны
|
||||
|
||||
### M28. Регрессии на rollback и startup failure observability
|
||||
|
||||
- Субагент: `test-engineer`
|
||||
- Статус: pending
|
||||
- Зависимости: `M27`
|
||||
- Commit required: yes
|
||||
- Commit message: `add sandbox rollback regression tests`
|
||||
- Scope: покрыть tests для save-failure rollback и startup observability failure paths
|
||||
- Файлы: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, `test/test_create_http.py`, при необходимости другие focused tests в `test/*`
|
||||
- Решение: добавить tests на fresh-create/replace save failure compensation, `list_active` failure observability и reconciliation failure span/metric expectations где применимо
|
||||
- Критерии приемки: rollback path покрыт; list/reconciliation failure observability не регрессирует; tests остаются presence-based и стабильными
|
||||
|
||||
### M29. Финальный boundary review для sandbox observability
|
||||
|
||||
- Субагент: `code-reviewer`
|
||||
- Статус: pending
|
||||
- Зависимости: `M28`
|
||||
- Commit required: no
|
||||
- Scope: подтвердить, что M27-M28 закрыли remaining M26 замечания
|
||||
- Файлы: весь измененный код после `M27`-`M28`
|
||||
- Критерии приемки: нет замечаний по rollback gap и startup failure observability coverage; sandbox observability slice приемлем as-is
|
||||
|
|
|
|||
|
|
@ -669,6 +669,39 @@ def test_create_sandbox_error_records_observability(monkeypatch) -> None:
|
|||
assert excinfo.value in span.errors
|
||||
|
||||
|
||||
def test_create_sandbox_save_failure_stops_untracked_container(monkeypatch) -> None:
|
||||
now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
|
||||
repository = FailingSaveRepository(RuntimeError('save_failed'))
|
||||
repository.fail_next_save()
|
||||
metrics = RecordingMetrics()
|
||||
runtime = FakeRuntime()
|
||||
usecase = CreateSandbox(
|
||||
repository=repository,
|
||||
locker=FakeLocker(),
|
||||
runtime=runtime,
|
||||
clock=FakeClock(now),
|
||||
logger=FakeLogger(),
|
||||
metrics=metrics,
|
||||
tracer=NoopTracer(),
|
||||
ttl=timedelta(minutes=5),
|
||||
)
|
||||
monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
|
||||
|
||||
with pytest.raises(RuntimeError, match='save_failed'):
|
||||
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
|
||||
|
||||
assert len(runtime.create_calls) == 1
|
||||
assert runtime.stop_calls == [f'container-{SESSION_NEW_ID}']
|
||||
assert repository.get_active_by_chat_id(CHAT_ID) is None
|
||||
assert _active_count_values(metrics)
|
||||
assert _active_count_values(metrics)[-1] == 0
|
||||
_assert_increment_metric_present(
|
||||
metrics,
|
||||
'sandbox.create.total',
|
||||
attrs={'result': 'error'},
|
||||
)
|
||||
|
||||
|
||||
def test_create_sandbox_replace_stop_failure_preserves_separate_identities(
|
||||
monkeypatch,
|
||||
) -> None:
|
||||
|
|
@ -755,9 +788,11 @@ def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids(
|
|||
with pytest.raises(RuntimeError, match='save_failed') as excinfo:
|
||||
usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
|
||||
|
||||
assert runtime.stop_calls == ['container-old']
|
||||
assert runtime.stop_calls == ['container-old', f'container-{SESSION_NEW_ID}']
|
||||
assert len(runtime.create_calls) == 1
|
||||
assert repository.get_active_by_chat_id(CHAT_ID) is None
|
||||
assert _active_count_values(metrics)
|
||||
assert _active_count_values(metrics)[-1] == 0
|
||||
_assert_increment_metric_present(
|
||||
metrics,
|
||||
'sandbox.create.total',
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ class CreateSandbox:
|
|||
'sandbox.new_container.id',
|
||||
new_session.container_id,
|
||||
)
|
||||
self._repository.save(new_session)
|
||||
self._save_created_session(new_session)
|
||||
_set_active_count(self._metrics, self._repository)
|
||||
if result == 'replaced':
|
||||
span.set_attribute('session.id', str(new_session.session_id))
|
||||
|
|
@ -131,6 +131,26 @@ class CreateSandbox:
|
|||
span.record_error(exc)
|
||||
raise
|
||||
|
||||
def _save_created_session(self, session: SandboxSession) -> None:
|
||||
try:
|
||||
self._repository.save(session)
|
||||
except Exception as exc:
|
||||
self._compensate_save_failure(session, exc)
|
||||
raise
|
||||
|
||||
def _compensate_save_failure(
|
||||
self,
|
||||
session: SandboxSession,
|
||||
error: Exception,
|
||||
) -> None:
|
||||
try:
|
||||
self._runtime.stop(session.container_id)
|
||||
except Exception as stop_error:
|
||||
_set_active_count(self._metrics, self._repository)
|
||||
raise error from stop_error
|
||||
|
||||
_set_active_count(self._metrics, self._repository)
|
||||
|
||||
|
||||
class CleanupExpiredSandboxes:
|
||||
def __init__(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue