diff --git a/tasks.md b/tasks.md
index 4b9e7e5..e713a66 100644
--- a/tasks.md
+++ b/tasks.md
@@ -305,7 +305,7 @@
 ### M25. Добрать failure-path observability regression tests
 
 - Субагент: `test-engineer`
-- Статус: pending
+- Статус: completed
 - Зависимости: `M24`
 - Commit required: yes
 - Commit message: `add sandbox observability failure tests`
diff --git a/test/test_docker_runtime.py b/test/test_docker_runtime.py
index 4db1095..352adad 100644
--- a/test/test_docker_runtime.py
+++ b/test/test_docker_runtime.py
@@ -43,6 +43,27 @@ class FakeListedContainer(FakeContainer):
         self.attrs = {'Created': created_at}
 
 
+class FailingStopContainer(FakeListedContainer):
+    """Listed-container fake whose stop() always raises the supplied error."""
+
+    def __init__(
+        self,
+        container_id: str,
+        *,
+        labels: dict[str, str],
+        created_at: str,
+        error: Exception,
+    ) -> None:
+        super().__init__(container_id, labels=labels, created_at=created_at)
+        # Kept so stop() can raise it on every call.
+        self._error = error
+
+    def stop(self) -> None:
+        # Count the attempt first so it is recorded even though the call fails.
+        self.stop_calls += 1
+        raise self._error
+
+
 class RunKwargs(TypedDict):
     detach: bool
     labels: dict[str, str]
@@ -435,23 +456,143 @@ def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
     config = build_config(tmp_path)
     containers = FakeContainers()
     containers.get_result = NotFound('missing')
-    runtime = build_runtime(config, containers)
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    runtime = DockerSandboxRuntime(
+        config,
+        FakeDockerClient(containers),
+        metrics,
+        tracer,
+    )
 
     runtime.stop('container-123')
 
     assert containers.get_calls == ['container-123']
+    duration_call = _find_record_call(
+        metrics,
+        'sandbox.runtime.stop.duration_ms',
+        attrs={'operation': 'stop', 'result': 'not_found'},
+    )
+    assert duration_call[1] >= 0
+    span = _find_span(
+        tracer,
+        'adapter.docker.stop_sandbox',
+        {'container.id': 'container-123'},
+        {'sandbox.result': 'not_found'},
+    )
+    assert not span.errors
+    stop_error_calls = [
+        call
+        for call in metrics.increment_calls
+        if call[0] == 'sandbox.runtime.error.total'
+        and call[2] is not None
+        and call[2].get('operation') == 'stop'
+    ]
+    assert stop_error_calls == []
 
 
 def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
     config = build_config(tmp_path)
     containers = FakeContainers()
-    containers.get_result = DockerException('boom')
-    runtime = build_runtime(config, containers)
+    containers.get_result = FailingStopContainer(  # stop() on this fake raises
+        'container-123',
+        labels={
+            'session_id': str(SESSION_ID),
+            'chat_id': str(CHAT_ID),
+            'expires_at': '2026-04-02T12:05:00+00:00',
+        },
+        created_at='2026-04-02T12:00:00Z',
+        error=DockerException('boom'),  # surfaces from stop(), not the lookup
+    )
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
+    runtime = DockerSandboxRuntime(
+        config,
+        FakeDockerClient(containers),
+        metrics,
+        tracer,
+    )
 
     with pytest.raises(SandboxError) as excinfo:
         runtime.stop('container-123')
 
     assert str(excinfo.value) == 'sandbox_stop_failed'
+    _find_increment_call(  # presumably fails when no matching call exists; confirm
+        metrics,
+        'sandbox.runtime.error.total',
+        attrs={'operation': 'stop', 'error.type': 'DockerException'},
+    )
+    duration_call = _find_record_call(
+        metrics,
+        'sandbox.runtime.stop.duration_ms',
+        attrs={'operation': 'stop', 'result': 'error'},
+    )
+    assert duration_call[1] >= 0  # index 1 presumably holds the value; confirm
+    span = _find_span(
+        tracer,
+        'adapter.docker.stop_sandbox',
+        {'container.id': 'container-123'},
+        {
+            'session.id': str(SESSION_ID),
+            'chat.id': str(CHAT_ID),
+            'sandbox.result': 'error',
+        },
+    )
+    cause = excinfo.value.__cause__  # SandboxError must chain the docker error
+    assert isinstance(cause, DockerException)
+    assert cause in span.errors  # the same exception object is on the span
+
+
+def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
+    """A successful stop records 'stopped' telemetry and no stop errors."""
+    config = build_config(tmp_path)
+    containers = FakeContainers()
+    session_labels = {
+        'session_id': str(SESSION_ID),
+        'chat_id': str(CHAT_ID),
+        'expires_at': '2026-04-02T12:05:00+00:00',
+    }
+    running = FakeListedContainer(
+        'container-123',
+        labels=session_labels,
+        created_at='2026-04-02T12:00:00Z',
+    )
+    containers.get_result = running
+    metrics, tracer = RecordingMetrics(), RecordingTracer()
+    client = FakeDockerClient(containers)
+    runtime = DockerSandboxRuntime(config, client, metrics, tracer)
+
+    runtime.stop('container-123')
+
+    # The fake's stop() must have been reached exactly once.
+    assert running.stop_calls == 1
+    stop_duration = _find_record_call(
+        metrics,
+        'sandbox.runtime.stop.duration_ms',
+        attrs={'operation': 'stop', 'result': 'stopped'},
+    )
+    assert stop_duration[1] >= 0
+    expected_span_attrs = {
+        'session.id': str(SESSION_ID),
+        'chat.id': str(CHAT_ID),
+        'sandbox.result': 'stopped',
+    }
+    stop_span = _find_span(
+        tracer,
+        'adapter.docker.stop_sandbox',
+        {'container.id': 'container-123'},
+        expected_span_attrs,
+    )
+    assert not stop_span.errors
+    # A clean stop must not bump the runtime error counter for 'stop'.
+    unexpected_errors = [
+        entry
+        for entry in metrics.increment_calls
+        if entry[0] == 'sandbox.runtime.error.total'
+        and entry[2] is not None
+        and entry[2].get('operation') == 'stop'
+    ]
+    assert unexpected_errors == []
 
 
 def test_runtime_list_active_sessions_reads_valid_labeled_containers(
diff --git a/test/test_sandbox_usecase.py b/test/test_sandbox_usecase.py
index 92c7937..068204c 100644
--- a/test/test_sandbox_usecase.py
+++ b/test/test_sandbox_usecase.py
@@ -281,6 +281,22 @@ class StaleSnapshotRepository(InMemorySandboxSessionRepository):
         return [self._snapshot]
 
 
+class FailingSaveRepository(InMemorySandboxSessionRepository):
+    # In-memory repository fake whose save() can be armed to raise once.
+    def __init__(self, error: Exception) -> None:
+        super().__init__()
+        self._error, self._fail_next_save = error, False
+
+    def fail_next_save(self) -> None:
+        self._fail_next_save = True
+
+    def save(self, session: SandboxSession) -> None:
+        armed, self._fail_next_save = self._fail_next_save, False
+        if armed:
+            raise self._error
+        super().save(session)
+
+
 class FakeRuntime:
     def __init__(self) -> None:
         self.create_calls: list[dict[str, object]] = []
@@ -706,6 +722,64 @@ def test_create_sandbox_replace_stop_failure_preserves_separate_identities(
     assert excinfo.value in span.errors
 
 
+def test_create_sandbox_replace_save_failure_records_stage_safe_trace_ids(
+    monkeypatch,
+) -> None:
+    """A save failure during replacement keeps old/new ids in separate span attrs."""
+    now = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
+    previous = SandboxSession(
+        session_id=SESSION_OLD_ID,
+        chat_id=CHAT_ID,
+        container_id='container-old',
+        status=SandboxStatus.RUNNING,
+        created_at=now - timedelta(minutes=10),
+        expires_at=now,
+    )
+    repository = FailingSaveRepository(RuntimeError('save_failed'))
+    repository.save(previous)
+    repository.fail_next_save()  # seed save above succeeded; the next one raises
+    metrics, tracer = RecordingMetrics(), RecordingTracer()
+    runtime = FakeRuntime()
+    usecase = CreateSandbox(
+        repository=repository,
+        locker=FakeLocker(),
+        runtime=runtime,
+        clock=FakeClock(now),
+        logger=FakeLogger(),
+        metrics=metrics,
+        tracer=tracer,
+        ttl=timedelta(minutes=5),
+    )
+    monkeypatch.setattr('usecase.sandbox._new_session_id', lambda: SESSION_NEW_ID)
+
+    with pytest.raises(RuntimeError, match='save_failed') as raised:
+        usecase.execute(CreateSandboxCommand(chat_id=CHAT_ID))
+
+    assert runtime.stop_calls == ['container-old']
+    assert len(runtime.create_calls) == 1
+    assert repository.get_active_by_chat_id(CHAT_ID) is None
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.create.total',
+        attrs={'result': 'error'},
+    )
+    create_span = _find_span(
+        tracer,
+        'usecase.create_sandbox',
+        {'chat.id': str(CHAT_ID)},
+        {
+            'sandbox.previous_session.id': str(SESSION_OLD_ID),
+            'sandbox.previous_container.id': 'container-old',
+            'sandbox.new_session.id': str(SESSION_NEW_ID),
+            'sandbox.new_container.id': f'container-{SESSION_NEW_ID}',
+            'sandbox.result': 'error',
+        },
+    )
+    for generic_key in ('session.id', 'container.id'):  # stage-specific keys only
+        assert generic_key not in create_span.attrs
+    assert raised.value in create_span.errors
+
+
 def test_create_sandbox_serializes_duplicate_concurrent_create_for_chat_id(
     monkeypatch,
 ) -> None:
@@ -982,6 +1056,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None:
     repository.save(cleaned_session)
     runtime = FailingStopRuntime('container-fail')
     logger = FakeLogger()
+    metrics = RecordingMetrics()
+    tracer = RecordingTracer()
     locker = FakeLocker()
     usecase = CleanupExpiredSandboxes(
         repository=repository,
@@ -989,8 +1065,8 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None:
         runtime=runtime,
         clock=FakeClock(now),
         logger=logger,
-        metrics=NoopMetrics(),
-        tracer=NoopTracer(),
+        metrics=metrics,
+        tracer=tracer,
     )
 
     result = usecase.execute()
@@ -1021,3 +1097,48 @@ def test_cleanup_expired_sandboxes_continues_after_stop_failure() -> None:
             },
         ),
     ]
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.cleanup.error.total',
+        attrs={'error.type': 'RuntimeError'},
+    )
+    _assert_increment_metric_present(
+        metrics,
+        'sandbox.cleanup.total',
+        attrs={'result': 'cleaned'},
+    )
+    assert _active_count_values(metrics)
+    assert _active_count_values(metrics)[-1] == 1
+    root_span = _find_span(
+        tracer,
+        'usecase.cleanup_expired_sandboxes',
+        span_attrs={
+            'sandbox.expired_count': 2,
+            'sandbox.cleaned_count': 1,
+            'sandbox.error_count': 1,
+            'sandbox.result': 'completed_with_errors',
+        },
+    )
+    assert not root_span.errors
+    failed_span = _find_span(
+        tracer,
+        'usecase.cleanup_expired_sandbox',
+        {
+            'chat.id': str(FAIL_CHAT_ID),
+            'session.id': str(SESSION_FAIL_ID),
+            'container.id': 'container-fail',
+        },
+        {'sandbox.result': 'error'},
+    )
+    assert [str(error) for error in failed_span.errors] == ['stop_failed']
+    cleaned_span = _find_span(
+        tracer,
+        'usecase.cleanup_expired_sandbox',
+        {
+            'chat.id': str(CLEAN_CHAT_ID),
+            'session.id': str(SESSION_CLEAN_ID),
+            'container.id': 'container-clean',
+        },
+        {'sandbox.result': 'cleaned'},
+    )
+    assert not cleaned_span.errors