feat: auto-reconnect failed gateway platforms with exponential backoff (#2584)
When a messaging platform fails to connect at startup (e.g. transient DNS failure) or disconnects at runtime with a retryable error, the gateway now queues it for background reconnection instead of giving up permanently. - New _platform_reconnect_watcher background task runs alongside the existing session expiry watcher - Exponential backoff: 30s, 60s, 120s, 240s, 300s cap - Max 20 retry attempts before giving up on a platform - Non-retryable errors (bad auth token, etc.) are not retried - Runtime disconnections via _handle_adapter_fatal_error now queue retryable failures instead of triggering gateway shutdown - On successful reconnect, adapter is wired up and channel directory is rebuilt automatically Fixes the case where a DNS blip during gateway startup caused Telegram and Discord to be permanently unavailable until manual restart.
This commit is contained in:
parent
5ddb6a191f
commit
3b509da571
3 changed files with 566 additions and 7 deletions
|
|
@ -66,7 +66,9 @@ async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monk
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_runner_requests_failure_exit_for_retryable_runtime_fatal(monkeypatch, tmp_path):
|
||||
async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatch, tmp_path):
|
||||
"""Retryable runtime fatal errors queue the platform for reconnection
|
||||
instead of shutting down the gateway."""
|
||||
config = GatewayConfig(
|
||||
platforms={
|
||||
Platform.WHATSAPP: PlatformConfig(enabled=True, token="token")
|
||||
|
|
@ -87,7 +89,7 @@ async def test_runner_requests_failure_exit_for_retryable_runtime_fatal(monkeypa
|
|||
|
||||
await runner._handle_adapter_fatal_error(adapter)
|
||||
|
||||
assert runner.should_exit_cleanly is False
|
||||
assert runner.should_exit_with_failure is True
|
||||
assert "exited unexpectedly" in runner.exit_reason
|
||||
runner.stop.assert_awaited_once()
|
||||
# Should NOT shut down — platform is queued for reconnection
|
||||
runner.stop.assert_not_awaited()
|
||||
assert Platform.WHATSAPP in runner._failed_platforms
|
||||
assert runner._failed_platforms[Platform.WHATSAPP]["attempts"] == 0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue