feat(deploy): finalize MVP deployment and file transfer approach

This commit is contained in:
Mikhail Putilovskij 2026-05-02 23:45:52 +03:00
parent 6369721876
commit 0f79494fbe
43 changed files with 3078 additions and 645 deletions

View file

@ -1,114 +0,0 @@
{
"version": "1.0",
"timestamp": "2026-04-30T15:03:14Z",
"phase": "05",
"phase_name": "MVP deployment",
"phase_dir": ".planning/phases/05-mvp-deployment",
"plan": 0,
"task": 0,
"total_tasks": 0,
"status": "paused",
"completed_tasks": [
{
"id": 1,
"name": "Fix path-based base_url normalization and add WS debug visibility",
"status": "done",
"commit": "7e5f9c2"
},
{
"id": 2,
"name": "Add Matrix room recovery, reinvite flow, and default-agent warning behavior",
"status": "done",
"commit": "7e5f9c2"
},
{
"id": 3,
"name": "Switch user file handling to workspace-root filenames with copy-style collision suffixes",
"status": "done",
"commit": "7e5f9c2"
},
{
"id": 4,
"name": "Verify recent routing incident cause",
"status": "done",
"progress": "Confirmed that config lookup is exact-MXID based; mismatch in homeserver suffix caused fallback to the first agent."
}
],
"remaining_tasks": [
{
"id": 5,
"name": "Build and publish a fresh production image with the current workspace-root attachment contract",
"status": "not_started"
},
{
"id": 6,
"name": "Send the new digest to platform and request Matrix bot redeploy",
"status": "not_started"
}
],
"blockers": [
{
"description": "Platform redeploy is still required after the next image publish.",
"type": "external",
"workaround": "None until a fresh digest is published."
},
{
"description": "Old Phase 04 planning files still contain placeholder content.",
"type": "technical",
"workaround": "Ignore for the current deploy task; clean later as planning debt."
}
],
"human_actions_pending": [
{
"action": "Use exact Matrix MXIDs in user_agents, including the real homeserver suffix.",
"context": "Routing fallback to the first agent occurs whenever the config key does not exactly match the sender.",
"blocking": true
},
{
"action": "Redeploy matrix-bot after the new image is published.",
"context": "Config edits alone need a container restart; the file-contract code change needs a new image first.",
"blocking": true
}
],
"decisions": [
{
"decision": "Keep fallback to the first agent for users missing from user_agents.",
"rationale": "Platform wanted that behavior to remain available, but with explicit user warning.",
"phase": "05"
},
{
"decision": "Require exact Matrix MXID matching in user_agents.",
"rationale": "Current routing is deterministic and simple; no fuzzy matching or homeserver aliasing was introduced.",
"phase": "05"
},
{
"decision": "Use workspace-root filenames for incoming user files and Windows-style copy suffixes on collision.",
"rationale": "Platform requested removal of incoming/outgoing directory split and timestamp-prefixed names.",
"phase": "05"
}
],
"uncommitted_files": [
".planning/HANDOFF.json",
".planning/STATE.md",
".planning/phases/05-mvp-deployment/.continue-here.md",
"README.md",
"adapter/matrix/agent_registry.py",
"adapter/matrix/bot.py",
"adapter/matrix/files.py",
"adapter/matrix/handlers/auth.py",
"adapter/matrix/handlers/chat.py",
"adapter/matrix/reconciliation.py",
"adapter/matrix/routed_platform.py",
"config/matrix-agents.example.yaml",
"docs/deploy-architecture.md",
"sdk/real.py",
"tests/adapter/matrix/test_dispatcher.py",
"tests/adapter/matrix/test_files.py",
"tests/adapter/matrix/test_invite_space.py",
"tests/adapter/matrix/test_reconciliation.py",
"tests/platform/test_real.py",
"tests/test_deploy_handoff.py"
],
"next_action": "Build and publish a fresh production image from the current worktree, then send the digest to the platform for redeploy.",
"context_notes": "Current runtime logic appears correct. The last reported routing bug was traced to config mismatch between the real Matrix sender and the user_agents key. Do not reuse the previously published recovery image for deployment because it does not include the final workspace-root file contract."
}

View file

@ -14,7 +14,7 @@ Telegram и Matrix боты для взаимодействия пользова
- ✓ core/ — унифицированный протокол событий, EventDispatcher, StateStore, ChatManager, AuthManager, SettingsManager — existing - ✓ core/ — унифицированный протокол событий, EventDispatcher, StateStore, ChatManager, AuthManager, SettingsManager — existing
- ✓ adapter/telegram/ — forum-first адаптер (Threaded Mode), `/start`, `/new`, `/archive`, `/rename`, `/settings`, стриминг ответов — existing, QA passed - ✓ adapter/telegram/ — forum-first адаптер (Threaded Mode), `/start`, `/new`, `/archive`, `/rename`, `/settings`, стриминг ответов — existing, QA passed
- ✓ adapter/matrix/ — DM-first адаптер, invite flow, `!new`, `!skills`, `!soul`, `!safety`, room-per-chat — existing - ✓ adapter/matrix/ — Space+rooms адаптер, invite flow, `!new`, `!archive`, `!rename`, `!settings`, room-per-chat — existing
- ✓ sdk/mock.py — MockPlatformClient: `stream_message`, `get_or_create_user`, `get_settings`, `update_settings` — existing - ✓ sdk/mock.py — MockPlatformClient: `stream_message`, `get_or_create_user`, `get_settings`, `update_settings` — existing
### Active ### Active
@ -50,7 +50,7 @@ Telegram и Matrix боты для взаимодействия пользова
| Forum-first (Threaded Mode) для Telegram | Bot API 9.3 позволяет личный чат как форум — чище, без супергруппы | ✓ Good | | Forum-first (Threaded Mode) для Telegram | Bot API 9.3 позволяет личный чат как форум — чище, без супергруппы | ✓ Good |
| (user_id, thread_id) как PK в chats | Изоляция контекстов по топику | ✓ Good | | (user_id, thread_id) как PK в chats | Изоляция контекстов по топику | ✓ Good |
| MockPlatformClient через sdk/interface.py | Не ждать SDK, разрабатывать независимо | ✓ Good | | MockPlatformClient через sdk/interface.py | Не ждать SDK, разрабатывать независимо | ✓ Good |
| DM-first для Matrix (не Space-first) | Space lifecycle слишком сложен для первого этапа | ✓ Good | | Space+rooms для Matrix | Room-based UX и явные чаты важнее DM-first упрощений | ✓ Good |
| Отказ от E2EE в Matrix | python-olm не собирается на macOS/ARM | — Pending | | Отказ от E2EE в Matrix | python-olm не собирается на macOS/ARM | — Pending |
## Evolution ## Evolution

View file

@ -2,8 +2,8 @@
gsd_state_version: 1.0 gsd_state_version: 1.0
milestone: v1.0 milestone: v1.0
milestone_name: — Production-ready surfaces milestone_name: — Production-ready surfaces
status: Phase 05 Complete status: Phase 05 Paused
last_updated: "2026-04-27T22:17:10.233Z" last_updated: "2026-04-29T08:49:04Z"
progress: progress:
total_phases: 6 total_phases: 6
completed_phases: 3 completed_phases: 3
@ -18,11 +18,13 @@ progress:
See: .planning/PROJECT.md (updated 2026-04-02) See: .planning/PROJECT.md (updated 2026-04-02)
**Core value:** Пользователь ведёт диалог с Lambda через любой мессенджер без изменения ядра **Core value:** Пользователь ведёт диалог с Lambda через любой мессенджер без изменения ядра
**Current focus:** Phase 05 complete — MVP deployment handoff is ready **Current focus:** Phase 05 paused — latest file-contract change needs a new image build before platform redeploy
## Current Phase ## Current Phase
**Phase 05** complete: MVP deployment hardening **Phase 05** paused: MVP deployment hardening is in place, but the latest attachment workspace-root change is not yet published
Deployment handoff follow-up is external. The last published image predates the latest file-handling change; the next step is to rebuild and publish a fresh image, then ask the platform to redeploy Matrix with the shared `/agents` volumes and `config/matrix-agents.yaml`.
Plan `05-01` is complete. Matrix startup now reconciles managed Space rooms from synced topology before live traffic, restoring local metadata and deterministic legacy `platform_chat_id` bindings on restart. Plan `05-01` is complete. Matrix startup now reconciles managed Space rooms from synced topology before live traffic, restoring local metadata and deterministic legacy `platform_chat_id` bindings on restart.
@ -90,6 +92,7 @@ Verified with `docker compose -f docker-compose.prod.yml config`, `docker compos
## Blockers ## Blockers
- Lambda platform SDK не готов — Phase 2 заблокирована до готовности платформы - Lambda platform SDK не готов — Phase 2 заблокирована до готовности платформы
- Full production verification depends on the platform team's real multi-agent orchestration, production Matrix credentials, `config/matrix-agents.yaml`, and shared `/agents/N` volume mounts.
## Accumulated Context ## Accumulated Context
@ -121,6 +124,6 @@ Verified with `docker compose -f docker-compose.prod.yml config`, `docker compos
## Session ## Session
- Last session: 2026-04-27T22:17:10Z - Last session: 2026-04-29T08:49:04Z
- Stopped at: Completed 05-04-PLAN.md - Stopped at: Handoff updated after attachment workspace-root change; waiting for image rebuild and platform redeploy
- Resume file: .planning/phases/05-mvp-deployment/.continue-here.md - Resume file: .planning/phases/05-mvp-deployment/.continue-here.md

View file

@ -1,53 +0,0 @@
---
context: phase
phase: 04-matrix-mvp-shared-agent-context-and-context-management-comma
task: 4
total_tasks: 6
status: in_progress
last_updated: 2026-04-24T12:16:09.301Z
---
<current_state>
Debugging first-chunk truncation bug in Matrix bot. Logging added to both sdk/real.py and external/platform-agent/src/agent/service.py. Waiting for user to run docker compose up --build and share platform-agent logs with stream_event lines.
</current_state>
<completed_work>
- docker-compose.yml: added `./config:/app/config:ro` volume mount so MATRIX_AGENT_REGISTRY_PATH works
- config/matrix-agents.example.yaml: updated labels to Platform/Media
- sdk/real.py: added structlog debug logging in _stream_agent_events (logs each chunk index + text[:40])
- external/platform-agent/src/agent/service.py: added logging of langgraph_node, content_type, content[:60] for every on_chat_model_stream event
Bot is running and user confirmed it starts correctly with MATRIX_PLATFORM_BACKEND=real.
</completed_work>
<remaining_work>
- Task 4: Get platform-agent debug logs (docker compose up --build, reproduce truncation, share stream_event lines)
- Task 5: Analyze: check content_type (str vs list), check langgraph_node (which graph node produces the first chunk)
- Task 6: Fix service.py based on findings
</remaining_work>
<decisions_made>
- Bug confirmed to be in platform-agent, NOT in surfaces bot: our sdk/real.py logs show chunk index=0 already has truncated text (e.g. ' Д Е Ё...' instead of 'А Б В Г Д...')
- deepagents framework uses SubAgentMiddleware: main dispatcher agent + general-purpose subagent
- service.py processes ALL on_chat_model_stream events from astream_events v2 with no node filtering
- Two leading hypotheses: (A) chunk.content is a list for some events (multimodal), causing those chunks to be silently skipped or to raise an error; (B) events from the wrong graph node are being captured, or events from the correct node are being dropped
</decisions_made>
<blockers>
- Need user to run docker compose up --build and share platform-agent logs with DEBUG output
</blockers>
<context>
The deepagents architecture: create_deep_agent creates a main orchestrator with SubAgentMiddleware wrapping a general-purpose subagent. When astream_events v2 runs, it may emit on_chat_model_stream from both the main agent's LLM call AND the subagent's LLM call. service.py captures ALL of them. The first chunk of the actual response might be from the subagent (not forwarded to client properly), while the main agent's response starts mid-sentence because it "sees" the subagent's output in its tool result context.
Two key things to look for in logs:
1. content_type=list → fix is `chunk.content[0].get("text", "")` or similar
2. langgraph_node varies between chunks → fix is to filter to the correct node (e.g. only "agent" node)
</context>
<next_action>
Start with: docker compose up --build. Then send a message with image context (e.g. send an image first, then ask 'Напомни алфавит'). Share platform-agent-1 logs — specifically the stream_event lines showing ns= and content_type= values.
</next_action>

View file

@ -1,62 +0,0 @@
---
phase: 05-mvp-deployment
phase_name: MVP deployment
task: 0
total_tasks: 0
status: paused
last_updated: 2026-04-30T15:03:14Z
---
<current_state>
Phase 05 code changes are in place, but the latest workspace-root attachment contract is not yet published in a new production image. Today's last debugging step confirmed that the user-to-agent config itself was fine except for one exact-MXID mismatch: the homeserver suffix in `user_agents` did not match the real Matrix sender, so fallback to the first agent was expected.
</current_state>
<completed_work>
- Fixed the path-based `base_url` normalization bug that caused WS connects to drop route prefixes.
- Added WS lifecycle debug logging behind `SURFACES_DEBUG_WS=1`.
- Added Matrix routing/recovery behavior:
- warning users when they are not listed in `user_agents`
- preserving room bindings across config updates
- re-inviting users back into their Space and active rooms after leave
- `!new` from the entry/DM room to create a fresh working chat
- Reworked attachment handling so user files now go directly into the agent workspace root with Windows-style collision suffixes like `file (1).pdf`.
- Updated docs and tests to match the new root-workspace file contract.
- Verified that the recent “still goes to default agent” report was caused by exact MXID mismatch in config, not by YAML parsing or runtime routing logic.
- Published earlier images:
- `mput1/surfaces-bot:debug-ws-20260429`
- `mput1/surfaces-bot:matrix-recovery-20260429`
</completed_work>
<remaining_work>
- Build and publish a new production image that includes the latest workspace-root attachment changes.
- Give the platform the new digest and ask them to redeploy the Matrix bot container.
- Optionally run local smoke/fullstack validation once more before publishing if extra confidence is needed.
</remaining_work>
<decisions_made>
- Keep the fallback to the first agent when a user is missing from `user_agents`.
- Require exact Matrix MXID match in `user_agents`; no fuzzy matching or homeserver normalization was added.
- Warn the user in-band when default-agent fallback is used.
- Keep room identity and `platform_chat_id` stable across config updates.
- Require container restart for config changes; no image rebuild is needed for `matrix-agents.yaml` edits alone.
- Remove `incoming/` and timestamp prefixes from the attachment contract.
- Save uploaded user files directly at the workspace root and resolve collisions with copy-style suffixes.
</decisions_made>
<blockers>
- No code blocker.
- External dependency: platform redeploy after the next image publish.
- Historical debt: placeholder summary/plan artifacts still exist in old Phase 04 files and were not cleaned during this session.
</blockers>
<context>
The current codebase should route correctly if the deployed config uses the exact real Matrix sender IDs, e.g. `@user:matrix.lambda.coredump.ru`. The next likely mistake during resume would be publishing the wrong image digest: the currently published recovery image predates the latest file-contract change. Resume by building a fresh image from the current worktree, not by reusing the old digest.
</context>
<next_action>
Rebuild the production image from the current worktree, publish it, and send the new digest to the platform for redeploy.
</next_action>

View file

@ -0,0 +1,158 @@
---
phase: 05-mvp-deployment
plan: 01
type: execute
wave: 1
depends_on: []
files_modified:
- adapter/matrix/reconciliation.py
- adapter/matrix/bot.py
- tests/adapter/matrix/test_reconciliation.py
- tests/adapter/matrix/test_restart_persistence.py
autonomous: true
requirements:
- PH05-01
- PH05-03
must_haves:
truths:
- "On restart, existing Matrix Space and child-room topology is rebuilt before live sync begins."
- "Restart recovery preserves Space+rooms UX instead of creating duplicate DM-style working rooms."
- "Recovered rooms regain user metadata, room metadata, and chat bindings needed for normal routing."
- "Legacy working rooms missing `platform_chat_id` are backfilled deterministically during startup before strict routing handles traffic."
artifacts:
- path: "adapter/matrix/reconciliation.py"
provides: "Authoritative restart reconciliation from Matrix topology into local metadata"
- path: "adapter/matrix/bot.py"
provides: "Startup wiring that runs reconciliation before sync_forever"
- path: "tests/adapter/matrix/test_reconciliation.py"
provides: "Regression coverage for startup recovery and idempotence"
key_links:
- from: "adapter/matrix/bot.py"
to: "adapter/matrix/reconciliation.py"
via: "startup bootstrap before sync_forever"
pattern: "reconcil"
- from: "adapter/matrix/reconciliation.py"
to: "core/chat.py"
via: "chat manager rebuild for recovered rooms"
pattern: "get_or_create"
---
<objective>
Rebuild Matrix-local routing state from authoritative Space topology before the bot processes live traffic.
Purpose: Preserve the Phase 01 Space+rooms contract after restart even if SQLite metadata is partial or missing.
Output: A startup reconciliation module, bot wiring, and regression tests proving no DM-first duplication on restart.
</objective>
<execution_context>
@/Users/a/.codex/get-shit-done/workflows/execute-plan.md
@/Users/a/.codex/get-shit-done/templates/summary.md
</execution_context>
<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/05-mvp-deployment/05-RESEARCH.md
@.planning/phases/05-mvp-deployment/05-VALIDATION.md
@.planning/phases/04-matrix-mvp-shared-agent-context-and-context-management-comma/04-02-SUMMARY.md
@adapter/matrix/bot.py
@adapter/matrix/store.py
@adapter/matrix/handlers/auth.py
@tests/adapter/matrix/test_invite_space.py
@tests/adapter/matrix/test_chat_space.py
@tests/adapter/matrix/test_restart_persistence.py
<interfaces>
From `adapter/matrix/bot.py`:
```python
async def prepare_live_sync(client: AsyncClient) -> str | None:
    response = await client.sync(timeout=0, full_state=True)
    if isinstance(response, SyncResponse):
        return response.next_batch
    return None
```
```python
class MatrixBot:
    async def _bootstrap_unregistered_room(
        self,
        room: MatrixRoom,
        sender: str,
    ) -> list[OutgoingEvent] | None: ...
```
From `adapter/matrix/store.py`:
```python
async def get_room_meta(store: StateStore, room_id: str) -> dict | None: ...
async def set_room_meta(store: StateStore, room_id: str, meta: dict) -> None: ...
async def get_user_meta(store: StateStore, matrix_user_id: str) -> dict | None: ...
async def set_user_meta(store: StateStore, matrix_user_id: str, meta: dict) -> None: ...
async def next_platform_chat_id(store: StateStore) -> str: ...
```
</interfaces>
</context>
<tasks>
<task type="auto" tdd="true">
<name>Task 1: Add restart reconciliation regression coverage</name>
<files>tests/adapter/matrix/test_reconciliation.py, tests/adapter/matrix/test_restart_persistence.py</files>
<read_first>tests/adapter/matrix/test_invite_space.py, tests/adapter/matrix/test_chat_space.py, tests/adapter/matrix/test_restart_persistence.py, adapter/matrix/bot.py, adapter/matrix/handlers/auth.py, .planning/phases/05-mvp-deployment/05-RESEARCH.md</read_first>
<behavior>
- Test 1: startup recovery rebuilds user space metadata, room metadata, and chat bindings from Matrix topology without creating new working rooms (per D-Phase05-reset and PH05-01).
- Test 2: reconciliation is idempotent and safe when local SQLite state is already present.
- Test 3: reconciliation happens before lazy `_bootstrap_unregistered_room()` would run for existing rooms (per PH05-03).
- Test 4: legacy room metadata missing `platform_chat_id` is backfilled deterministically at startup and persisted before routed handling begins.
</behavior>
<acceptance_criteria>
- `tests/adapter/matrix/test_reconciliation.py` exists and names reconciliation entrypoints explicitly.
- The new tests assert restored `space_id`, `chat_id`, `matrix_user_id`, and `platform_chat_id` values for recovered rooms.
- The regression slice also proves existing Space onboarding behavior still passes by running `test_invite_space.py` and `test_chat_space.py`.
- The automated command in `<verify>` fails before implementation or would fail if reconciliation is removed.
</acceptance_criteria>
<action>Create a dedicated `tests/adapter/matrix/test_reconciliation.py` module and extend restart persistence coverage so Phase 05 has a real Wave 0 contract. Model the recovered topology after the Phase 01 Space+rooms onboarding tests, not a DM-first flow, and explicitly keep those onboarding regressions in the verification slice so restart hardening cannot break provisioning UX. Cover recovery of `user_meta`, `room_meta`, `ChatManager` bindings, and room-local routing fields from Matrix-side state before live callbacks begin, including deterministic backfill for legacy rooms that predate `platform_chat_id`. Keep temporary UX state out of scope, per research.</action>
<verify>
<automated>pytest tests/adapter/matrix/test_invite_space.py tests/adapter/matrix/test_chat_space.py tests/adapter/matrix/test_reconciliation.py tests/adapter/matrix/test_restart_persistence.py -v</automated>
</verify>
<done>Phase 05 has tests that fail (red) before the implementation exists, define authoritative restart reconciliation behavior, and exclude duplicate room provisioning.</done>
</task>
<task type="auto" tdd="true">
<name>Task 2: Implement authoritative startup reconciliation and wire it before live sync</name>
<files>adapter/matrix/reconciliation.py, adapter/matrix/bot.py</files>
<read_first>adapter/matrix/bot.py, adapter/matrix/store.py, adapter/matrix/handlers/auth.py, tests/adapter/matrix/test_reconciliation.py, tests/adapter/matrix/test_restart_persistence.py, .planning/phases/05-mvp-deployment/05-RESEARCH.md</read_first>
<behavior>
- Test 1: startup rebuild runs after login and initial full-state fetch, but before `sync_forever()` processes live events.
- Test 2: recovered rooms keep their existing Space+rooms identity and do not trigger `_bootstrap_unregistered_room()` unless the room is genuinely new.
- Test 3: local metadata can be rebuilt from Matrix topology when SQLite entries are missing, while existing valid metadata remains stable.
- Test 4: startup repair assigns a deterministic `platform_chat_id` to legacy rooms missing that field and persists it before routed platform calls can occur.
</behavior>
<acceptance_criteria>
- `adapter/matrix/reconciliation.py` exports a focused reconciliation entrypoint used by startup code.
- `adapter/matrix/bot.py` invokes reconciliation before `client.sync_forever(...)`.
- Recovered room metadata includes `room_type`, `chat_id`, `space_id`, `matrix_user_id`, and `platform_chat_id` where available or rebuildable.
- Legacy rooms missing `platform_chat_id` follow one documented startup backfill path rather than ad hoc routing fallbacks.
</acceptance_criteria>
<action>Implement a restart recovery module that treats Matrix topology as authoritative, per the Phase 05 reset and research notes. Rebuild missing local metadata for Space-owned working rooms, deterministically backfill missing `platform_chat_id` values for legacy rooms, and re-create `ChatManager` entries needed by routing, while keeping SQLite as a rebuildable cache rather than the source of truth. Wire the new reconciliation step into startup after the initial full-state sync and before live sync begins, and keep the onboarding regression slice green while doing it. Do not widen into timeline scraping, new storage backends, or DM-first fallbacks.</action>
<verify>
<automated>pytest tests/adapter/matrix/test_invite_space.py tests/adapter/matrix/test_chat_space.py tests/adapter/matrix/test_reconciliation.py tests/adapter/matrix/test_restart_persistence.py tests/adapter/matrix/test_dispatcher.py -v</automated>
</verify>
<done>Restart recovery restores the minimum durable state for existing Space rooms before live traffic, and the guarded regression suite passes.</done>
</task>
</tasks>
<verification>
Run the onboarding, reconciliation, restart-persistence, and Matrix dispatcher slices together. Confirm startup now has a deterministic pre-sync recovery and legacy-room backfill step instead of relying on lazy room bootstrap or routing-time fallbacks for existing topology.
</verification>
<success_criteria>
The bot can restart with partial or empty local room metadata, rebuild managed Space rooms before live sync, and continue handling those rooms without creating duplicate onboarding rooms.
</success_criteria>
<output>
After completion, create `.planning/phases/05-mvp-deployment/05-01-SUMMARY.md`
</output>

View file

@ -0,0 +1,99 @@
---
phase: 05-mvp-deployment
plan: 01
subsystem: infra
tags: [matrix, reconciliation, sqlite, startup, testing]
requires:
- phase: 01-matrix-mvp
provides: Space+rooms onboarding, room metadata, and Matrix dispatcher behavior
- phase: 04-matrix-mvp-shared-agent-context-and-context-management-comma
provides: durable platform_chat_id and restart persistence primitives
provides:
- authoritative startup reconciliation from Matrix room topology into local metadata
- pre-sync startup wiring that repairs managed rooms before live traffic
- restart regression coverage for reconciliation, idempotence, and legacy platform_chat_id backfill
affects: [matrix, startup, deployment, restart-persistence]
tech-stack:
added: []
patterns: [matrix-topology-as-source-of-truth, sqlite-cache-rebuild, pre-sync-reconciliation]
key-files:
created: [adapter/matrix/reconciliation.py, tests/adapter/matrix/test_reconciliation.py]
modified: [adapter/matrix/bot.py, tests/adapter/matrix/test_restart_persistence.py]
key-decisions:
- "Treat synced Matrix parent/child topology as authoritative for managed room recovery; keep SQLite rebuildable."
- "Backfill missing platform_chat_id values during startup reconciliation instead of routing-time fallbacks."
patterns-established:
- "Startup runs full-state sync, then reconciliation, then sync_forever."
- "Recovered Matrix rooms rebuild user_meta, room_meta, auth state, and ChatManager bindings idempotently."
requirements-completed: [PH05-01, PH05-03]
duration: 8min
completed: 2026-04-27
---
# Phase 05 Plan 01: Restart Reconciliation Summary
**Matrix startup now rebuilds Space-owned working rooms into durable local routing state before live sync begins**
## Performance
- **Duration:** 8 min
- **Started:** 2026-04-27T22:00:47Z
- **Completed:** 2026-04-27T22:08:47Z
- **Tasks:** 2
- **Files modified:** 4
## Accomplishments
- Added a dedicated reconciliation module that restores `user_meta`, `room_meta`, auth state, chat bindings, and missing `platform_chat_id` values from the synced Matrix room graph.
- Wired startup to run reconciliation immediately after the initial full-state sync and before `sync_forever()`.
- Added regression coverage for recovery, idempotence, pre-sync ordering, onboarding compatibility, and legacy restart backfill.
## Task Commits
Each task was committed atomically:
1. **Task 1: Add restart reconciliation regression coverage** - `a75b26a` (test)
2. **Task 2: Implement authoritative startup reconciliation and wire it before live sync** - `8a80d00` (feat)
## Files Created/Modified
- `adapter/matrix/reconciliation.py` - Startup recovery from Matrix topology into local room and user metadata.
- `adapter/matrix/bot.py` - Startup wiring that runs reconciliation after the bootstrap sync and before live sync.
- `tests/adapter/matrix/test_reconciliation.py` - Recovery, idempotence, and startup-order regression coverage.
- `tests/adapter/matrix/test_restart_persistence.py` - Legacy `platform_chat_id` backfill persistence coverage.
## Decisions Made
- Used the synced Matrix room graph as the authoritative source for restart recovery, while preserving existing local metadata whenever it is already valid.
- Kept legacy `platform_chat_id` repair on a single startup path so routed handling never needs ad hoc fallback creation for existing rooms.
## Deviations from Plan
### Auto-fixed Issues
**1. [Rule 3 - Blocking] Switched verification to a clean `uv run pytest` environment**
- **Found during:** Task 1 and Task 2 verification
- **Issue:** The default `pytest` path used a mismatched virtualenv without repo dependencies, and `.env` injected Matrix backend variables that polluted mock-mode tests.
- **Fix:** Ran the verification slice through `uv run pytest` with `UV_CACHE_DIR=/tmp/uv-cache-surfaces` and blank `MATRIX_AGENT_REGISTRY_PATH` / `MATRIX_PLATFORM_BACKEND` values to match the intended test environment.
- **Files modified:** None
- **Verification:** `uv run pytest` slice passed with 50/50 tests green
- **Committed in:** not applicable (verification-only adjustment)
---
**Total deviations:** 1 auto-fixed (1 blocking)
**Impact on plan:** Verification needed an environment correction, but code scope stayed within the plan and owned files.
## Issues Encountered
- The shell environment loaded deployment-oriented Matrix backend settings from `.env`; these had to be neutralized for the mock-mode regression slice.
## User Setup Required
None - no external service configuration required.
## Next Phase Readiness
- Restart recovery is in place for existing Space rooms, including deterministic legacy `platform_chat_id` repair.
- Remaining Phase 05 plans can build on a stable pre-sync recovery path instead of lazy bootstrap for existing topology.
## Self-Check: PASSED
---
*Phase: 05-mvp-deployment*
*Completed: 2026-04-27*

View file

@ -0,0 +1,156 @@
---
phase: 05-mvp-deployment
plan: 02
type: execute
wave: 2
depends_on:
- 05-01
files_modified:
- adapter/matrix/handlers/__init__.py
- adapter/matrix/handlers/context_commands.py
- adapter/matrix/routed_platform.py
- tests/adapter/matrix/test_context_commands.py
- tests/adapter/matrix/test_routed_platform.py
autonomous: true
requirements:
- PH05-02
must_haves:
truths:
- "Each working Matrix room uses its own durable `platform_chat_id` as the real agent context boundary."
- "`!clear` resets only the current room by rotating its `platform_chat_id` and disconnecting the old upstream chat."
- "Save, load, context, and routed send paths resolve through room-local platform context, not shared user state."
- "Strict room routing assumes startup reconciliation has already repaired legacy rooms missing `platform_chat_id`."
artifacts:
- path: "adapter/matrix/handlers/context_commands.py"
provides: "Room-local `!clear`, save/load/context resolution, and upstream disconnect behavior"
- path: "adapter/matrix/routed_platform.py"
provides: "Strict room -> agent_id + platform_chat_id routing"
- path: "tests/adapter/matrix/test_context_commands.py"
provides: "Regression coverage for `!clear` and room-local context commands"
key_links:
- from: "adapter/matrix/handlers/__init__.py"
to: "adapter/matrix/handlers/context_commands.py"
via: "IncomingCommand registration for `clear`"
pattern: "\"clear\""
- from: "adapter/matrix/routed_platform.py"
to: "adapter/matrix/store.py"
via: "room metadata lookup"
pattern: "platform_chat_id"
---
<objective>
Make room-local platform context explicit and user-facing by shipping real `!clear` semantics and strict per-room routing.
Purpose: Phase 05 must preserve Space+rooms UX while giving each room a true upstream context boundary.
Output: Updated command wiring, room-local context reset behavior, and routing regressions tied to `platform_chat_id`.
</objective>
<execution_context>
@/Users/a/.codex/get-shit-done/workflows/execute-plan.md
@/Users/a/.codex/get-shit-done/templates/summary.md
</execution_context>
<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/05-mvp-deployment/05-RESEARCH.md
@.planning/phases/05-mvp-deployment/05-VALIDATION.md
@.planning/phases/04-matrix-mvp-shared-agent-context-and-context-management-comma/04-02-SUMMARY.md
@adapter/matrix/handlers/__init__.py
@adapter/matrix/handlers/context_commands.py
@adapter/matrix/routed_platform.py
@tests/adapter/matrix/test_context_commands.py
@tests/adapter/matrix/test_routed_platform.py
<interfaces>
From `adapter/matrix/handlers/__init__.py`:
```python
dispatcher.register(
IncomingCommand,
"reset",
make_handle_reset(store, prototype_state)
if prototype_state is not None
else handle_settings,
)
```
From `adapter/matrix/handlers/context_commands.py`:
```python
async def _resolve_context_scope(
event: IncomingCommand,
store: StateStore,
chat_mgr,
) -> tuple[str, str | None]: ...
```
From `adapter/matrix/routed_platform.py`:
```python
async def _resolve_delegate(self, user_id: str, local_chat_id: str) -> tuple[PlatformClient, str]:
...
```
</interfaces>
</context>
<tasks>
<task type="auto" tdd="true">
<name>Task 1: Expand room-local context and clear-command tests</name>
<files>tests/adapter/matrix/test_context_commands.py, tests/adapter/matrix/test_routed_platform.py</files>
<read_first>tests/adapter/matrix/test_context_commands.py, tests/adapter/matrix/test_routed_platform.py, adapter/matrix/handlers/__init__.py, adapter/matrix/handlers/context_commands.py, adapter/matrix/routed_platform.py, .planning/phases/05-mvp-deployment/05-VALIDATION.md</read_first>
<behavior>
- Test 1: `!clear` rotates only the current room's `platform_chat_id` and disconnects only the old upstream chat (per PH05-02).
- Test 2: `!clear` is the supported command name; `!reset` may remain as a compatibility alias but must not be the only registered path.
- Test 3: routed send/stream paths fail fast when room metadata lacks `agent_id` or `platform_chat_id` instead of silently sharing context.
- Test 4: routed behavior uses startup-repaired room metadata and does not introduce a second fallback path that invents `platform_chat_id` during message handling.
</behavior>
<acceptance_criteria>
- Tests explicitly mention `clear` in command registration or command invocation.
- The context-command tests assert old and new `platform_chat_id` values and upstream disconnect behavior.
- The routed-platform tests assert room-local IDs are passed to delegates unchanged.
</acceptance_criteria>
<action>Extend the current Matrix context-command and routed-platform regressions so Phase 05 has direct coverage for `!clear`, room-local `platform_chat_id` rotation, and fail-fast routing when room bindings are incomplete. Treat startup reconciliation from `05-01` as the only supported repair path for legacy rooms missing `platform_chat_id`; the routed path must consume repaired metadata, not synthesize new room identities on demand. Preserve the Phase 04 prototype-state behavior where it still fits, but anchor new checks on per-room context isolation rather than shared session assumptions.</action>
<verify>
<automated>pytest tests/adapter/matrix/test_context_commands.py tests/adapter/matrix/test_routed_platform.py -v</automated>
</verify>
<done>The tests define the Phase 05 room-local contract for reset/clear and for routed upstream calls.</done>
</task>
<task type="auto" tdd="true">
<name>Task 2: Ship real room-local `!clear` semantics and strict routing</name>
<files>adapter/matrix/handlers/__init__.py, adapter/matrix/handlers/context_commands.py, adapter/matrix/routed_platform.py</files>
<read_first>adapter/matrix/handlers/__init__.py, adapter/matrix/handlers/context_commands.py, adapter/matrix/routed_platform.py, tests/adapter/matrix/test_context_commands.py, tests/adapter/matrix/test_routed_platform.py, .planning/phases/05-mvp-deployment/05-RESEARCH.md</read_first>
<behavior>
- Test 1: command registration exposes `!clear` as the real context-reset entrypoint for Matrix rooms.
- Test 2: only the active room's `platform_chat_id` rotates, and only the old upstream chat session is disconnected.
- Test 3: all room-local context commands resolve through recovered room metadata instead of falling back to shared user scope.
- Test 4: strict routing stays strict at runtime because legacy-room repair was already handled at startup by `05-01`, not by hidden message-path fallbacks.
</behavior>
<acceptance_criteria>
- `adapter/matrix/handlers/__init__.py` registers `clear`; if `reset` remains, it is clearly a compatibility alias.
- `adapter/matrix/handlers/context_commands.py` resolves and rotates room-local platform context without touching other rooms.
- `adapter/matrix/routed_platform.py` keeps explicit `MATRIX_ROUTE_INCOMPLETE` behavior when bindings are missing.
</acceptance_criteria>
<action>Update the Matrix context command surface to match the Phase 05 contract: real `!clear`, room-local `platform_chat_id` rotation, and upstream disconnect scoped to the old room context. Keep `save`, `load`, and `context` anchored to the same room-local identity. Tighten routed-platform behavior only where needed to preserve fail-fast semantics after startup reconciliation has repaired legacy rooms; do not reintroduce shared chat state, user-level reset behavior, or message-time backfill of missing `platform_chat_id`.</action>
<verify>
<automated>pytest tests/adapter/matrix/test_context_commands.py tests/adapter/matrix/test_routed_platform.py tests/adapter/matrix/test_dispatcher.py -v</automated>
</verify>
<done>Users can clear one working room without affecting others, and all routed upstream calls stay bound to room-local platform context.</done>
</task>
</tasks>
<verification>
Run the context and routed-platform slices plus Matrix dispatcher smoke coverage to confirm the exposed command name and room-local routing behavior are consistent.
</verification>
<success_criteria>
Every working Matrix room has an independent upstream context boundary, and `!clear` resets only the room where it is invoked.
</success_criteria>
<output>
After completion, create `.planning/phases/05-mvp-deployment/05-02-SUMMARY.md`
</output>

View file

@ -0,0 +1,145 @@
---
phase: 05-mvp-deployment
plan: 03
type: execute
wave: 1
depends_on: []
files_modified:
- adapter/matrix/files.py
- sdk/real.py
- tests/adapter/matrix/test_files.py
- tests/platform/test_real.py
autonomous: true
requirements:
- PH05-04
must_haves:
truths:
- "Incoming Matrix attachments are written into a room-safe shared-volume path and passed upstream as relative workspace paths."
- "Agent-emitted files can be returned to Matrix users without inventing a separate file proxy."
- "The shared-volume contract works with the Phase 05 `/agents` deployment shape."
artifacts:
- path: "adapter/matrix/files.py"
provides: "Room-safe shared-volume path building and path resolution"
- path: "sdk/real.py"
provides: "Attachment path passthrough and send-file normalization"
- path: "tests/adapter/matrix/test_files.py"
provides: "Regression coverage for shared-volume path construction"
key_links:
- from: "adapter/matrix/files.py"
to: "sdk/real.py"
via: "relative `workspace_path` transport"
pattern: "workspace_path"
- from: "sdk/real.py"
to: "adapter/matrix/bot.py"
via: "OutgoingMessage attachments rendered back to Matrix"
pattern: "MsgEventSendFile"
---
<objective>
Harden the Matrix attachment path contract around the shared deployment volume instead of custom transport shims.
Purpose: Phase 05 file handling must survive real deployment with room-safe paths and outbound file delivery through the existing shared-volume model.
Output: Attachment-path regressions and any targeted runtime fixes needed for `/agents`-backed shared volume behavior.
</objective>
<execution_context>
@/Users/a/.codex/get-shit-done/workflows/execute-plan.md
@/Users/a/.codex/get-shit-done/templates/summary.md
</execution_context>
<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/05-mvp-deployment/05-RESEARCH.md
@.planning/phases/05-mvp-deployment/05-VALIDATION.md
@docs/deploy-architecture.md
@docs/superpowers/specs/2026-04-20-matrix-shared-workspace-file-flow-design.md
@adapter/matrix/files.py
@sdk/real.py
@tests/adapter/matrix/test_files.py
@tests/platform/test_real.py
<interfaces>
From `adapter/matrix/files.py`:
```python
def build_workspace_attachment_path(
*,
workspace_root: Path,
matrix_user_id: str,
room_id: str,
filename: str,
timestamp: str | None = None,
) -> tuple[str, Path]: ...
```
From `sdk/real.py`:
```python
@staticmethod
def _attachment_paths(attachments: list[Attachment] | None) -> list[str]: ...
@staticmethod
def _attachment_from_send_file_event(event: MsgEventSendFile) -> Attachment: ...
```
</interfaces>
</context>
<tasks>
<task type="auto" tdd="true">
<name>Task 1: Add shared-volume file contract tests for `/agents` deployment</name>
<files>tests/adapter/matrix/test_files.py, tests/platform/test_real.py</files>
<read_first>tests/adapter/matrix/test_files.py, tests/platform/test_real.py, adapter/matrix/files.py, sdk/real.py, docs/deploy-architecture.md, .planning/phases/05-mvp-deployment/05-RESEARCH.md</read_first>
<behavior>
- Test 1: incoming Matrix files land under a room-safe `surfaces/matrix/.../inbox/...` path that remains relative to the agent workspace contract.
- Test 2: upstream file events normalize `/workspace/...` and `/agents/...`-style absolute paths into relative `workspace_path` values.
- Test 3: attachment forwarding never switches to inline blobs or HTTP shim URLs (per PH05-04).
</behavior>
<acceptance_criteria>
- `tests/adapter/matrix/test_files.py` asserts the path namespace includes sanitized user and room components.
- `tests/platform/test_real.py` contains explicit coverage for send-file path normalization.
- The automated test command in `<verify>` exercises both inbound and outbound sides of the shared-volume contract.
</acceptance_criteria>
<action>Expand the file-flow regressions around the real deployment contract described in research and `docs/deploy-architecture.md`. Keep the tests centered on relative `workspace_path` transport and room-safe on-disk layout. Do not introduce proxy URLs, base64 payload transport, or new platform endpoints.</action>
<verify>
<automated>pytest tests/adapter/matrix/test_files.py tests/platform/test_real.py -v</automated>
</verify>
<done>Phase 05 has direct test coverage for `/agents`-backed shared-volume behavior across inbound and outbound file paths.</done>
</task>
<task type="auto" tdd="true">
<name>Task 2: Tighten attachment path handling for the shared volume contract</name>
<files>adapter/matrix/files.py, sdk/real.py</files>
<read_first>adapter/matrix/files.py, sdk/real.py, tests/adapter/matrix/test_files.py, tests/platform/test_real.py, docs/deploy-architecture.md</read_first>
<behavior>
- Test 1: inbound attachment helpers keep returning relative paths even when the bot writes into `/agents`.
- Test 2: outbound file normalization accepts absolute paths from the agent runtime but strips them back to relative workspace paths for Matrix rendering.
- Test 3: no code path emits non-relative attachment references to the upstream agent API.
</behavior>
<acceptance_criteria>
- `sdk/real.py` only forwards relative attachment paths to the agent API.
- `sdk/real.py` normalizes both `/workspace` and `/agents` absolute roots if present in send-file events.
- `adapter/matrix/files.py` remains the single source of truth for room-safe attachment path construction.
</acceptance_criteria>
<action>Implement only the minimum runtime changes needed to satisfy the shared-volume tests. Keep `adapter/matrix/files.py` as the single place that builds surface-owned attachment paths, and keep `sdk/real.py` responsible only for attachment passthrough and send-file normalization. Do not widen this plan into compose edits, registry redesign, or bot command changes.</action>
<verify>
<automated>pytest tests/adapter/matrix/test_files.py tests/platform/test_real.py tests/adapter/matrix/test_send_outgoing.py -v</automated>
</verify>
<done>Incoming and outgoing file references stay compatible with the real shared-volume deployment contract, and the targeted file/path regressions pass.</done>
</task>
</tasks>
<verification>
Run the Matrix file helper, real platform client, and outgoing-send slices together so the shared-volume contract is validated from write path through return-to-user rendering.
</verification>
<success_criteria>
The Matrix bot and agent runtime can exchange file references through the shared volume using only relative workspace paths and room-safe storage layout.
</success_criteria>
<output>
After completion, create `.planning/phases/05-mvp-deployment/05-03-SUMMARY.md`
</output>

View file

@ -0,0 +1,128 @@
---
phase: 05-mvp-deployment
plan: 04
type: execute
wave: 2
depends_on:
- 05-03
files_modified:
- docker-compose.prod.yml
- docker-compose.fullstack.yml
- Dockerfile
- .env.example
- README.md
- docs/deploy-architecture.md
autonomous: true
requirements:
- PH05-05
must_haves:
truths:
- "Production handoff uses a bot-only compose artifact instead of the internal full-stack harness."
- "Internal E2E compose brings up the bot, platform-agent, and shared volume with explicit health-gated startup."
- "Deployment docs and env examples match the split compose artifacts and shared `/agents` contract."
artifacts:
- path: "docker-compose.prod.yml"
provides: "Bot-only deployment handoff artifact"
- path: "docker-compose.fullstack.yml"
provides: "Internal E2E harness with shared volume and dependency gating"
- path: ".env.example"
provides: "Documented runtime contract for Phase 05 deployment"
key_links:
- from: "docker-compose.fullstack.yml"
to: "docker-compose.prod.yml"
via: "shared service definition or explicit duplication"
pattern: "matrix-bot"
- from: "docs/deploy-architecture.md"
to: "docker-compose.prod.yml"
via: "operator handoff instructions"
pattern: "prod"
---
<objective>
Split deployment artifacts by operational intent so operator handoff and internal E2E testing stop sharing the same compose contract.
Purpose: Phase 05 needs an explicit bot-only production artifact and a separate full-stack compose harness aligned with the shared-volume design.
Output: `docker-compose.prod.yml`, `docker-compose.fullstack.yml`, and updated env/docs describing when to use each.
</objective>
<execution_context>
@/Users/a/.codex/get-shit-done/workflows/execute-plan.md
@/Users/a/.codex/get-shit-done/templates/summary.md
</execution_context>
<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/05-mvp-deployment/05-RESEARCH.md
@.planning/phases/05-mvp-deployment/05-VALIDATION.md
@.planning/phases/04-matrix-mvp-shared-agent-context-and-context-management-comma/04-03-SUMMARY.md
@docs/deploy-architecture.md
@docker-compose.yml
@Dockerfile
@.env.example
<interfaces>
Current root compose contract:
```yaml
services:
platform-agent:
...
matrix-bot:
build: .
env_file: .env
environment:
AGENT_BASE_URL: http://platform-agent:8000
SURFACES_WORKSPACE_DIR: /workspace
```
</interfaces>
</context>
<tasks>
<task type="auto">
<name>Task 1: Create split prod and fullstack compose artifacts</name>
<files>docker-compose.prod.yml, docker-compose.fullstack.yml, Dockerfile, .env.example</files>
<read_first>docker-compose.yml, Dockerfile, .env.example, docs/deploy-architecture.md, .planning/phases/05-mvp-deployment/05-RESEARCH.md, .planning/phases/05-mvp-deployment/05-VALIDATION.md</read_first>
<acceptance_criteria>
- `docker-compose.prod.yml` defines only the bot-side runtime required for operator handoff.
- `docker-compose.fullstack.yml` includes the internal platform-agent service, shared volume mounts, and health-gated startup rather than sleep-based sequencing.
- `.env.example` documents the Phase 05 env contract without requiring the reader to inspect the old root compose file.
</acceptance_criteria>
<action>Create two explicit compose artifacts per PH05-05: a bot-only `docker-compose.prod.yml` for deployment handoff and a `docker-compose.fullstack.yml` for internal E2E runs. Align mounts and env values with the Phase 05 shared `/agents` volume direction from research. Reuse the existing Dockerfile unless a small compatibility edit is required. Do not keep the old single-file compose setup as the only documented runtime.</action>
<verify>
<automated>docker compose -f docker-compose.prod.yml config > /tmp/phase05-prod-compose.yml && docker compose -f docker-compose.fullstack.yml config > /tmp/phase05-fullstack-compose.yml && rg -n "^services:$|^ matrix-bot:$|^volumes:$|/agents" /tmp/phase05-prod-compose.yml && test -z "$(rg -n "^ platform-agent:$" /tmp/phase05-prod-compose.yml)" && rg -n "^ matrix-bot:$|^ platform-agent:$|condition: service_healthy|healthcheck:|/agents" /tmp/phase05-fullstack-compose.yml</automated>
</verify>
<done>Both compose files render successfully and express distinct operational roles: prod handoff vs internal full-stack testing.</done>
</task>
<task type="auto">
<name>Task 2: Update deployment docs and operator guidance for the split artifacts</name>
<files>README.md, docs/deploy-architecture.md</files>
<read_first>README.md, docs/deploy-architecture.md, docker-compose.prod.yml, docker-compose.fullstack.yml, .env.example</read_first>
<acceptance_criteria>
- README or deploy doc tells the operator exactly which compose file to use for production vs internal E2E.
- The docs describe the shared `/agents` volume behavior and reference the relevant env vars.
- The old root `docker-compose.yml` is no longer the primary documented deployment path.
</acceptance_criteria>
<action>Update the repo docs so the Phase 05 deployment story is executable without inference: production handoff stays bot-only, full-stack compose is for internal E2E, and shared-volume file behavior is described in the same terms as the runtime artifacts. Keep documentation narrowly scoped to the shipped compose split; do not widen into platform-master or future storage design.</action>
<verify>
<automated>rg -n "docker-compose\\.prod|docker-compose\\.fullstack|/agents|prod handoff|full-stack" README.md docs/deploy-architecture.md .env.example && rg -n "production|deploy" README.md docs/deploy-architecture.md | rg "docker-compose\\.prod" && test -z "$(rg -n "docker compose up|docker-compose\\.yml" README.md docs/deploy-architecture.md | rg "production|deploy")"</automated>
</verify>
<done>The docs and env guidance match the new compose artifacts and no longer imply a single shared deployment file.</done>
</task>
</tasks>
<verification>
Render both compose files, then grep the docs for the new artifact names and `/agents` references so the operator contract is explicit and consistent.
</verification>
<success_criteria>
An operator can deploy the Matrix bot with the bot-only compose file, while developers can run the internal end-to-end harness separately without reinterpreting the deployment docs.
</success_criteria>
<output>
After completion, create `.planning/phases/05-mvp-deployment/05-04-SUMMARY.md`
</output>

View file

@ -1,157 +0,0 @@
# Phase 05: MVP Deployment — Context
**Gathered:** 2026-04-27
**Status:** Ready for planning
<domain>
## Phase Boundary
Подготовить Matrix-бот к реальному деплою на lambda.coredump.ru:
1. Перейти на single-chat архитектуру (chat_id=0, один контекст на пользователя)
2. Упростить онбординг: DM-first без Space/rooms provisioning, welcome-сообщение при invite
3. Расширить config/matrix-agents.yaml — добавить user_agents (Matrix user_id → agent_id) и per-agent base_url/workspace_path
4. Обновить AgentRegistry и _build_platform_from_env для per-agent URL routing
5. Реализовать file transfer через shared volume /agents/: входящие → incoming/{filename}, исходящие через MsgEventSendFile
6. Добавить !clear (сброс контекста через переподключение AgentApi)
7. Написать docker-compose.prod.yml с полным стеком (matrix-bot + placeholder agent + named volume agents)
8. Удалить legacy: !agent, !new, !archive, !rename, !save, !load, Space-creation, C1/C2/C3 room provisioning
НЕ входит:
- Конфигурация агентских контейнеров (платформа)
- Telegram-адаптер
- E2EE
- platform-master интеграция
- !save / !load (ненадёжны без persistent memory в агенте)
</domain>
<decisions>
## Implementation Decisions
### Single-chat архитектура
- **D-01:** chat_id=0 для всех сообщений. Один контекст агента на пользователя. Изоляции между разными разговорами нет — вместо этого `!clear` сбрасывает контекст.
- **D-02:** Удалить всю multi-room инфраструктуру: C1/C2/C3, `!new`, `!archive`, `!rename`, Space-creation, room provisioning. Matrix-бот работает только в DM-комнате (личка с ботом).
- **D-03:** Удалить `!save` и `!load` — ненадёжны без persistent memory в агенте (MemorySaver сбрасывается на рестарте).
### Онбординг (DM-first)
- **D-04:** При получении invite в DM-комнату — принять, отправить welcome-сообщение: "Привет! Я Lambda AI-агент. Просто напиши — и я отвечу. `!clear` чтобы начать новый разговор, `!context` чтобы посмотреть статус."
- **D-05:** Никакого Space, никаких дочерних комнат. Вся переписка в одной DM-комнате.
### !clear (новая команда)
- **D-06:** Сбросить контекст агента — закрыть текущий AgentApi connection и создать новый (`await agent.close()` + `await agent.connect()`). Это сбрасывает MemorySaver. Подтвердить пользователю: "Контекст сброшен. Начнём с чистого листа."
### !agent команда
- **D-07:** Удалить полностью. Маппинг user→agent теперь статический из config. Пользователь не может менять агента.
### Конфиг агентов (config/matrix-agents.yaml)
- **D-02:** Расширить текущий matrix-agents.yaml — добавить user_agents dict и поля base_url/workspace_path к каждому агенту. Один файл, один парсер. Формат по docs/deploy-architecture.md:
```yaml
user_agents:
"@user0:matrix.lambda.coredump.ru": agent-0
"@user1:matrix.lambda.coredump.ru": agent-1
agents:
- id: agent-0
label: "Agent 0"
base_url: "ws://lambda.coredump.ru:7000/agent_0/"
workspace_path: "/agents/0/"
```
- **D-03:** AgentDefinition расширяется полями base_url (str) и workspace_path (str). AgentRegistry добавляет user_agents dict (Matrix user_id → agent_id) и метод get_agent_id_by_user(matrix_user_id).
### Роутинг user → agent в _build_platform_from_env
- **D-04:** Вместо глобального AGENT_BASE_URL — per-agent URL из конфига. _build_platform_from_env строит delegates с правильным base_url для каждого агента. RoutedPlatformClient._resolve_delegate использует user_agents из registry для определения delegate по Matrix user_id.
### Входящие файлы (пользователь → агент)
- **D-05:** Путь внутри workspace агента: `incoming/{filename}`. Абсолютный путь: `{workspace_path}/incoming/{filename}` (например `/agents/0/incoming/photo.jpg`). Обновить files.py: `build_workspace_attachment_path` принимает workspace_path агента и строит путь `incoming/{filename}`. Передавать в agent.send_message() как attachments=["incoming/{filename}"] (относительно /workspace).
- **D-06:** workspace_path агента берётся из AgentDefinition по agent_id пользователя.
### Исходящие файлы (агент → пользователь)
- **D-07:** При получении MsgEventSendFile(path="output/report.pdf") — читать файл из `{workspace_path}/{path}`. Отправлять как Matrix file message. Обработчик в Matrix bot.py при обработке stream-ответов от агента.
### docker-compose для prod
- **D-08:** `docker-compose.prod.yml` включает полный стек: Matrix-бот + агент-контейнер (placeholder image `lambda-agent:latest` — уточнить у платформы) + named volume `agents`. Это позволяет тестировать полный стек самостоятельно. Платформа берёт отсюда схему интеграции для своего деплоя.
- **D-09:** Named volume `agents` монтируется в Matrix-бот как `/agents/` и в агент-контейнер как `/workspace`. Env vars из `.env.prod`. Запуск: `docker compose -f docker-compose.prod.yml up`.
### Неавторизованные пользователи
- **D-10:** Если Matrix user_id не найден в `user_agents` — принять invite, отправить сообщение: "К вашему аккаунту не привязан агент. Напишите @og_mput в Telegram для получения доступа." Дальнейшие сообщения игнорировать (или повторять то же сообщение).
### !clear
- **D-11:** Без диалога подтверждения — сбрасывает немедленно. Закрыть текущий AgentApi connection, создать новый. Ответ пользователю: "Контекст сброшен."
### !settings и прочие команды настроек
- **D-12:** Удалить `!settings`, `!settings soul`, `!settings skills`, `!settings safety` — agent_api не предоставляет настроек, всё равно возвращало "недоступно в MVP".
### Claude's Discretion
- MATRIX_AGENT_REGISTRY_PATH — оставить как env var для пути к конфигу (уже существует)
- Формат .env.prod
- Group room invites (не-DM) — отклонять автоматически
- Существующие Space+rooms у старых пользователей — игнорировать, не мигрировать
</decisions>
<canonical_refs>
## Canonical References
**Downstream agents MUST read these before planning or implementing.**
### Deployment architecture (PRIMARY)
- `docs/deploy-architecture.md` — Топология, формат конфига, AgentApi lifecycle, file transfer protocol, открытые вопросы
### Существующий код (изменяем)
- `adapter/matrix/agent_registry.py` — AgentRegistry, AgentDefinition, load_agent_registry — расширяем
- `adapter/matrix/bot.py` — _build_platform_from_env, _load_agent_registry_from_env — обновляем роутинг
- `adapter/matrix/routed_platform.py` — RoutedPlatformClient._resolve_delegate — обновляем логику
- `adapter/matrix/files.py` — build_workspace_attachment_path, download_matrix_attachment — меняем путь
- `adapter/matrix/handlers/agent.py` — удаляем или делаем no-op (!agent handler)
- `config/matrix-agents.yaml` — расширяем формат
- `docker-compose.yml` — существующий dev compose (за основу для prod варианта)
### SDK (используем как есть)
- `sdk/real.py` — RealPlatformClient — base_url теперь per-instance, но сам класс не меняется
- `sdk/upstream_agent_api.py` — AgentApi, MsgEventSendFile — читаем MsgEventSendFile в стриме
</canonical_refs>
<code_context>
## Existing Code Insights
### Reusable Assets
- `adapter/matrix/files.py::build_workspace_attachment_path` — уже строит путь к файлу, нужно заменить логику `surfaces/matrix/...` на `incoming/{filename}`
- `adapter/matrix/files.py::download_matrix_attachment` — скачивает файл, нужно передавать workspace_path агента
- `adapter/matrix/agent_registry.py::load_agent_registry` — парсер YAML, расширяем без переписывания
### Established Patterns
- `RoutedPlatformClient` + delegates: dict[agent_id, RealPlatformClient] — паттерн уже есть, нужно только per-agent URL при создании delegates
- `MATRIX_PLATFORM_BACKEND=real` активирует prod-path — сохраняем
- `MATRIX_AGENT_REGISTRY_PATH` — env var для пути к конфигу — сохраняем
### Integration Points
- `_build_platform_from_env` создаёт delegates — здесь меняется источник URL (из конфига, не из env)
- `RoutedPlatformClient._resolve_delegate` — здесь добавляется lookup по user_agents
- Matrix bot stream handler — здесь добавляется обработка MsgEventSendFile
</code_context>
<specifics>
## Specific Ideas
- AgentApi конструктор в master ветке: `AgentApi(agent_id, base_url, on_disconnect=..., chat_id=0)` — base_url это ws:// URL агента
- Входящий файл: bot скачивает из Matrix → пишет в `{workspace_path}/incoming/{filename}` → вызывает `agent.send_message(text, attachments=["incoming/{filename}"])` (путь relative to /workspace)
- Исходящий файл: при `MsgEventSendFile(path="output/report.pdf")` → читаем `{workspace_path}/output/report.pdf` → отправляем в Matrix через `client.upload()` → `client.room_send(m.file)`
- docker-compose.prod.yml монтирует volume: `volumes: ["/agents/:/agents/"]` — хост обеспечивает директорию
</specifics>
<deferred>
## Deferred Ideas
- platform-master интеграция (динамический get_agent_url через POST /api/v1/create) — когда feat/storage будет готов
- !agent как admin-override — не нужен для MVP, можно добавить позже если потребуется
- Per-chat context isolation через разные chat_id (сейчас chat_id=0 для всех) — ждём platform сигнал
</deferred>
---
*Phase: 05-mvp-deployment*
*Context gathered: 2026-04-27*

View file

@ -1,65 +0,0 @@
# Phase 05: MVP Deployment — Discussion Log
> **Audit trail only.** Do not use as input to planning, research, or execution agents.
> Decisions captured in CONTEXT.md — this log preserves the alternatives considered.
**Date:** 2026-04-27
**Phase:** 05-mvp-deployment
**Areas discussed:** !agent legacy, file transfer path, config format, docker-compose scope
---
## !agent команда
| Option | Description | Selected |
|--------|-------------|----------|
| Удалить | Убираем полностью — маппинг статический из конфига | ✓ |
| Оставить как no-op | Команда остаётся, но ничего не делает | |
| Только для dev-режима | Работает, когда нет user_agents в конфиге | |
**User's choice:** Удалить
**Notes:** Команда была legacy от эпохи, когда роутинг был динамическим. С user_agents в конфиге она не нужна.
---
## Путь входящих файлов
| Option | Description | Selected |
|--------|-------------|----------|
| incoming/{filename} | По docs/deploy-architecture.md — /agents/N/incoming/file | ✓ |
| surfaces/matrix/{user}/{room}/inbox/{file} | Текущий формат files.py | |
**User's choice:** incoming/{filename}
**Notes:** Пользователь указал — это решение от платформенной команды, зафиксировано в docs/deploy-architecture.md.
---
## Формат config/matrix-agents.yaml
| Option | Description | Selected |
|--------|-------------|----------|
| Расширить текущий YAML | Добавить user_agents + base_url/workspace_path в тот же файл | ✓ |
| Отдельный prod-config.yaml | Два файла: registry (id/label) + prod конфиг (URL/user_agents) | |
**User's choice:** Расширить текущий YAML
**Notes:** Один файл проще. Формат уже определён в docs/deploy-architecture.md.
---
## docker-compose prod scope
**User's choice:** docker-compose.prod.yml только для Matrix-бота
**Notes:** Платформа отвечает за агентские контейнеры — мы их не трогаем. Matrix-бот монтирует /agents/ как external host path, платформа обеспечивает содержимое.
---
## Claude's Discretion
- Обработка Matrix user_id, не найденного в user_agents
- Имена env переменных для prod
- Формат .env.prod
## Deferred Ideas
- platform-master интеграция
- Per-chat chat_id isolation

View file

@ -1,13 +1,13 @@
--- ---
phase: 5 phase: 05
slug: mvp-deployment slug: mvp-deployment
status: draft status: revised
nyquist_compliant: false nyquist_compliant: true
wave_0_complete: false wave_0_complete: false
created: 2026-04-27 created: 2026-04-28
--- ---
# Phase 5 — Validation Strategy # Phase 05 — Validation Strategy
> Per-phase validation contract for feedback sampling during execution. > Per-phase validation contract for feedback sampling during execution.
@ -17,35 +17,35 @@ created: 2026-04-27
| Property | Value | | Property | Value |
|----------|-------| |----------|-------|
| **Framework** | pytest | | **Framework** | `pytest` + `pytest-asyncio` |
| **Config file** | pyproject.toml | | **Config file** | `pyproject.toml` |
| **Quick run command** | `pytest tests/adapter/matrix/ -v -x` | | **Quick run command** | `pytest tests/adapter/matrix/test_reconciliation.py tests/adapter/matrix/test_restart_persistence.py -v` |
| **Full suite command** | `pytest tests/ -v` | | **Full suite command** | `pytest tests/ -v` |
| **Estimated runtime** | ~30 seconds | | **Estimated runtime** | targeted slices < 60 seconds each; full suite longer |
--- ---
## Sampling Rate ## Sampling Rate
- **After every task commit:** Run `pytest tests/adapter/matrix/ -v -x` - **After every task commit:** Run the exact `<automated>` command from the task that just changed
- **After every plan wave:** Run `pytest tests/ -v` - **After every plan wave:** Run `pytest tests/adapter/matrix/ -v`
- **Before `/gsd-verify-work`:** Full suite must be green - **Before `$gsd-verify-work`:** Full suite must be green
- **Max feedback latency:** 30 seconds - **Max feedback latency:** 60 seconds for task-level slices
--- ---
## Per-Task Verification Map ## Per-Task Verification Map
| Task ID | Plan | Wave | Requirement | Threat Ref | Secure Behavior | Test Type | Automated Command | File Exists | Status | | Task ID | Plan | Wave | Requirement | Test Type | Automated Command | File Exists | Status |
|---------|------|------|-------------|------------|-----------------|-----------|-------------------|-------------|--------| |---------|------|------|-------------|-----------|-------------------|-------------|--------|
| 05-A-01 | A | 1 | D-02/D-03 | — | agent_id lookup by matrix_user_id only | unit | `pytest tests/adapter/matrix/test_agent_registry.py -v` | ❌ W0 | ⬜ pending | | 05-01-01 | 01 | 1 | PH05-01 | integration | `pytest tests/adapter/matrix/test_invite_space.py tests/adapter/matrix/test_chat_space.py tests/adapter/matrix/test_reconciliation.py tests/adapter/matrix/test_restart_persistence.py -v` | ❌ W0 | ⬜ pending |
| 05-A-02 | A | 1 | D-04 | — | per-agent URL used in delegates | unit | `pytest tests/adapter/matrix/test_routed_platform.py -v` | ❌ W0 | ⬜ pending | | 05-01-02 | 01 | 1 | PH05-03 | integration | `pytest tests/adapter/matrix/test_invite_space.py tests/adapter/matrix/test_chat_space.py tests/adapter/matrix/test_reconciliation.py tests/adapter/matrix/test_restart_persistence.py tests/adapter/matrix/test_dispatcher.py -v` | ❌ W0 | ⬜ pending |
| 05-B-01 | B | 1 | D-04/D-05 | — | welcome message sent on invite | unit | `pytest tests/adapter/matrix/test_onboarding.py -v` | ❌ W0 | ⬜ pending | | 05-02-01 | 02 | 2 | PH05-02 | integration | `pytest tests/adapter/matrix/test_context_commands.py tests/adapter/matrix/test_routed_platform.py -v` | ✅ partial | ⬜ pending |
| 05-B-02 | B | 1 | D-10 | — | unauthorized user gets access-denied message | unit | `pytest tests/adapter/matrix/test_onboarding.py::test_unauthorized -v` | ❌ W0 | ⬜ pending | | 05-02-02 | 02 | 2 | PH05-02 | integration | `pytest tests/adapter/matrix/test_context_commands.py tests/adapter/matrix/test_routed_platform.py tests/adapter/matrix/test_dispatcher.py -v` | ✅ partial | ⬜ pending |
| 05-B-03 | B | 1 | D-11 | — | !clear closes and reopens AgentApi | unit | `pytest tests/adapter/matrix/test_commands.py::test_clear -v` | ❌ W0 | ⬜ pending | | 05-03-01 | 03 | 1 | PH05-04 | integration | `pytest tests/adapter/matrix/test_files.py tests/platform/test_real.py -v` | ✅ partial | ⬜ pending |
| 05-C-01 | C | 2 | D-05/D-06 | — | incoming file written to workspace_path/incoming/ | unit | `pytest tests/adapter/matrix/test_files.py -v` | ✅ | ⬜ pending | | 05-03-02 | 03 | 1 | PH05-04 | integration | `pytest tests/adapter/matrix/test_files.py tests/platform/test_real.py tests/adapter/matrix/test_send_outgoing.py -v` | ✅ partial | ⬜ pending |
| 05-C-02 | C | 2 | D-07 | — | outgoing MsgEventSendFile reads from workspace_path | unit | `pytest tests/adapter/matrix/test_files.py::test_outgoing_file -v` | ❌ W0 | ⬜ pending | | 05-04-01 | 04 | 2 | PH05-05 | smoke | `docker compose -f docker-compose.prod.yml config && docker compose -f docker-compose.fullstack.yml config` | ❌ W0 | ⬜ pending |
| 05-C-03 | C | 2 | D-08/D-09 | — | docker-compose.prod.yml has agents volume and both services | manual | see below | N/A | ⬜ pending | | 05-04-02 | 04 | 2 | PH05-05 | docs smoke | `rg -n "docker-compose\\.prod|docker-compose\\.fullstack|/agents|prod handoff|full-stack" README.md docs/deploy-architecture.md .env.example` | ✅ | ⬜ pending |
*Status: ⬜ pending · ✅ green · ❌ red · ⚠️ flaky* *Status: ⬜ pending · ✅ green · ❌ red · ⚠️ flaky*
@ -53,13 +53,11 @@ created: 2026-04-27
## Wave 0 Requirements ## Wave 0 Requirements
- [ ] `tests/adapter/matrix/test_agent_registry.py` — tests for user_agents lookup and per-agent base_url/workspace_path - [ ] `tests/adapter/matrix/test_reconciliation.py` — startup recovery of user and room metadata from Matrix state
- [ ] `tests/adapter/matrix/test_routed_platform.py` — updated tests for _resolve_delegate using user_agents - [ ] `tests/adapter/matrix/test_restart_persistence.py` additions — deterministic backfill for legacy rooms missing `platform_chat_id`
- [ ] `tests/adapter/matrix/test_onboarding.py` — tests for invite handling, welcome message, unauthorized user response - [ ] `tests/adapter/matrix/test_context_commands.py` additions — room-local `!clear` rotation semantics
- [ ] `tests/adapter/matrix/test_commands.py` — tests for !clear command behavior - [ ] `tests/adapter/matrix/test_files.py` additions — cross-room attachment isolation and shared-root consistency
- [ ] Update `tests/adapter/matrix/test_files.py` — add outgoing file test - [ ] Compose smoke coverage or documented verification command for `docker-compose.prod.yml` and `docker-compose.fullstack.yml`
*Existing: `tests/adapter/matrix/test_files.py` — already exists, covers incoming file path logic*
--- ---
@ -67,8 +65,9 @@ created: 2026-04-27
| Behavior | Requirement | Why Manual | Test Instructions | | Behavior | Requirement | Why Manual | Test Instructions |
|----------|-------------|------------|-------------------| |----------|-------------|------------|-------------------|
| docker-compose.prod.yml full-stack launch | D-08/D-09 | Requires Docker daemon and lambda-agent:latest image | `docker compose -f docker-compose.prod.yml up` — verify both services start, volume mounts at /agents/ | | Restart after real Matrix room topology exists | PH05-03 | Full recovery depends on live Space hierarchy and persisted homeserver state | Start the bot, provision a Space and chat rooms, stop the bot, remove local SQLite metadata, restart, confirm routing and room labels are rebuilt before live messages are handled |
| Matrix bot invite + DM flow | D-04/D-05 | Requires live Matrix homeserver | Invite bot to DM, verify welcome message appears | | Shared `/agents` volume behavior across bot and platform containers | PH05-04 | Container mounts and permissions are environment-dependent | Run `docker compose -f docker-compose.fullstack.yml up`, upload a file in Matrix, confirm the agent sees the relative `workspace_path`, then confirm an agent-created file is readable back from the bot side |
| Operator handoff of prod compose | PH05-05 | Final deploy contract depends on real env files and target host conventions | Run `docker compose -f docker-compose.prod.yml config` on the target deployment checkout and confirm only bot services, required env vars, and shared volumes are present |
--- ---
@ -78,7 +77,7 @@ created: 2026-04-27
- [ ] Sampling continuity: no 3 consecutive tasks without automated verify - [ ] Sampling continuity: no 3 consecutive tasks without automated verify
- [ ] Wave 0 covers all MISSING references - [ ] Wave 0 covers all MISSING references
- [ ] No watch-mode flags - [ ] No watch-mode flags
- [ ] Feedback latency < 30s - [x] Feedback latency target tightened to task slices under 60s
- [ ] `nyquist_compliant: true` set in frontmatter - [x] `nyquist_compliant: true` set in frontmatter
**Approval:** pending **Approval:** pending

View file

@ -1,6 +1,8 @@
FROM python:3.11-slim AS base FROM python:3.11-slim AS base
WORKDIR /app WORKDIR /app
RUN useradd -u 1000 -m appuser
USER appuser
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
ENV PYTHONPATH=/app ENV PYTHONPATH=/app
@ -20,25 +22,25 @@ RUN uv sync --no-dev --no-install-project --frozen
FROM base AS development FROM base AS development
COPY . .
RUN uv sync --no-dev --frozen
# Local fullstack/dev builds can override the SDK with a checked-out agent_api # Local fullstack/dev builds can override the SDK with a checked-out agent_api
# build context, matching platform-agent's development Dockerfile pattern. # build context, matching platform-agent's development Dockerfile pattern.
COPY --from=agent_api . /agent_api/ COPY --from=agent_api . /agent_api/
RUN python -m pip install --no-cache-dir --ignore-requires-python -e /agent_api/ RUN python -m pip install --no-cache-dir --ignore-requires-python -e /agent_api/
COPY . .
RUN uv sync --no-dev --frozen
CMD ["python", "-m", "adapter.matrix.bot"] CMD ["python", "-m", "adapter.matrix.bot"]
FROM base AS production FROM base AS production
COPY . .
RUN uv sync --no-dev --frozen
# Production builds follow the platform-agent pattern: install the API SDK from # Production builds follow the platform-agent pattern: install the API SDK from
# the platform Git repository instead of relying on local external/ clones. # the platform Git repository instead of relying on local external/ clones.
ARG LAMBDA_AGENT_API_REF=master ARG LAMBDA_AGENT_API_REF=master
RUN python -m pip install --no-cache-dir --ignore-requires-python \ RUN python -m pip install --no-cache-dir --ignore-requires-python \
"git+https://git.lambda.coredump.ru/platform/agent_api.git@${LAMBDA_AGENT_API_REF}" "git+https://git.lambda.coredump.ru/platform/agent_api.git@${LAMBDA_AGENT_API_REF}"
COPY . .
RUN uv sync --no-dev --frozen
CMD ["python", "-m", "adapter.matrix.bot"] CMD ["python", "-m", "adapter.matrix.bot"]

View file

@ -22,8 +22,9 @@ Bot container Agent containers
/agents/N/ ←── volume ──→ agent_N: /workspace/ /agents/N/ ←── volume ──→ agent_N: /workspace/
``` ```
- Бот сохраняет входящий файл в `{workspace_path}/incoming/{stamp}-{file}` и передаёт агенту `attachments=["incoming/{stamp}-{file}"]` - Бот сохраняет входящий файл прямо в `{workspace_path}/{file}` и передаёт агенту `attachments=["{file}"]`
- Агент пишет исходящий файл в свой `/workspace/output/file`, бот читает его из `{workspace_path}/output/file` - Если файл с таким именем уже есть, бот сохраняет следующий как `file (1).ext`, `file (2).ext`, как в Windows
- Агент пишет исходящий файл прямо в свой `/workspace/file`, бот читает его из `{workspace_path}/file`
- `workspace_path` для каждого агента задаётся в `config/matrix-agents.yaml` - `workspace_path` для каждого агента задаётся в `config/matrix-agents.yaml`
**3. Конфиг агентов** **3. Конфиг агентов**
@ -128,7 +129,7 @@ agents:
- `user_agents` — маппинг Matrix user_id → agent_id. Если пользователь не найден — используется первый агент. - `user_agents` — маппинг Matrix user_id → agent_id. Если пользователь не найден — используется первый агент.
- `base_url` — HTTP URL агент-эндпоинта (path-based routing через reverse proxy). - `base_url` — HTTP URL агент-эндпоинта (path-based routing через reverse proxy).
- `workspace_path` — путь к воркспейсу агента внутри бот-контейнера на shared volume. - `workspace_path` — путь к воркспейсу агента внутри бот-контейнера на shared volume.
Бот сохраняет входящие файлы в `{workspace_path}/incoming/`, агент пишет исходящие в свой `/workspace/`. Бот сохраняет входящие файлы прямо в `{workspace_path}/`, агент пишет исходящие прямо в свой `/workspace/`.
- Для 25-30 агентов продолжайте тот же паттерн: `/agent_17/` + `/agents/17`, `/agent_29/` + `/agents/29`. - Для 25-30 агентов продолжайте тот же паттерн: `/agent_17/` + `/agents/17`, `/agent_29/` + `/agents/29`.
Полный пример с комментариями: `config/matrix-agents.example.yaml` Полный пример с комментариями: `config/matrix-agents.example.yaml`
@ -137,6 +138,15 @@ agents:
`docker-compose.prod.yml` — bot-only handoff через published image. Платформа добавляет этот сервис в свой compose рядом с agent containers, монтирует shared volume и задаёт переменные окружения. Этот compose не создаёт и не собирает агент-контейнеры. `docker-compose.prod.yml` — bot-only handoff через published image. Платформа добавляет этот сервис в свой compose рядом с agent containers, монтирует shared volume и задаёт переменные окружения. Этот compose не создаёт и не собирает агент-контейнеры.
Перед redeploy можно проверить реальные agent routes из той же сети, где будет работать бот:
```bash
PYTHONPATH=. uv run python -m tools.check_matrix_agents \
--config config/matrix-agents.yaml \
--timeout 5
```
Проверка открывает фактический WebSocket URL каждого агента (`.../v1/agent_ws/{chat_id}/`) и ждёт первый `STATUS`. Для проверки полного запроса к агенту добавьте `--message "ping"`.
Для запуска опубликованного image: Для запуска опубликованного image:
```bash ```bash
export SURFACES_BOT_IMAGE=mput1/surfaces-bot:latest export SURFACES_BOT_IMAGE=mput1/surfaces-bot:latest
@ -147,7 +157,7 @@ docker compose --env-file .env -f docker-compose.prod.yml up -d
```text ```text
mput1/surfaces-bot:latest mput1/surfaces-bot:latest
sha256:26ba3a49290ab7c1cf0fa97f3de3fefdc70b59df7e6f1e0c2255728f8e2369be sha256:2f135f3535f7765d4377b440cdabe41195ad2efbc3e175def159ae4689ef90bd
``` ```
Для сборки и публикации surface image: Для сборки и публикации surface image:
@ -183,12 +193,13 @@ rm -f lambda_matrix.db && rm -rf matrix_store
``` ```
Bot (/agents) Agent (/workspace = /agents/N/) Bot (/agents) Agent (/workspace = /agents/N/)
/agents/0/incoming/ ←──── одно и то же хранилище ────→ /workspace/incoming/ /agents/0/report.pdf ←──── одно и то же хранилище ────→ /workspace/report.pdf
/agents/0/output/ ←────────────────────────────────→ /workspace/output/ /agents/0/result.txt ←────────────────────────────────→ /workspace/result.txt
``` ```
- **Входящий файл** (пользователь → агент): бот сохраняет в `{workspace_path}/incoming/{stamp}-{file}`, например `/agents/17/incoming/report.pdf`, и передаёт агенту `attachments=["incoming/{stamp}-{file}"]` - **Входящий файл** (пользователь → агент): бот сохраняет в `{workspace_path}/{file}`, например `/agents/17/report.pdf`, и передаёт агенту `attachments=["report.pdf"]`
- **Исходящий файл** (агент → пользователь): агент пишет в `/workspace/output/file`, бот читает из `{workspace_path}/output/file`, например `/agents/17/output/file`, и отправляет пользователю как Matrix file message - **Коллизии имён**: если `/agents/17/report.pdf` уже существует, бот сохранит следующий файл как `/agents/17/report (1).pdf`, затем `/agents/17/report (2).pdf`
- **Исходящий файл** (агент → пользователь): агент пишет в `/workspace/file`, бот читает из `{workspace_path}/file`, например `/agents/17/result.txt`, и отправляет пользователю как Matrix file message
- `workspace_path` для каждого агента задаётся в `config/matrix-agents.yaml` - `workspace_path` для каждого агента задаётся в `config/matrix-agents.yaml`
--- ---

View file

@ -3,6 +3,7 @@ from __future__ import annotations
from collections.abc import Mapping from collections.abc import Mapping
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Literal
import yaml import yaml
@ -19,6 +20,16 @@ class AgentDefinition:
workspace_path: str = field(default="") workspace_path: str = field(default="")
@dataclass(frozen=True)
class AgentAssignment:
agent_id: str | None
source: Literal["configured", "default", "none"]
@property
def is_default(self) -> bool:
return self.source == "default"
class AgentRegistry: class AgentRegistry:
def __init__( def __init__(
self, self,
@ -38,6 +49,14 @@ class AgentRegistry:
def get_agent_id_for_user(self, matrix_user_id: str) -> str | None: def get_agent_id_for_user(self, matrix_user_id: str) -> str | None:
return self._user_agents.get(matrix_user_id) return self._user_agents.get(matrix_user_id)
def resolve_agent_for_user(self, matrix_user_id: str) -> AgentAssignment:
    """Resolve the agent for *matrix_user_id* and report how it was chosen.

    Returns an ``AgentAssignment`` whose ``source`` is:

    * ``"configured"`` - ``get_agent_id_for_user`` returned an id for the user;
    * ``"default"`` - no id was returned, so the first entry of ``self.agents``
      is used instead;
    * ``"none"`` - no id was returned and ``self.agents`` is empty
      (``agent_id`` is ``None``).
    """
    configured_id = self.get_agent_id_for_user(matrix_user_id)
    if configured_id is None:
        if not self.agents:
            return AgentAssignment(agent_id=None, source="none")
        # Fall back to the first registered agent.
        return AgentAssignment(agent_id=self.agents[0].agent_id, source="default")
    return AgentAssignment(agent_id=configured_id, source="configured")
def _required_text(entry: Mapping[str, object], key: str) -> str: def _required_text(entry: Mapping[str, object], key: str) -> str:
value = entry.get(key) value = entry.get(key)

View file

@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import logging
import os import os
import re import re
from dataclasses import dataclass from dataclasses import dataclass
@ -24,21 +25,26 @@ from nio import (
) )
from nio.responses import SyncResponse from nio.responses import SyncResponse
from adapter.matrix.agent_registry import AgentRegistry, AgentRegistryError, load_agent_registry
from adapter.matrix.converter import from_room_event from adapter.matrix.converter import from_room_event
from adapter.matrix.files import ( from adapter.matrix.files import (
download_matrix_attachment, download_matrix_attachment,
matrix_msgtype_for_attachment, matrix_msgtype_for_attachment,
resolve_workspace_attachment_path, resolve_workspace_attachment_path,
) )
from adapter.matrix.agent_registry import AgentRegistry, AgentRegistryError, load_agent_registry
from adapter.matrix.handlers import register_matrix_handlers from adapter.matrix.handlers import register_matrix_handlers
from adapter.matrix.handlers.auth import handle_invite, provision_workspace_chat from adapter.matrix.handlers.auth import (
default_agent_notice,
handle_invite,
provision_workspace_chat,
restore_workspace_access,
)
from adapter.matrix.handlers.context_commands import ( from adapter.matrix.handlers.context_commands import (
LOAD_PROMPT, LOAD_PROMPT,
) )
from adapter.matrix.routed_platform import RoutedPlatformClient
from adapter.matrix.reconciliation import reconcile_startup_state from adapter.matrix.reconciliation import reconcile_startup_state
from adapter.matrix.room_router import resolve_chat_id from adapter.matrix.room_router import resolve_chat_id
from adapter.matrix.routed_platform import RoutedPlatformClient
from adapter.matrix.store import ( from adapter.matrix.store import (
add_staged_attachment, add_staged_attachment,
clear_load_pending, clear_load_pending,
@ -50,7 +56,6 @@ from adapter.matrix.store import (
remove_staged_attachment_at, remove_staged_attachment_at,
set_pending_confirm, set_pending_confirm,
set_platform_chat_id, set_platform_chat_id,
set_room_agent_id,
set_room_meta, set_room_meta,
) )
from core.auth import AuthManager from core.auth import AuthManager
@ -118,6 +123,26 @@ def _normalize_agent_base_url(url: str) -> str:
return urlunsplit((parsed.scheme, parsed.netloc, path, "", "")) return urlunsplit((parsed.scheme, parsed.netloc, path, "", ""))
def _ws_debug_enabled() -> bool:
    """Report whether the ``SURFACES_DEBUG_WS`` environment flag is switched on.

    The value is trimmed and compared case-insensitively; only ``1``, ``true``,
    ``yes`` and ``on`` count as enabled. An unset variable counts as disabled.
    """
    raw_flag = os.environ.get("SURFACES_DEBUG_WS", "")
    normalized = raw_flag.strip().lower()
    return normalized in ("1", "true", "yes", "on")
def _configure_debug_logging() -> None:
    """Make INFO-level records visible when WebSocket debugging is enabled.

    Does nothing unless ``_ws_debug_enabled()`` is true. Otherwise:

    * if the root logger has no handlers, install a basic configuration at
      INFO level with a timestamped format;
    * if it already has handlers, only lower its level to INFO when the
      current level is higher, leaving existing handlers untouched;
    * set the ``lambda_agent_api`` and ``lambda_agent_api.agent_api`` loggers
      to INFO.
    """
    if _ws_debug_enabled():
        root = logging.getLogger()
        if root.handlers:
            # Logging is already configured: just make sure INFO gets through.
            if root.level > logging.INFO:
                root.setLevel(logging.INFO)
        else:
            logging.basicConfig(
                level=logging.INFO,
                format="%(asctime)s [%(levelname)-8s] %(name)s %(message)s",
            )
        for sdk_logger_name in ("lambda_agent_api", "lambda_agent_api.agent_api"):
            logging.getLogger(sdk_logger_name).setLevel(logging.INFO)
def _agent_base_url_from_env() -> str: def _agent_base_url_from_env() -> str:
if base_url := os.environ.get("AGENT_BASE_URL"): if base_url := os.environ.get("AGENT_BASE_URL"):
return base_url return base_url
@ -135,13 +160,39 @@ def _load_agent_registry_from_env(required: bool = False) -> AgentRegistry | Non
) )
return None return None
try: try:
return load_agent_registry(registry_path) registry = load_agent_registry(registry_path)
except (AgentRegistryError, OSError) as exc: except (AgentRegistryError, OSError) as exc:
raise RuntimeError(f"failed to load matrix agent registry: {registry_path}") from exc raise RuntimeError(f"failed to load matrix agent registry: {registry_path}") from exc
if _ws_debug_enabled():
logger.warning(
"matrix_agent_registry_loaded",
registry_path=registry_path,
agent_count=len(registry.agents),
)
for agent in registry.agents:
logger.warning(
"matrix_agent_registry_entry",
registry_path=registry_path,
agent_id=agent.agent_id,
label=agent.label,
configured_base_url=agent.base_url,
normalized_base_url=_normalize_agent_base_url(agent.base_url)
if agent.base_url
else "",
workspace_path=agent.workspace_path,
)
return registry
def _build_platform_from_env(*, store: StateStore, chat_mgr: ChatManager) -> PlatformClient: def _build_platform_from_env(*, store: StateStore, chat_mgr: ChatManager) -> PlatformClient:
backend = os.environ.get("MATRIX_PLATFORM_BACKEND", "mock").strip().lower() backend = os.environ.get("MATRIX_PLATFORM_BACKEND", "mock").strip().lower()
if _ws_debug_enabled():
logger.warning(
"matrix_platform_backend_selected",
backend=backend,
global_agent_base_url=_agent_base_url_from_env(),
registry_path=os.environ.get("MATRIX_AGENT_REGISTRY_PATH", "").strip(),
)
if backend == "real": if backend == "real":
prototype_state = PrototypeStateStore() prototype_state = PrototypeStateStore()
registry = _load_agent_registry_from_env(required=True) registry = _load_agent_registry_from_env(required=True)
@ -220,6 +271,36 @@ class MatrixBot:
await next_platform_chat_id(self.runtime.store), await next_platform_chat_id(self.runtime.store),
) )
async def _refresh_room_agent_assignment(
    self, room_id: str, matrix_user_id: str, room_meta: dict | None
) -> tuple[dict | None, bool]:
    """Re-resolve the user's agent and bring the room metadata in line with it.

    Returns ``(room_meta, should_warn_default)``. The metadata is returned
    unchanged, with ``False``, when it is empty, when it carries a
    ``redirect_room_id``, or when no registry is configured.

    * Configured assignment: ``agent_id`` and ``agent_assignment`` are
      overwritten when they differ, and the "notice sent" flag is dropped.
    * Default assignment: ``agent_id`` is filled in only when missing. If the
      room then uses the default agent, it is tagged ``"default"`` and the
      caller is asked to warn the user, unless the flag says the notice was
      already sent.

    Changed metadata is persisted via ``set_room_meta`` and a new dict is
    returned; the input dict is never mutated.
    """
    if not room_meta or room_meta.get("redirect_room_id"):
        return room_meta, False
    registry = self.runtime.registry
    if registry is None:
        return room_meta, False

    resolved = registry.resolve_agent_for_user(matrix_user_id)
    refreshed = dict(room_meta)
    notify_default = False

    if resolved.source == "configured":
        already_current = (
            refreshed.get("agent_id") == resolved.agent_id
            and refreshed.get("agent_assignment") == "configured"
        )
        if not already_current:
            refreshed["agent_id"] = resolved.agent_id
            refreshed["agent_assignment"] = "configured"
            refreshed.pop("default_agent_notice_sent", None)
    elif resolved.source == "default":
        if not refreshed.get("agent_id"):
            refreshed["agent_id"] = resolved.agent_id
        # A room pinned to some other agent keeps it; only rooms actually on
        # the default agent are tagged and warned about.
        if refreshed.get("agent_id") == resolved.agent_id:
            refreshed["agent_assignment"] = "default"
            notify_default = not refreshed.get("default_agent_notice_sent")
            # NOTE(review): the flag is stored here, before the caller has
            # delivered the notice; a caller that decides not to send it will
            # not be asked again - confirm this is intended.
            refreshed["default_agent_notice_sent"] = True

    if refreshed == room_meta:
        return room_meta, notify_default
    await set_room_meta(self.runtime.store, room_id, refreshed)
    return refreshed, notify_default
async def on_room_message(self, room: MatrixRoom, event: RoomMessageText) -> None: async def on_room_message(self, room: MatrixRoom, event: RoomMessageText) -> None:
if getattr(event, "sender", None) == self.client.user_id: if getattr(event, "sender", None) == self.client.user_id:
return return
@ -228,6 +309,14 @@ class MatrixBot:
room_meta = await get_room_meta(self.runtime.store, room.room_id) room_meta = await get_room_meta(self.runtime.store, room.room_id)
if room_meta is not None and not room_meta.get("redirect_room_id"): if room_meta is not None and not room_meta.get("redirect_room_id"):
await self._ensure_platform_chat_id(room.room_id, room_meta) await self._ensure_platform_chat_id(room.room_id, room_meta)
room_meta, warn_default_agent = await self._refresh_room_agent_assignment(
room.room_id, sender, room_meta
)
if warn_default_agent and not body.startswith("!"):
await self._send_all(
room.room_id,
[OutgoingMessage(chat_id=room.room_id, text=default_agent_notice())],
)
load_pending = await get_load_pending(self.runtime.store, sender, room.room_id) load_pending = await get_load_pending(self.runtime.store, sender, room.room_id)
if load_pending is not None and (body.isdigit() or body == "!cancel"): if load_pending is not None and (body.isdigit() or body == "!cancel"):
@ -241,17 +330,97 @@ class MatrixBot:
await self._send_all(room.room_id, outgoing) await self._send_all(room.room_id, outgoing)
return return
elif room_meta.get("redirect_room_id"): elif room_meta.get("redirect_room_id"):
display_name = getattr(room, "display_name", None) or sender
if body == "!new":
try:
created = await provision_workspace_chat(
self.client,
sender,
display_name,
self.runtime.platform,
self.runtime.store,
self.runtime.auth_mgr,
self.runtime.chat_mgr,
registry=self.runtime.registry,
)
except Exception as exc:
logger.warning(
"matrix_entry_room_new_chat_failed",
room_id=room.room_id,
sender=sender,
error=str(exc),
)
await self._send_all(
room.room_id,
[
OutgoingMessage(
chat_id=room.room_id,
text="Не удалось создать новый рабочий чат.",
)
],
)
return
welcome = f"Создал новый рабочий чат {created['room_name']}."
if created.get("agent_assignment") == "default":
welcome = f"{welcome}\n\n{default_agent_notice()}"
await self.client.room_send(
created["chat_room_id"],
"m.room.message",
{"msgtype": "m.text", "body": welcome},
)
await set_room_meta(
self.runtime.store,
room.room_id,
{
**room_meta,
"redirect_room_id": created["chat_room_id"],
"redirect_chat_id": created["chat_id"],
},
)
await self._send_all(
room.room_id,
[
OutgoingMessage(
chat_id=room.room_id,
text=(
f"Создал рабочий чат {created['room_name']} "
f"({created['chat_id']}) и отправил приглашение."
),
)
],
)
return
restored = await restore_workspace_access(
self.client,
sender,
display_name,
self.runtime.platform,
self.runtime.store,
self.runtime.auth_mgr,
self.runtime.chat_mgr,
registry=self.runtime.registry,
)
redirect_room_id = room_meta["redirect_room_id"] redirect_room_id = room_meta["redirect_room_id"]
redirect_chat_id = room_meta.get("redirect_chat_id", "рабочий чат") redirect_chat_id = room_meta.get("redirect_chat_id", "рабочий чат")
if restored.get("created_new_chat"):
text = (
f"Создал новый рабочий чат {restored['room_name']} "
f"({restored['chat_id']}) и отправил приглашение."
)
else:
text = (
f"Рабочий чат уже создан: {redirect_chat_id}. "
"Я повторно отправил приглашения в пространство Lambda и рабочие чаты. "
"Чтобы создать новый чат, напишите !new здесь."
)
await self._send_all( await self._send_all(
room.room_id, room.room_id,
[ [
OutgoingMessage( OutgoingMessage(
chat_id=room.room_id, chat_id=room.room_id,
text=( text=text,
f"Рабочий чат уже создан: {redirect_chat_id}. "
"Открой приглашённую комнату для продолжения."
),
) )
], ],
) )
@ -302,6 +471,15 @@ class MatrixBot:
incoming, incoming,
) )
agent_id = (room_meta or {}).get("agent_id") agent_id = (room_meta or {}).get("agent_id")
if _ws_debug_enabled() and not body.startswith("!"):
logger.warning(
"matrix_incoming_message_route",
room_id=room.room_id,
sender=sender,
local_chat_id=local_chat_id,
agent_id=agent_id,
platform_chat_id=(room_meta or {}).get("platform_chat_id"),
)
workspace_root = self._agent_workspace_root(agent_id) workspace_root = self._agent_workspace_root(agent_id)
try: try:
outgoing = await self.runtime.dispatcher.dispatch(incoming) outgoing = await self.runtime.dispatcher.dispatch(incoming)
@ -520,6 +698,8 @@ class MatrixBot:
f"Привет, {created['user'].display_name or sender}! Пиши — я здесь.\n\n" f"Привет, {created['user'].display_name or sender}! Пиши — я здесь.\n\n"
"Команды: !new · !chats · !rename · !archive · !context · !save · !load · !help" "Команды: !new · !chats · !rename · !archive · !context · !save · !load · !help"
) )
if created.get("agent_assignment") == "default":
welcome = f"{welcome}\n\n{default_agent_notice()}"
await set_room_meta( await set_room_meta(
self.runtime.store, self.runtime.store,
room.room_id, room.room_id,
@ -715,6 +895,7 @@ async def send_outgoing(
async def main() -> None: async def main() -> None:
_configure_debug_logging()
homeserver = os.environ.get("MATRIX_HOMESERVER") homeserver = os.environ.get("MATRIX_HOMESERVER")
user_id = os.environ.get("MATRIX_USER_ID") user_id = os.environ.get("MATRIX_USER_ID")
device_id = os.environ.get("MATRIX_DEVICE_ID", "") device_id = os.environ.get("MATRIX_DEVICE_ID", "")
@ -768,6 +949,15 @@ async def main() -> None:
store_path=store_path, store_path=store_path,
request_timeout=client_config.request_timeout, request_timeout=client_config.request_timeout,
) )
if _ws_debug_enabled():
logger.warning(
"matrix_ws_debug_enabled",
homeserver=homeserver,
user_id=user_id,
backend=os.environ.get("MATRIX_PLATFORM_BACKEND", "mock").strip().lower(),
global_agent_base_url=_agent_base_url_from_env(),
registry_path=os.environ.get("MATRIX_AGENT_REGISTRY_PATH", "").strip(),
)
try: try:
await client.sync_forever(timeout=30000, since=since_token) await client.sync_forever(timeout=30000, since=since_token)
finally: finally:

View file

@ -2,16 +2,16 @@ from __future__ import annotations
import mimetypes import mimetypes
import re import re
from datetime import UTC, datetime from pathlib import Path, PurePosixPath
from pathlib import Path
from core.protocol import Attachment from core.protocol import Attachment
def _sanitize_component(value: str) -> str: def _sanitize_filename(value: str) -> str:
cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", value) filename = PurePosixPath(str(value).replace("\\", "/")).name.strip()
cleaned = cleaned.strip("._-") cleaned = re.sub(r"[\x00-\x1f\x7f<>:\"/\\|?*]+", "_", filename)
return cleaned or "unknown" cleaned = cleaned.strip(" .")
return cleaned or "attachment.bin"
def _default_filename(attachment: Attachment) -> str: def _default_filename(attachment: Attachment) -> str:
@ -28,38 +28,38 @@ def _default_filename(attachment: Attachment) -> str:
return f"{base}{extension}" return f"{base}{extension}"
def build_workspace_attachment_path( def _with_copy_index(filename: str, index: int) -> str:
*, path = Path(filename)
workspace_root: Path, suffix = path.suffix
matrix_user_id: str, stem = path.stem if suffix else filename
room_id: str, return f"{stem} ({index}){suffix}"
filename: str,
timestamp: str | None = None,
) -> tuple[str, Path]:
"""Legacy path builder used when no per-agent workspace_path is configured."""
stamp = timestamp or datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
safe_user = _sanitize_component(matrix_user_id.lstrip("@"))
safe_room = _sanitize_component(room_id.lstrip("!"))
safe_name = _sanitize_component(filename) or "attachment.bin"
relative_path = (
Path("surfaces") / "matrix" / safe_user / safe_room / "inbox" / f"{stamp}-{safe_name}"
)
return relative_path.as_posix(), workspace_root / relative_path
def build_agent_incoming_path( def _unique_workspace_relative_path(workspace_root: Path, filename: str) -> tuple[str, Path]:
safe_name = _sanitize_filename(filename)
candidate = workspace_root / safe_name
if not candidate.exists():
return safe_name, candidate
index = 1
while True:
indexed_name = _with_copy_index(safe_name, index)
candidate = workspace_root / indexed_name
if not candidate.exists():
return indexed_name, candidate
index += 1
def build_agent_workspace_path(
*, *,
workspace_root: Path, workspace_root: Path,
filename: str, filename: str,
timestamp: str | None = None,
) -> tuple[str, Path]: ) -> tuple[str, Path]:
"""Per-agent path builder: saves to {workspace_root}/incoming/{stamp}-{filename}. """Saves user files directly to {workspace_root}/{filename}.
The returned relative path is what gets passed to agent.send_message(attachments=[...]). The returned relative path is what gets passed to agent.send_message(attachments=[...]).
""" """
stamp = timestamp or datetime.now(UTC).strftime("%Y%m%d-%H%M%S") return _unique_workspace_relative_path(workspace_root, filename)
safe_name = _sanitize_component(filename) or "attachment.bin"
relative_path = Path("incoming") / f"{stamp}-{safe_name}"
return relative_path.as_posix(), workspace_root / relative_path
async def download_matrix_attachment( async def download_matrix_attachment(
@ -76,21 +76,11 @@ async def download_matrix_attachment(
filename = _default_filename(attachment) filename = _default_filename(attachment)
if workspace_root.name and str(workspace_root) not in (".", "/workspace", "/agents"): del matrix_user_id, room_id, timestamp
# Per-agent workspace configured — use simple incoming/ layout relative_path, absolute_path = build_agent_workspace_path(
relative_path, absolute_path = build_agent_incoming_path( workspace_root=workspace_root,
workspace_root=workspace_root, filename=filename,
filename=filename, )
timestamp=timestamp,
)
else:
relative_path, absolute_path = build_workspace_attachment_path(
workspace_root=workspace_root,
matrix_user_id=matrix_user_id,
room_id=room_id,
filename=filename,
timestamp=timestamp,
)
absolute_path.parent.mkdir(parents=True, exist_ok=True) absolute_path.parent.mkdir(parents=True, exist_ok=True)

View file

@ -22,6 +22,31 @@ def _default_room_name(chat_id: str) -> str:
return f"Чат {suffix}" return f"Чат {suffix}"
def default_agent_notice() -> str:
    """Return the user-facing warning shown when the default agent is in use.

    The text tells the user that their Matrix ID is missing from the agent
    config and that existing rooms will be switched once they are added.
    """
    sentences = (
        "Внимание: ваш Matrix ID не найден в конфиге агентов.",
        "Пока используется агент по умолчанию.",
        "После добавления вас в конфиг бот переключит существующие комнаты "
        "на назначенного агента.",
    )
    return " ".join(sentences)
async def _invite_if_possible(client: Any, room_id: str, matrix_user_id: str) -> bool:
    """Best-effort invite of *matrix_user_id* into *room_id*.

    Returns ``True`` when ``client.room_invite`` was awaited without raising.
    Returns ``False`` when the client has no callable ``room_invite`` or when
    the call raised; in the latter case the failure is logged as
    ``matrix_workspace_reinvite_failed`` and not propagated.
    """
    invite = getattr(client, "room_invite", None)
    if callable(invite):
        try:
            # NOTE(review): only exceptions count as failure; a client that
            # reports errors through its return value would still yield True
            # here - confirm against the client in use.
            await invite(room_id, matrix_user_id)
        except Exception as exc:
            logger.warning(
                "matrix_workspace_reinvite_failed",
                room_id=room_id,
                user=matrix_user_id,
                error=str(exc),
            )
        else:
            return True
    return False
async def provision_workspace_chat( async def provision_workspace_chat(
client: Any, client: Any,
matrix_user_id: str, matrix_user_id: str,
@ -68,10 +93,11 @@ async def provision_workspace_chat(
room_name = room_name_override or _default_room_name(chat_id) room_name = room_name_override or _default_room_name(chat_id)
agent_id = None agent_id = None
agent_assignment = "none"
if registry is not None: if registry is not None:
agent_id = registry.get_agent_id_for_user(matrix_user_id) assignment = registry.resolve_agent_for_user(matrix_user_id)
if agent_id is None and registry.agents: agent_id = assignment.agent_id
agent_id = registry.agents[0].agent_id agent_assignment = assignment.source
chat_resp = await client.room_create( chat_resp = await client.room_create(
name=room_name, name=room_name,
@ -110,6 +136,7 @@ async def provision_workspace_chat(
"space_id": space_id, "space_id": space_id,
"platform_chat_id": platform_chat_id, "platform_chat_id": platform_chat_id,
"agent_id": agent_id, "agent_id": agent_id,
"agent_assignment": agent_assignment,
}, },
) )
await chat_mgr.get_or_create( await chat_mgr.get_or_create(
@ -126,6 +153,64 @@ async def provision_workspace_chat(
"chat_room_id": chat_room_id, "chat_room_id": chat_room_id,
"chat_id": chat_id, "chat_id": chat_id,
"room_name": room_name, "room_name": room_name,
"agent_assignment": agent_assignment,
"agent_id": agent_id,
}
async def restore_workspace_access(
client: Any,
matrix_user_id: str,
display_name: str,
platform,
store,
auth_mgr,
chat_mgr,
registry: AgentRegistry | None = None,
) -> dict:
user_meta = await get_user_meta(store, matrix_user_id) or {}
space_id = user_meta.get("space_id")
if not space_id:
created = await provision_workspace_chat(
client,
matrix_user_id,
display_name,
platform,
store,
auth_mgr,
chat_mgr,
room_name_override="Чат 1",
registry=registry,
)
return {**created, "reinvited_rooms": [], "created_new_chat": True}
await auth_mgr.confirm(matrix_user_id)
await _invite_if_possible(client, space_id, matrix_user_id)
chats = await chat_mgr.list_active(matrix_user_id)
if not chats:
created = await provision_workspace_chat(
client,
matrix_user_id,
display_name,
platform,
store,
auth_mgr,
chat_mgr,
registry=registry,
)
return {**created, "reinvited_rooms": [], "created_new_chat": True}
reinvited_rooms = []
for chat in chats:
if chat.surface_ref:
if await _invite_if_possible(client, chat.surface_ref, matrix_user_id):
reinvited_rooms.append(chat.surface_ref)
return {
"space_id": space_id,
"reinvited_rooms": reinvited_rooms,
"created_new_chat": False,
} }
@ -146,6 +231,29 @@ async def handle_invite(
existing = await get_user_meta(store, matrix_user_id) existing = await get_user_meta(store, matrix_user_id)
if existing and existing.get("space_id"): if existing and existing.get("space_id"):
restored = await restore_workspace_access(
client,
matrix_user_id,
display_name,
platform,
store,
auth_mgr,
chat_mgr,
registry=registry,
)
body = "Я отправил повторные приглашения в пространство Lambda и рабочие чаты."
if restored.get("created_new_chat"):
body = (
f"Создал новый рабочий чат {restored['room_name']} "
f"({restored['chat_id']}) и отправил приглашение."
)
if restored.get("agent_assignment") == "default":
body = f"{body}\n\n{default_agent_notice()}"
await client.room_send(
room.room_id,
"m.room.message",
{"msgtype": "m.text", "body": body},
)
return return
try: try:
@ -168,6 +276,8 @@ async def handle_invite(
f"Привет, {created['user'].display_name or matrix_user_id}! Пиши — я здесь.\n\n" f"Привет, {created['user'].display_name or matrix_user_id}! Пиши — я здесь.\n\n"
"Команды: !new · !chats · !rename · !archive · !clear · !help" "Команды: !new · !chats · !rename · !archive · !clear · !help"
) )
if created.get("agent_assignment") == "default":
welcome = f"{welcome}\n\n{default_agent_notice()}"
await client.room_send( await client.room_send(
created["chat_room_id"], created["chat_room_id"],
"m.room.message", "m.room.message",

View file

@ -8,6 +8,7 @@ from nio.api import RoomVisibility
from nio.responses import RoomCreateError from nio.responses import RoomCreateError
from adapter.matrix.agent_registry import AgentRegistry from adapter.matrix.agent_registry import AgentRegistry
from adapter.matrix.handlers.auth import default_agent_notice
from adapter.matrix.store import ( from adapter.matrix.store import (
get_user_meta, get_user_meta,
next_chat_id, next_chat_id,
@ -107,10 +108,11 @@ def make_handle_new_chat(
) )
agent_id = None agent_id = None
agent_assignment = "none"
if registry is not None: if registry is not None:
agent_id = registry.get_agent_id_for_user(event.user_id) assignment = registry.resolve_agent_for_user(event.user_id)
if agent_id is None and registry.agents: agent_id = assignment.agent_id
agent_id = registry.agents[0].agent_id agent_assignment = assignment.source
room_meta: dict = { room_meta: dict = {
"room_type": "chat", "room_type": "chat",
@ -120,6 +122,7 @@ def make_handle_new_chat(
"space_id": space_id, "space_id": space_id,
"platform_chat_id": platform_chat_id, "platform_chat_id": platform_chat_id,
"agent_id": agent_id, "agent_id": agent_id,
"agent_assignment": agent_assignment,
} }
await set_room_meta(store, room_id, room_meta) await set_room_meta(store, room_id, room_meta)
ctx = await chat_mgr.get_or_create( ctx = await chat_mgr.get_or_create(
@ -129,10 +132,13 @@ def make_handle_new_chat(
surface_ref=room_id, surface_ref=room_id,
name=room_name, name=room_name,
) )
text = f"Создан чат: {ctx.display_name} ({ctx.chat_id})"
if agent_assignment == "default":
text = f"{text}\n\n{default_agent_notice()}"
return [ return [
OutgoingMessage( OutgoingMessage(
chat_id=event.chat_id, chat_id=event.chat_id,
text=f"Создан чат: {ctx.display_name} ({ctx.chat_id})", text=text,
) )
] ]

View file

@ -48,7 +48,9 @@ def _chat_id_from_room(room: object, existing_meta: dict | None) -> str | None:
return None return None
def _space_id_for_room(room: object, rooms_by_id: dict[str, object], existing_meta: dict | None) -> str | None: def _space_id_for_room(
room: object, rooms_by_id: dict[str, object], existing_meta: dict | None
) -> str | None:
existing_space_id = (existing_meta or {}).get("space_id") existing_space_id = (existing_meta or {}).get("space_id")
if isinstance(existing_space_id, str) and existing_space_id: if isinstance(existing_space_id, str) and existing_space_id:
return existing_space_id return existing_space_id
@ -69,7 +71,9 @@ def _space_id_for_room(room: object, rooms_by_id: dict[str, object], existing_me
return None return None
def _matrix_user_id_for_room(room: object, bot_user_id: str | None, existing_meta: dict | None) -> str | None: def _matrix_user_id_for_room(
room: object, bot_user_id: str | None, existing_meta: dict | None
) -> str | None:
existing_user_id = (existing_meta or {}).get("matrix_user_id") existing_user_id = (existing_meta or {}).get("matrix_user_id")
if isinstance(existing_user_id, str) and existing_user_id: if isinstance(existing_user_id, str) and existing_user_id:
return existing_user_id return existing_user_id
@ -128,11 +132,26 @@ async def reconcile_startup_state(client: object, runtime: object) -> Reconcilia
if not room_meta.get("agent_id"): if not room_meta.get("agent_id"):
registry = getattr(runtime, "registry", None) registry = getattr(runtime, "registry", None)
if registry is not None: if registry is not None:
agent_id = registry.get_agent_id_for_user(matrix_user_id) assignment = registry.resolve_agent_for_user(matrix_user_id)
if agent_id is None and registry.agents: if assignment.agent_id:
agent_id = registry.agents[0].agent_id room_meta["agent_id"] = assignment.agent_id
if agent_id: room_meta["agent_assignment"] = assignment.source
room_meta["agent_id"] = agent_id else:
registry = getattr(runtime, "registry", None)
if registry is not None:
assignment = registry.resolve_agent_for_user(matrix_user_id)
if assignment.source == "configured" and (
room_meta.get("agent_id") != assignment.agent_id
or room_meta.get("agent_assignment") != "configured"
):
room_meta["agent_id"] = assignment.agent_id
room_meta["agent_assignment"] = "configured"
elif (
assignment.source == "default"
and room_meta.get("agent_id") == assignment.agent_id
and not room_meta.get("agent_assignment")
):
room_meta["agent_assignment"] = "default"
if existing_meta is None: if existing_meta is None:
result.recovered_rooms += 1 result.recovered_rooms += 1
@ -153,7 +172,9 @@ async def reconcile_startup_state(client: object, runtime: object) -> Reconcilia
user_meta = dict(await get_user_meta(runtime.store, matrix_user_id) or {}) user_meta = dict(await get_user_meta(runtime.store, matrix_user_id) or {})
user_meta["space_id"] = user_meta.get("space_id") or recovered_space_id user_meta["space_id"] = user_meta.get("space_id") or recovered_space_id
next_chat_index = max_chat_index_by_user[matrix_user_id] + 1 next_chat_index = max_chat_index_by_user[matrix_user_id] + 1
user_meta["next_chat_index"] = max(int(user_meta.get("next_chat_index", 1)), next_chat_index) user_meta["next_chat_index"] = max(
int(user_meta.get("next_chat_index", 1)), next_chat_index
)
await set_user_meta(runtime.store, matrix_user_id, user_meta) await set_user_meta(runtime.store, matrix_user_id, user_meta)
return result return result

View file

@ -1,7 +1,10 @@
from __future__ import annotations from __future__ import annotations
import os
from collections.abc import AsyncIterator, Mapping from collections.abc import AsyncIterator, Mapping
import structlog
from adapter.matrix.store import get_room_meta from adapter.matrix.store import get_room_meta
from core.chat import ChatManager from core.chat import ChatManager
from core.store import StateStore from core.store import StateStore
@ -15,6 +18,13 @@ from sdk.interface import (
UserSettings, UserSettings,
) )
logger = structlog.get_logger(__name__)
def _ws_debug_enabled() -> bool:
    """Report whether the ``SURFACES_DEBUG_WS`` environment flag is on.

    The variable is read on every call. Surrounding whitespace and letter
    case are ignored; ``1``, ``true``, ``yes`` and ``on`` count as enabled.
    Any other value, or an unset variable, counts as disabled.
    """
    raw_flag = os.environ.get("SURFACES_DEBUG_WS", "")
    normalized = raw_flag.strip().lower()
    return normalized in ("1", "true", "yes", "on")
class RoutedPlatformClient(PlatformClient): class RoutedPlatformClient(PlatformClient):
def __init__( def __init__(
@ -77,7 +87,9 @@ class RoutedPlatformClient(PlatformClient):
if callable(close): if callable(close):
await close() await close()
async def _resolve_delegate(self, user_id: str, local_chat_id: str) -> tuple[PlatformClient, str]: async def _resolve_delegate(
self, user_id: str, local_chat_id: str
) -> tuple[PlatformClient, str]:
chat = await self._chat_mgr.get(local_chat_id, user_id) chat = await self._chat_mgr.get(local_chat_id, user_id)
if chat is None: if chat is None:
raise PlatformError( raise PlatformError(
@ -107,4 +119,15 @@ class RoutedPlatformClient(PlatformClient):
code="MATRIX_AGENT_NOT_FOUND", code="MATRIX_AGENT_NOT_FOUND",
) )
if _ws_debug_enabled():
logger.warning(
"matrix_route_resolved",
user_id=user_id,
local_chat_id=local_chat_id,
surface_ref=chat.surface_ref,
agent_id=str(agent_id),
platform_chat_id=str(platform_chat_id),
delegate_type=type(delegate).__name__,
)
return delegate, str(platform_chat_id) return delegate, str(platform_chat_id)

View file

@ -12,7 +12,7 @@
# base_url — HTTP/WS URL of this agent's endpoint # base_url — HTTP/WS URL of this agent's endpoint
# (overrides the global AGENT_BASE_URL env var for this agent) # (overrides the global AGENT_BASE_URL env var for this agent)
# workspace_path — absolute path to this agent's workspace directory inside the bot container # workspace_path — absolute path to this agent's workspace directory inside the bot container
# (the bot saves incoming files here and reads outgoing files from here) # (the bot saves incoming files directly here and reads outgoing files from here)
# Example: /agents/0 means the bot mounts the shared volume at /agents/ # Example: /agents/0 means the bot mounts the shared volume at /agents/
# and this agent's files live under /agents/0/ # and this agent's files live under /agents/0/

View file

@ -0,0 +1,10 @@
agents:
- id: agent-0
label: "Smoke Agent 0"
base_url: "http://agent-proxy:7000/agent_0/"
workspace_path: "/agents/0"
- id: agent-1
label: "Smoke Agent 1"
base_url: "http://agent-proxy:7000/agent_1/"
workspace_path: "/agents/1"

View file

@ -0,0 +1,18 @@
services:
agent-proxy:
volumes:
- ./docker/nginx/smoke-agents-timeout.conf:/etc/nginx/nginx.conf:ro
depends_on:
agent-no-status:
condition: service_started
agent-no-status:
build:
context: .
dockerfile: Dockerfile
target: production
args:
LAMBDA_AGENT_API_REF: ${LAMBDA_AGENT_API_REF:-master}
environment:
PYTHONUNBUFFERED: "1"
command: ["python", "-m", "tools.no_status_agent", "--host", "0.0.0.0", "--port", "8000"]

109
docker-compose.smoke.yml Normal file
View file

@ -0,0 +1,109 @@
services:
surface-smoke:
build:
context: .
dockerfile: Dockerfile
target: production
args:
LAMBDA_AGENT_API_REF: ${LAMBDA_AGENT_API_REF:-master}
environment:
PYTHONUNBUFFERED: "1"
SMOKE_TIMEOUT: ${SMOKE_TIMEOUT:-5}
volumes:
- agents:/agents
- ./config:/app/config:ro
depends_on:
agent-proxy:
condition: service_healthy
command: >
sh -lc "
python -m tools.check_matrix_agents --config /app/config/matrix-agents.smoke.yaml --timeout ${SMOKE_TIMEOUT:-5}
"
agent-proxy:
image: nginx:1.27-alpine
volumes:
- ./docker/nginx/smoke-agents.conf:/etc/nginx/nginx.conf:ro
healthcheck:
test:
- CMD-SHELL
- nc -z 127.0.0.1 7000
interval: 2s
timeout: 2s
retries: 15
start_period: 2s
depends_on:
agent-0:
condition: service_healthy
agent-1:
condition: service_healthy
ports:
- "${SMOKE_PROXY_PORT:-7000}:7000"
agent-0:
build:
context: ./external/platform-agent
target: development
additional_contexts:
agent_api: ./external/platform-agent_api
environment:
PYTHONUNBUFFERED: "1"
AGENT_ID: ${AGENT_0_ID:-agent-0}
PROVIDER_MODEL: ${PROVIDER_MODEL:-debug-model}
PROVIDER_URL: ${PROVIDER_URL:-http://provider.invalid/v1}
PROVIDER_API_KEY: ${PROVIDER_API_KEY:-debug-key}
volumes:
- ./external/platform-agent/src:/app/src
- ./external/platform-agent_api:/agent_api
- agents:/shared-agents
healthcheck:
test:
- CMD-SHELL
- python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/openapi.json', timeout=2).read()"
interval: 5s
timeout: 3s
retries: 12
start_period: 5s
command: >
sh -lc "
mkdir -p /shared-agents/0 &&
rm -rf /workspace &&
ln -s /shared-agents/0 /workspace &&
exec /app/.venv/bin/uvicorn src.main:app --host 0.0.0.0 --port 8000 --no-access-log
"
agent-1:
build:
context: ./external/platform-agent
target: development
additional_contexts:
agent_api: ./external/platform-agent_api
environment:
PYTHONUNBUFFERED: "1"
AGENT_ID: ${AGENT_1_ID:-agent-1}
PROVIDER_MODEL: ${PROVIDER_MODEL:-debug-model}
PROVIDER_URL: ${PROVIDER_URL:-http://provider.invalid/v1}
PROVIDER_API_KEY: ${PROVIDER_API_KEY:-debug-key}
volumes:
- ./external/platform-agent/src:/app/src
- ./external/platform-agent_api:/agent_api
- agents:/shared-agents
healthcheck:
test:
- CMD-SHELL
- python -c "import urllib.request; urllib.request.urlopen('http://127.0.0.1:8000/openapi.json', timeout=2).read()"
interval: 5s
timeout: 3s
retries: 12
start_period: 5s
command: >
sh -lc "
mkdir -p /shared-agents/1 &&
rm -rf /workspace &&
ln -s /shared-agents/1 /workspace &&
exec /app/.venv/bin/uvicorn src.main:app --host 0.0.0.0 --port 8000 --no-access-log
"
volumes:
agents:
name: ${SURFACES_SMOKE_VOLUME:-surfaces-smoke-agents}

View file

@ -0,0 +1,28 @@
events {}
http {
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 7000;
location /agent_0/ {
proxy_pass http://agent-0:8000/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
location /agent_1/ {
proxy_pass http://agent-no-status:8000/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
}
}

View file

@ -0,0 +1,28 @@
events {}
http {
map $http_upgrade $connection_upgrade {
default upgrade;
'' close;
}
server {
listen 7000;
location /agent_0/ {
proxy_pass http://agent-0:8000/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
location /agent_1/ {
proxy_pass http://agent-1:8000/;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection $connection_upgrade;
}
}
}

View file

@ -68,7 +68,7 @@ agents:
- `user_agents` — маппинг Matrix user_id → agent_id. Если пользователь не найден — используется первый агент из списка. - `user_agents` — маппинг Matrix user_id → agent_id. Если пользователь не найден — используется первый агент из списка.
- `agents[].base_url` — HTTP URL агент-эндпоинта. Бот подключается через AgentApi. - `agents[].base_url` — HTTP URL агент-эндпоинта. Бот подключается через AgentApi.
- `agents[].workspace_path` — абсолютный путь к воркспейсу агента **внутри контейнера бота** (т.е. на shared volume). - `agents[].workspace_path` — абсолютный путь к воркспейсу агента **внутри контейнера бота** (т.е. на shared volume).
Бот сохраняет входящие файлы в `{workspace_path}/incoming/`, читает исходящие из `{workspace_path}/`. Бот сохраняет входящие файлы прямо в `{workspace_path}/`, читает исходящие из `{workspace_path}/`.
- Для 25-30 агентов продолжайте тот же паттерн до нужного номера: `/agent_17/` + `/agents/17`, `/agent_29/` + `/agents/29`. - Для 25-30 агентов продолжайте тот же паттерн до нужного номера: `/agent_17/` + `/agents/17`, `/agent_29/` + `/agents/29`.
## Surface Image Build Contract ## Surface Image Build Contract
@ -89,7 +89,7 @@ Published image:
```text ```text
mput1/surfaces-bot:latest mput1/surfaces-bot:latest
sha256:26ba3a49290ab7c1cf0fa97f3de3fefdc70b59df7e6f1e0c2255728f8e2369be sha256:2f135f3535f7765d4377b440cdabe41195ad2efbc3e175def159ae4689ef90bd
``` ```
`SURFACES_BOT_IMAGE` должен указывать на registry namespace, куда текущий Docker account может пушить. Ошибка `insufficient_scope` означает, что пользователь не залогинен в этот namespace, repository не создан, или у аккаунта нет push-доступа. `SURFACES_BOT_IMAGE` должен указывать на registry namespace, куда текущий Docker account может пушить. Ошибка `insufficient_scope` означает, что пользователь не залогинен в этот namespace, repository не создан, или у аккаунта нет push-доступа.
@ -153,14 +153,15 @@ AgentApi(
### Пользователь → Агент (входящий файл) ### Пользователь → Агент (входящий файл)
1. Matrix-бот получает файл от пользователя 1. Matrix-бот получает файл от пользователя
2. Сохраняет в workspace агента: `/agents/{N}/incoming/{filename}` 2. Сохраняет в workspace агента: `/agents/{N}/{filename}`
3. Вызывает `agent.send_message(text, attachments=["incoming/filename"])` 3. Если файл уже существует, выбирает следующее имя: `filename (1).ext`, `filename (2).ext`
4. Вызывает `agent.send_message(text, attachments=["filename"])`
— путь относительно `/workspace` агента — путь относительно `/workspace` агента
### Агент → Пользователь (исходящий файл) ### Агент → Пользователь (исходящий файл)
1. Агент эмитит `MsgEventSendFile(path="output/report.pdf")` 1. Агент эмитит `MsgEventSendFile(path="report.pdf")`
2. Matrix-бот читает файл: `/agents/{N}/output/report.pdf` 2. Matrix-бот читает файл: `/agents/{N}/report.pdf`
3. Отправляет как Matrix file message пользователю 3. Отправляет как Matrix file message пользователю
**Ключевое:** production handoff через `docker-compose.prod.yml` и internal E2E через `docker-compose.fullstack.yml` используют один и тот же `/agents` contract на стороне поверхности. Прямой HTTP-доступ к файлам не нужен. **Ключевое:** production handoff через `docker-compose.prod.yml` и internal E2E через `docker-compose.fullstack.yml` используют один и тот же `/agents` contract на стороне поверхности. Прямой HTTP-доступ к файлам не нужен.

340
docs/max-surface-guide.md Normal file
View file

@ -0,0 +1,340 @@
# Руководство по созданию новой поверхности Max
Этот документ описывает, как написать новую поверхность для Max по образцу текущей Matrix-поверхности в ветке `feat/deploy`.
Он основан на актуальной реализации Matrix surface в репозитории и отражает текущую продакшн-логику, а не устаревший легаси.
---
## 1. Общая архитектура
### 1.1. Что такое поверхность
Поверхность — это тонкий адаптер между конкретной платформой (Max) и общим ядром бота.
В репозитории есть разделение:
- `core/` — общее ядро и бизнес-логика
- `adapter/<platform>/` — реализация конкретной поверхности
- `sdk/real.py` — работа с реальной платформой / агентом
- `config/` — статическая конфигурация агентов
- `docs/surface-protocol.md` — общий контракт поверхностей
### 1.2. Как это работает
Поверхность должна:
- принимать нативные события от Max
- преобразовывать их в единый внутренний контракт (`IncomingMessage`, `IncomingCommand`, `IncomingCallback`)
- передавать их в `core`
- получать ответы из `core` (`OutgoingMessage`, `OutgoingUI`, `OutgoingTyping`, `OutgoingNotification`)
- преобразовывать ответы обратно в нативные Max-сообщения
Поверхность не должна:
- управлять жизненным циклом агентских контейнеров
- хранить долгую историю бесед вне `core`/платформы
- аутентифицировать пользователей сама (если это не часть Max API)
---
## 2. Структура новой поверхности
### 2.1. Основные каталоги
Рекомендуемая структура для Max:
```
adapter/max/
bot.py
converter.py
agent_registry.py
files.py
handlers/
store.py
```
### 2.2. Принцип reuse
По примеру Matrix surface, Max surface должен переиспользовать общий `core` и общий `sdk`.
Не дублируйте бизнес-логику, а реализуйте только адаптер:
- `adapter/max/converter.py` — конвертация событий Max ⇄ внутренние структуры
- `adapter/max/bot.py` — основной runtime, старт Max client, loop, отправка/прием
- `adapter/max/agent_registry.py` — загрузка `config/max-agents.yaml`
- `adapter/max/files.py` — хранение входящих/исходящих вложений
---
## 3. Контракт входящих/исходящих событий
### 3.1. Внутренний формат
Смотрите `core/protocol.py`. Основные типы:
- `IncomingMessage` — обычное текстовое сообщение + вложения
- `IncomingCommand` — управляющая команда
- `IncomingCallback` — подтверждение / интерактивные действия
- `OutgoingMessage` — ответ пользователю
- `OutgoingUI` — интерфейсные элементы (кнопки и т.п.)
- `OutgoingTyping` — индикатор печати
- `OutgoingNotification` — системное уведомление
### 3.2. Пример конверсии Matrix
В Matrix-реализации `adapter/matrix/converter.py`:
- текст `!yes` / `!no` превращается в `IncomingCallback` с `action: confirm/cancel`
- `!list`/`!remove` адресованы не агенту, а самому surface-процессу
- вложения `m.file`, `m.image`, `m.audio`, `m.video` нормализуются в `Attachment`
Для Max реализуйте аналогичную логику для native команд вашего клиента.
---
## 4. Реестр агентов и маршрутизация
### 4.1. Что хранит реестр
В текущей Matrix реализации есть `config/matrix-agents.yaml` и `adapter/matrix/agent_registry.py`.
Структура:
```yaml
user_agents:
"@user0:matrix.example.org": agent-0
"@user1:matrix.example.org": agent-1
agents:
- id: agent-0
label: "Agent 0"
base_url: "http://lambda.coredump.ru:7000/agent_0/"
workspace_path: "/agents/0"
```
### 4.2. Логика выбора агента
- `user_agents` маппит конкретного пользователя на `agent_id`
- если user_id не найден, используется первый агент из списка
- `agents[].base_url` определяет URL агента
- `agents[].workspace_path` определяет путь внутри surface-контейнера для этого агента
Это важно: именно на этом контракте строится разделение агентов по рабочим каталогам.
### 4.3. Рекомендуемая Max-версия
Создайте `config/max-agents.yaml` с тем же смыслом.
- `user_agents` — маппинг Max user_id → agent_id
- `agents` — список агентов
- `workspace_path` для каждого агента должен быть абсолютным путем внутри surface-контейнера, например `/agents/0`
---
## 5. Файловый контракт
### 5.1. Shared volume
Текущее Matrix-решение использует shared volume:
- surface монтирует общий том как `/agents`
- каждый агент видит свою поддиректорию как `/workspace`
Топология:
```
Bot (/agents) Agent (/workspace = /agents/N/)
/agents/0/report.pdf ←──→ /workspace/report.pdf
```
### 5.2. Правила записи файлов
В `adapter/matrix/files.py` реализовано:
- входящий файл сохраняется прямо в `{workspace_root}/{filename}`
- возвращается `workspace_path` — путь относительно рабочего каталога агента
- при коллизии имен создаётся `file (1).ext`, `file (2).ext`
- `Attachment.workspace_path` передаётся агенту
Для исходящих файлов:
- surface читает файл из `workspace_root / workspace_path`
- загружает его в платформу
### 5.3. Пример поведения
- Пользователь отправляет файл → surface скачивает файл и кладёт его в agent workspace
- Агент получает `attachments=["report.pdf"]` и работает с относительным `workspace_path`
- Агент пишет результат в `/workspace/result.txt`
- surface читает `/agents/{N}/result.txt` и отправляет файл пользователю
---
## 6. Чат-менеджмент и контекст
### 6.1. `platform_chat_id`
Matrix-реализация использует `platform_chat_id` как стабильный идентификатор чата на стороне агента.
- `room_meta.platform_chat_id` определяется и сохраняется в `adapter/matrix/store.py`
- `reconcile_startup_state()` восстанавливает отсутствующие `platform_chat_id` при рестарте
- `RoutedPlatformClient` перенаправляет запросы агенту по `agent_id` + `platform_chat_id`
Для Max surface тот же принцип:
- каждая внешняя беседа должна привязываться к одному внутреннему `chat_id`
- этот `chat_id` используется для вызовов агента
- если в Max есть несколько комнат/топиков, каждая должна иметь свой `surface_ref`
### 6.2. Команды управления чатами
Matrix поддерживает следующие команды, которые нужно сохранить в Max:
- `!new [название]` — создать новый чат
- `!chats` — список активных чатов
- `!rename <название>` — переименовать текущий чат
- `!archive` — архивировать чат
- `!clear` / `!reset` — сбросить контекст текущего чата
- `!yes` / `!no` — подтвердить или отменить действие агента
- `!list` — показать очередь вложений
- `!remove <n>` / `!remove all` — удалить вложение из очереди
- `!help` — справка
Эти команды реализованы в Matrix через `adapter/matrix/handlers/`.
### 6.3. Очередь вложений
Matrix surface поддерживает staged attachments:
- файл может быть отправлен без текста
- surface сохраняет файл в `staged_attachments` для конкретного room_id + user_id
- следующий текст отправляется агенту вместе со всеми файлами из очереди
В Max можно реализовать ту же модель:
- `!list` показывает текущую очередь
- `!remove` удаляет файл из очереди
- команда-индикатор или следующее текстовое сообщение отправляет queued attachments агенту
---
## 7. Runtime и окружение
### 7.1. Переменные среды
Для Matrix surface текущий runtime ожидает:
- `MATRIX_HOMESERVER` — URL Matrix-сервера
- `MATRIX_USER_ID` — например, `@bot:example.org`
- `MATRIX_PASSWORD` или `MATRIX_ACCESS_TOKEN`
- `MATRIX_PLATFORM_BACKEND` — должно быть `real` для продакшна
- `MATRIX_AGENT_REGISTRY_PATH` — путь к `config/matrix-agents.yaml`
- `AGENT_BASE_URL` — fallback URL агента
- `SURFACES_WORKSPACE_DIR` — путь к shared volume внутри контейнера (значение по умолчанию в коде — `/workspace`, однако документация рекомендует `/agents`)
Для Max surface используйте аналогичные переменные:
- `MAX_PLATFORM_BACKEND=real`
- `MAX_AGENT_REGISTRY_PATH=/app/config/max-agents.yaml`
- `SURFACES_WORKSPACE_DIR=/agents`
- `AGENT_BASE_URL` — если хотите общий fallback
### 7.2. Environment contract
В коде `adapter/matrix/bot.py`:
- `_agent_base_url_from_env()` читает `AGENT_BASE_URL` или `AGENT_WS_URL`
- `_load_agent_registry_from_env()` читает `MATRIX_AGENT_REGISTRY_PATH`
- `_build_platform_from_env()` выбирает `RealPlatformClient` при `MATRIX_PLATFORM_BACKEND=real`
В Max surface реализуйте ту же логику, заменив префиксы на `MAX_`.
---
## 8. Тестирование и валидация
### 8.1. Юнит-тесты
В ветке есть покрытие для Matrix surface:
- `tests/adapter/matrix/test_files.py`
- `tests/adapter/matrix/test_dispatcher.py`
- `tests/adapter/matrix/test_routed_platform.py`
- `tests/adapter/matrix/test_reconciliation.py`
- `tests/adapter/matrix/test_context_commands.py`
Для Max создайте аналогичные тесты:
- проверка загрузки вложений
- проверка маршрутизации по `agent_id`
- проверка восстановления `platform_chat_id`
- проверка конвертации команд
### 8.2. Smoke-проверка deployment
Для Matrix surface есть `docker-compose.prod.yml` и `docker-compose.fullstack.yml`.
Для Max surface должно быть достаточно:
- bot-only production deployment
- shared volume `/agents`
- независимая проверка `config/max-agents.yaml`
- проверка, что surface запускается без локального агента
### 8.3. Проверка контрактов
Особое внимание:
- `agent_registry` должен загружать `workspace_path`
- file flow должен поддерживать `workspace_path` в `Attachment`
- отправка файлов должна использовать `resolve_workspace_attachment_path()`
- `platform_chat_id` должен существовать до вызова агента
---
## 9. Реализация шаг за шагом
1. Скопировать `adapter/matrix/` как шаблон для `adapter/max/`.
2. Сделать `adapter/max/converter.py`:
- превратить native Max-сообщения в `IncomingMessage`
- превратить команды в `IncomingCommand`
- превратить yes/no-подтверждения в `IncomingCallback`
3. Сделать `adapter/max/agent_registry.py` на основе `adapter/matrix/agent_registry.py`.
4. Сделать `adapter/max/files.py` на основе `adapter/matrix/files.py`.
5. Сделать `adapter/max/bot.py`:
- инстанцировать runtime
- читать env vars `MAX_*`
- загружать реестр агентов
- обрабатывать входящие события
- отправлять `Outgoing*` обратно в Max
6. Реализовать команды управления чатами и очередь вложений.
7. Прописать `config/max-agents.yaml`.
8. Прописать `docker-compose.max.yml` или аналог, чтобы surface монтировал `/agents`.
9. Написать тесты по аналогии с `tests/adapter/matrix/`.
10. Проверить, что все env vars читаются из окружения и не зависят от устаревших Matrix-переменных.
---
## 10. Важные замечания
- Текущий Matrix surface на ветке `feat/deploy` — активная реализация, а не устаревший легаси.
- Документация и код согласованы: `agent_registry`, `files`, `routed_platform`, `reconciliation` работают вместе.
- Обязательно явно задавайте `SURFACES_WORKSPACE_DIR=/agents` в production, если `workspace_path` в реестре указывает на `/agents/*`.
- Для Max surface сохраните ту же архитектуру: surface = thin adapter, агенты = внешние сервисы.
- Не пытайтесь в surface реализовывать логику запуска/стопа агент-контейнеров.
---
## 11. Полезные ссылки внутри репозитория
- `README.md`
- `docs/deploy-architecture.md`
- `docs/surface-protocol.md`
- `adapter/matrix/bot.py`
- `adapter/matrix/converter.py`
- `adapter/matrix/agent_registry.py`
- `adapter/matrix/files.py`
- `adapter/matrix/routed_platform.py`
- `adapter/matrix/reconciliation.py`
- `tests/adapter/matrix/`

View file

@ -0,0 +1,855 @@
# Matrix Multi-Agent Routing And Restart State Implementation Plan
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
**Goal:** Add Matrix multi-agent routing with user agent selection, room-level agent binding, and durable surface state that survives normal restart.
**Architecture:** Keep the shared `PlatformClient` protocol unchanged. Add a Matrix-specific routing facade that translates local Matrix chat identity into `(agent_id, platform_chat_id)` and delegates to one `RealPlatformClient` per configured agent. Persist only durable routing state in the existing SQLite-backed surface store and deliberately drop temporary UX state on restart.
**Tech Stack:** Python 3.11, matrix-nio, structlog, PyYAML, pytest, pytest-asyncio
---
## File Structure
- Create: `adapter/matrix/agent_registry.py`
Purpose: load and validate the YAML agent registry used by Matrix runtime.
- Create: `adapter/matrix/routed_platform.py`
Purpose: implement a Matrix-specific `PlatformClient` facade that resolves room bindings and delegates to per-agent `RealPlatformClient` instances.
- Create: `adapter/matrix/handlers/agent.py`
Purpose: implement `!agent` listing and selection behavior.
- Create: `tests/adapter/matrix/test_agent_registry.py`
Purpose: cover YAML loading and registry validation.
- Create: `tests/adapter/matrix/test_routed_platform.py`
Purpose: cover room-target resolution and per-agent delegation without changing the shared protocol.
- Create: `tests/adapter/matrix/test_agent_handler.py`
Purpose: cover `!agent` UX and persistence of `selected_agent_id`.
- Create: `tests/adapter/matrix/test_restart_persistence.py`
Purpose: prove durable user/room state and `PLATFORM_CHAT_SEQ_KEY` survive runtime recreation with SQLite.
- Create: `config/matrix-agents.example.yaml`
Purpose: document the expected agent registry format.
- Modify: `pyproject.toml`
Purpose: add YAML parsing dependency required by the runtime registry loader.
- Modify: `.env.example`
Purpose: document the config path env var for the Matrix agent registry.
- Modify: `README.md`
Purpose: document the new config file, `!agent`, and restart persistence expectations.
- Modify: `adapter/matrix/store.py`
Purpose: add helpers for `selected_agent_id`, room `agent_id`, and explicit sequence persistence semantics.
- Modify: `adapter/matrix/bot.py`
Purpose: load the agent registry, construct the routed platform facade, keep local Matrix chat ids through dispatch, and enforce stale/unbound room behavior before dispatch.
- Modify: `adapter/matrix/handlers/__init__.py`
Purpose: register the new `!agent` command.
- Modify: `adapter/matrix/handlers/chat.py`
Purpose: require a selected agent for `!new` and bind new rooms to that agent.
- Modify: `adapter/matrix/handlers/context_commands.py`
Purpose: keep context commands compatible with local chat ids and routed platform delegation.
- Modify: `adapter/matrix/handlers/settings.py`
Purpose: expose `!agent` in help text.
- Modify: `tests/adapter/matrix/test_dispatcher.py`
Purpose: cover pre-dispatch gating, stale room behavior, and `!new` semantics.
- Modify: `tests/adapter/matrix/test_context_commands.py`
Purpose: keep load/reset/context flows aligned with the routed platform facade.
---
### Task 1: Add The Agent Registry And Configuration Wiring
**Files:**
- Create: `adapter/matrix/agent_registry.py`
- Create: `tests/adapter/matrix/test_agent_registry.py`
- Create: `config/matrix-agents.example.yaml`
- Modify: `pyproject.toml`
- Modify: `.env.example`
- Modify: `README.md`
- [ ] **Step 1: Write the failing registry tests**
```python
# tests/adapter/matrix/test_agent_registry.py
from pathlib import Path
import pytest
from adapter.matrix.agent_registry import AgentRegistryError, load_agent_registry
def test_load_agent_registry_reads_yaml_entries(tmp_path: Path):
path = tmp_path / "agents.yaml"
path.write_text(
"agents:\n"
" - id: agent-1\n"
" label: Analyst\n"
" - id: agent-2\n"
" label: Research\n",
encoding="utf-8",
)
registry = load_agent_registry(path)
assert [agent.agent_id for agent in registry.agents] == ["agent-1", "agent-2"]
assert registry.get("agent-1").label == "Analyst"
def test_load_agent_registry_rejects_duplicate_ids(tmp_path: Path):
path = tmp_path / "agents.yaml"
path.write_text(
"agents:\n"
" - id: agent-1\n"
" label: Analyst\n"
" - id: agent-1\n"
" label: Duplicate\n",
encoding="utf-8",
)
with pytest.raises(AgentRegistryError, match="duplicate agent id"):
load_agent_registry(path)
```
- [ ] **Step 2: Run the registry tests to verify they fail**
Run: `uv run pytest tests/adapter/matrix/test_agent_registry.py -q`
Expected: FAIL with `ModuleNotFoundError` or `ImportError` for `adapter.matrix.agent_registry`.
- [ ] **Step 3: Add the YAML dependency and implement the registry loader**
```toml
# pyproject.toml
dependencies = [
"aiogram>=3.4,<4",
"matrix-nio>=0.21",
"pydantic>=2.5",
"structlog>=24.1",
"python-dotenv>=1.0",
"httpx>=0.27",
"aiohttp>=3.9",
"PyYAML>=6.0",
]
```
```python
# adapter/matrix/agent_registry.py
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
import yaml
class AgentRegistryError(ValueError):
    """Raised when the registry file is malformed or an agent id is unknown."""
@dataclass(frozen=True)
class AgentDefinition:
    """Immutable description of a single agent entry in the registry."""

    # Identifier of the agent (the ``id`` field of a registry entry).
    agent_id: str
    # Human-readable name of the agent (the ``label`` field of a registry entry).
    label: str
class AgentRegistry:
    """Ordered collection of agent definitions with lookup by agent id."""

    def __init__(self, agents: list[AgentDefinition]) -> None:
        """Store ``agents`` as given and index them by ``agent_id``.

        When two definitions share an id, the later one wins in the index.
        """
        index = {}
        for definition in agents:
            index[definition.agent_id] = definition
        self.agents = agents
        self._by_id = index

    def get(self, agent_id: str) -> AgentDefinition:
        """Return the definition registered under ``agent_id``.

        Raises:
            AgentRegistryError: if no agent with that id is registered.
        """
        try:
            found = self._by_id[agent_id]
        except KeyError as exc:
            raise AgentRegistryError(f"unknown agent id: {agent_id}") from exc
        return found
def load_agent_registry(path: str | Path) -> AgentRegistry:
    """Load and validate the agent registry stored as YAML at ``path``.

    The document must be a mapping with a non-empty ``agents`` list. Every
    entry must be a mapping carrying a non-blank ``id`` and ``label``; both
    are converted to stripped strings. Ids must be unique.

    Args:
        path: Location of the YAML registry file (read as UTF-8).

    Returns:
        An ``AgentRegistry`` holding the entries in file order.

    Raises:
        AgentRegistryError: if the document shape is wrong, an entry lacks
            ``id`` or ``label``, or an id occurs more than once.
    """
    raw = yaml.safe_load(Path(path).read_text(encoding="utf-8")) or {}
    # A top-level list or scalar has no ``get``; report it as a registry
    # error instead of leaking AttributeError to the caller.
    if not isinstance(raw, dict):
        raise AgentRegistryError("agents registry must contain a non-empty agents list")
    entries = raw.get("agents")
    if not isinstance(entries, list) or not entries:
        raise AgentRegistryError("agents registry must contain a non-empty agents list")

    def _text(value) -> str:
        # YAML ``null`` must count as missing; str(None) would yield "None".
        return "" if value is None else str(value).strip()

    agents: list[AgentDefinition] = []
    seen: set[str] = set()
    for entry in entries:
        # Scalar or list entries cannot carry id/label fields.
        if not isinstance(entry, dict):
            raise AgentRegistryError("each agent entry requires id and label")
        agent_id = _text(entry.get("id"))
        label = _text(entry.get("label"))
        if not agent_id or not label:
            raise AgentRegistryError("each agent entry requires id and label")
        if agent_id in seen:
            raise AgentRegistryError(f"duplicate agent id: {agent_id}")
        seen.add(agent_id)
        agents.append(AgentDefinition(agent_id=agent_id, label=label))
    return AgentRegistry(agents)
```
- [ ] **Step 4: Add the example config and runtime wiring docs**
```yaml
# config/matrix-agents.example.yaml
agents:
- id: agent-1
label: Analyst
- id: agent-2
label: Research
```
```env
# .env.example
MATRIX_AGENT_REGISTRY_PATH=config/matrix-agents.yaml
```
```markdown
# README.md
1. Copy `config/matrix-agents.example.yaml` to `config/matrix-agents.yaml`
2. Set `MATRIX_AGENT_REGISTRY_PATH=config/matrix-agents.yaml`
3. Use `!agent` in Matrix to select the active upstream agent
```
- [ ] **Step 5: Run the registry tests to verify they pass**
Run: `uv run pytest tests/adapter/matrix/test_agent_registry.py -q`
Expected: PASS
- [ ] **Step 6: Commit**
```bash
git add pyproject.toml .env.example README.md config/matrix-agents.example.yaml adapter/matrix/agent_registry.py tests/adapter/matrix/test_agent_registry.py
git commit -m "feat: add matrix agent registry loader"
```
---
### Task 2: Add A Matrix Routing Facade Without Changing `PlatformClient`
**Files:**
- Create: `adapter/matrix/routed_platform.py`
- Create: `tests/adapter/matrix/test_routed_platform.py`
- Modify: `adapter/matrix/bot.py`
- [ ] **Step 1: Write the failing routed-platform tests**
```python
# tests/adapter/matrix/test_routed_platform.py
import pytest
from adapter.matrix.routed_platform import RoutedPlatformClient
from adapter.matrix.store import set_room_meta
from core.chat import ChatManager
from core.store import InMemoryStore
from sdk.interface import MessageResponse
from sdk.prototype_state import PrototypeStateStore
class FakeDelegate:
def __init__(self, agent_id: str) -> None:
self.agent_id = agent_id
self.calls = []
async def send_message(self, user_id: str, chat_id: str, text: str, attachments=None):
self.calls.append((user_id, chat_id, text, attachments))
return MessageResponse(
message_id=user_id,
response=f"{self.agent_id}:{text}",
tokens_used=0,
finished=True,
)
async def get_or_create_user(self, external_id: str, platform: str, display_name=None):
return await PrototypeStateStore().get_or_create_user(external_id, platform, display_name)
async def get_settings(self, user_id: str):
return await PrototypeStateStore().get_settings(user_id)
async def update_settings(self, user_id: str, action):
return None
@pytest.mark.asyncio
async def test_routed_platform_delegates_using_room_agent_and_platform_chat_id():
store = InMemoryStore()
chat_mgr = ChatManager(None, store)
await chat_mgr.get_or_create("u1", "C1", "matrix", "!room:example.org", "Chat 1")
await set_room_meta(
store,
"!room:example.org",
{"chat_id": "C1", "matrix_user_id": "u1", "platform_chat_id": "41", "agent_id": "agent-2"},
)
delegates = {"agent-2": FakeDelegate("agent-2")}
platform = RoutedPlatformClient(store=store, chat_mgr=chat_mgr, delegates=delegates)
response = await platform.send_message("u1", "C1", "hello")
assert response.response == "agent-2:hello"
assert delegates["agent-2"].calls == [("u1", "41", "hello", None)]
```
- [ ] **Step 2: Run the routed-platform tests to verify they fail**
Run: `uv run pytest tests/adapter/matrix/test_routed_platform.py -q`
Expected: FAIL with `ImportError` for `RoutedPlatformClient`.
- [ ] **Step 3: Implement the routing facade and integrate runtime construction**
```python
# adapter/matrix/routed_platform.py
from __future__ import annotations
from sdk.interface import PlatformClient
class RoutedPlatformClient(PlatformClient):
    """Platform facade that routes chat traffic to the delegate bound to a room.

    Message calls receive a local chat id. The chat is resolved through
    ``chat_mgr``, the room metadata is read from ``store`` under
    ``matrix_room:<surface_ref>``, and the call is forwarded to the delegate
    registered for the room's ``agent_id`` using the room's
    ``platform_chat_id``. User and settings calls are not chat-scoped and go
    to the first configured delegate.
    """

    def __init__(self, store, chat_mgr, delegates: dict[str, PlatformClient]) -> None:
        """Keep the store, the chat manager and the ``agent_id -> client`` map."""
        self._store = store
        self._chat_mgr = chat_mgr
        self._delegates = delegates

    def _default_delegate(self) -> PlatformClient:
        """Return the first configured delegate.

        Raises:
            ValueError: if no delegates are configured.
        """
        # ``next(iter(...))`` on an empty mapping raises StopIteration, which
        # a coroutine converts into an opaque RuntimeError; fail clearly.
        for delegate in self._delegates.values():
            return delegate
        raise ValueError("No agent delegates are configured")

    async def _resolve_target(self, user_id: str, local_chat_id: str) -> tuple[PlatformClient, str]:
        """Map a local chat to ``(delegate, platform_chat_id)``.

        Raises:
            ValueError: if the chat is unknown, the room has no agent or
                platform chat id, or the room's agent has no delegate.
        """
        ctx = await self._chat_mgr.get(local_chat_id, user_id=user_id)
        if ctx is None:
            raise ValueError(f"Chat {local_chat_id} not found for {user_id}")
        room_meta = await self._store.get(f"matrix_room:{ctx.surface_ref}")
        if room_meta is None or not room_meta.get("agent_id") or not room_meta.get("platform_chat_id"):
            raise ValueError(f"Room {ctx.surface_ref} is not bound to an agent target")
        agent_id = room_meta["agent_id"]
        try:
            delegate = self._delegates[agent_id]
        except KeyError as exc:
            # The room references an agent with no configured delegate;
            # report it like the other routing failures.
            raise ValueError(f"Room {ctx.surface_ref} is bound to unknown agent {agent_id}") from exc
        return delegate, str(room_meta["platform_chat_id"])

    async def send_message(self, user_id: str, chat_id: str, text: str, attachments=None):
        """Forward a message to the room's delegate and return its response."""
        delegate, platform_chat_id = await self._resolve_target(user_id, chat_id)
        return await delegate.send_message(user_id, platform_chat_id, text, attachments)

    async def stream_message(self, user_id: str, chat_id: str, text: str, attachments=None):
        """Yield the chunks streamed by the room's delegate for this message."""
        delegate, platform_chat_id = await self._resolve_target(user_id, chat_id)
        async for chunk in delegate.stream_message(user_id, platform_chat_id, text, attachments):
            yield chunk

    async def get_or_create_user(self, external_id: str, platform: str, display_name=None):
        """Delegate user lookup/creation to the first configured delegate."""
        return await self._default_delegate().get_or_create_user(external_id, platform, display_name)

    async def get_settings(self, user_id: str):
        """Return the user's settings as reported by the first delegate."""
        return await self._default_delegate().get_settings(user_id)

    async def update_settings(self, user_id: str, action):
        """Apply a settings action through the first delegate."""
        await self._default_delegate().update_settings(user_id, action)
```python
# adapter/matrix/bot.py
from adapter.matrix.agent_registry import load_agent_registry
from adapter.matrix.routed_platform import RoutedPlatformClient
def _build_platform_from_env(store: StateStore, chat_mgr: ChatManager) -> PlatformClient:
    """Build the platform client selected by ``MATRIX_PLATFORM_BACKEND``.

    Any value other than ``real`` (case-insensitive, whitespace ignored;
    default ``mock``) yields a ``MockPlatformClient``. For ``real``, the
    registry at ``MATRIX_AGENT_REGISTRY_PATH`` is loaded and one
    ``RealPlatformClient`` per agent is placed behind a
    ``RoutedPlatformClient``.
    """
    backend = os.environ.get("MATRIX_PLATFORM_BACKEND", "mock").strip().lower()
    if backend == "real":
        registry = load_agent_registry(os.environ["MATRIX_AGENT_REGISTRY_PATH"])
        delegates = {}
        for agent in registry.agents:
            # Each agent gets its own client and its own prototype state.
            delegates[agent.agent_id] = RealPlatformClient(
                agent_id=agent.agent_id,
                agent_base_url=_agent_base_url_from_env(),
                prototype_state=PrototypeStateStore(),
                platform="matrix",
            )
        return RoutedPlatformClient(store=store, chat_mgr=chat_mgr, delegates=delegates)
    return MockPlatformClient()
def build_runtime(...):
store = store or InMemoryStore()
chat_mgr = ChatManager(None, store)
platform = platform or _build_platform_from_env(store, chat_mgr)
auth_mgr = AuthManager(platform, store)
settings_mgr = SettingsManager(platform, store)
dispatcher = EventDispatcher(
platform=platform,
chat_mgr=chat_mgr,
auth_mgr=auth_mgr,
settings_mgr=settings_mgr,
)
```
- [ ] **Step 4: Run the routed-platform tests to verify they pass**
Run: `uv run pytest tests/adapter/matrix/test_routed_platform.py -q`
Expected: PASS
- [ ] **Step 5: Commit**
```bash
git add adapter/matrix/routed_platform.py adapter/matrix/bot.py tests/adapter/matrix/test_routed_platform.py
git commit -m "feat: add matrix routed platform facade"
```
---
### Task 3: Add `!agent` Selection And Durable User Agent State
**Files:**
- Create: `adapter/matrix/handlers/agent.py`
- Create: `tests/adapter/matrix/test_agent_handler.py`
- Modify: `adapter/matrix/store.py`
- Modify: `adapter/matrix/handlers/__init__.py`
- Modify: `adapter/matrix/handlers/settings.py`
- [ ] **Step 1: Write the failing agent-handler tests**
```python
# tests/adapter/matrix/test_agent_handler.py
import pytest
from adapter.matrix.handlers.agent import make_handle_agent
from adapter.matrix.store import get_room_meta, get_selected_agent_id, set_room_meta
from core.protocol import IncomingCommand
from core.store import InMemoryStore
class FakeRegistry:
def __init__(self) -> None:
self.agents = [
type("Agent", (), {"agent_id": "agent-1", "label": "Analyst"})(),
type("Agent", (), {"agent_id": "agent-2", "label": "Research"})(),
]
@pytest.mark.asyncio
async def test_agent_command_lists_available_agents():
handler = make_handle_agent(store=InMemoryStore(), registry=FakeRegistry())
result = await handler(
IncomingCommand(user_id="u1", platform="matrix", chat_id="C1", command="agent", args=[]),
None,
None,
None,
None,
)
assert "1. Analyst" in result[0].text
assert "2. Research" in result[0].text
@pytest.mark.asyncio
async def test_agent_command_persists_selected_agent_and_binds_unbound_room():
store = InMemoryStore()
await set_room_meta(store, "!room:example.org", {"chat_id": "C1", "matrix_user_id": "u1"})
handler = make_handle_agent(store=store, registry=FakeRegistry())
chat_mgr = type(
"ChatMgr",
(),
{"get": staticmethod(lambda chat_id, user_id=None: type("Ctx", (), {"surface_ref": "!room:example.org"})())},
)()
await handler(
IncomingCommand(user_id="u1", platform="matrix", chat_id="C1", command="agent", args=["2"]),
None,
None,
chat_mgr,
None,
)
assert await get_selected_agent_id(store, "u1") == "agent-2"
room_meta = await get_room_meta(store, "!room:example.org")
assert room_meta["agent_id"] == "agent-2"
```
- [ ] **Step 2: Run the agent-handler tests to verify they fail**
Run: `uv run pytest tests/adapter/matrix/test_agent_handler.py -q`
Expected: FAIL with `ModuleNotFoundError` or `ImportError` for `adapter.matrix.handlers.agent` or for the missing `adapter.matrix.store` helpers.
- [ ] **Step 3: Add durable store helpers and implement `!agent`**
```python
# adapter/matrix/store.py
async def get_selected_agent_id(store: StateStore, matrix_user_id: str) -> str | None:
    """Return the user's ``selected_agent_id`` as a string, or ``None``.

    ``None`` is returned when the user has no metadata or the stored value
    is missing or falsy.
    """
    user_meta = await get_user_meta(store, matrix_user_id)
    if not user_meta:
        return None
    selected = user_meta.get("selected_agent_id")
    if not selected:
        return None
    return str(selected)
async def set_selected_agent_id(store: StateStore, matrix_user_id: str, agent_id: str) -> None:
    """Persist ``agent_id`` as the user's ``selected_agent_id``.

    Other keys already present in the user's metadata are kept.
    """
    # Copy before editing so the mapping handed back by the store is never
    # mutated in place; the write happens only through set_user_meta
    # (same approach as set_room_agent_id).
    meta = dict(await get_user_meta(store, matrix_user_id) or {})
    meta["selected_agent_id"] = agent_id
    await set_user_meta(store, matrix_user_id, meta)
async def set_room_agent_id(store: StateStore, room_id: str, agent_id: str) -> None:
    """Persist ``agent_id`` in the room's metadata, keeping its other keys.

    The stored metadata is copied, so the mapping returned by the store is
    not edited in place.
    """
    existing = await get_room_meta(store, room_id)
    updated = {**(existing or {}), "agent_id": agent_id}
    await set_room_meta(store, room_id, updated)
```
```python
# adapter/matrix/handlers/agent.py
from __future__ import annotations
from adapter.matrix.store import (
get_room_meta,
get_selected_agent_id,
next_platform_chat_id,
set_platform_chat_id,
set_room_agent_id,
set_selected_agent_id,
)
from core.protocol import IncomingCommand, OutgoingMessage
def make_handle_agent(store, registry):
    """Build the ``!agent`` command handler bound to ``store`` and ``registry``.

    The returned coroutine takes ``(event, auth_mgr, platform, chat_mgr,
    settings_mgr)`` and returns a list of ``OutgoingMessage``:

    * without arguments it lists ``registry.agents`` with 1-based numbers
      and marks the user's current selection;
    * with a number it stores that agent as the user's selection and, when
      the current chat's room has metadata but no ``agent_id``, binds the
      room to the agent (allocating a ``platform_chat_id`` if missing);
    * with anything that is not a number in the listed range it replies
      with an error and changes nothing.
    """

    async def handle_agent(event: IncomingCommand, auth_mgr, platform, chat_mgr, settings_mgr):
        def reply(text):
            return [OutgoingMessage(chat_id=event.chat_id, text=text)]

        if not event.args:
            current = await get_selected_agent_id(store, event.user_id)
            lines = ["Доступные агенты:"]
            for index, agent in enumerate(registry.agents, start=1):
                marker = " (текущий)" if agent.agent_id == current else ""
                lines.append(f"{index}. {agent.label}{marker}")
            lines.append("")
            lines.append("Выбери агента: !agent <номер>")
            return reply("\n".join(lines))

        # User input: reject non-numeric text and numbers outside the list.
        # Without the range check, 0 or a negative number would index from
        # the end of the list and silently select the wrong agent.
        try:
            position = int(event.args[0])
        except (TypeError, ValueError):
            position = 0
        total = len(registry.agents)
        if not 1 <= position <= total:
            return reply(f"Неизвестный агент: {event.args[0]}. Выбери номер от 1 до {total}.")

        agent = registry.agents[position - 1]
        await set_selected_agent_id(store, event.user_id, agent.agent_id)

        ctx = await chat_mgr.get(event.chat_id, user_id=event.user_id) if chat_mgr else None
        if ctx is not None:
            room_meta = await get_room_meta(store, ctx.surface_ref)
            # Only a room that is not yet bound to any agent is activated in
            # place; an already bound room keeps its agent.
            if room_meta is not None and not room_meta.get("agent_id"):
                await set_room_agent_id(store, ctx.surface_ref, agent.agent_id)
                if not room_meta.get("platform_chat_id"):
                    await set_platform_chat_id(store, ctx.surface_ref, await next_platform_chat_id(store))
                return reply(f"Агент переключён на {agent.label}. Этот чат готов к работе.")
        return reply(f"Агент переключён на {agent.label}. Для продолжения используй !new.")

    return handle_agent
```
- [ ] **Step 4: Register the command and update help text**
```python
# adapter/matrix/handlers/__init__.py
from adapter.matrix.handlers.agent import make_handle_agent
dispatcher.register(IncomingCommand, "agent", make_handle_agent(store, registry))
```
```python
# adapter/matrix/handlers/settings.py
HELP_TEXT = "\n".join(
[
"Команды",
"",
"!agent выбрать активного агента",
"!new [название] создать новый чат",
"!chats список активных чатов",
"!rename <название> переименовать текущий чат",
"!archive архивировать текущий чат",
"!context показать текущее состояние контекста",
"!save [имя] сохранить текущий контекст",
"!load показать сохранённые контексты",
]
)
```
- [ ] **Step 5: Run the agent-handler tests to verify they pass**
Run: `uv run pytest tests/adapter/matrix/test_agent_handler.py -q`
Expected: PASS
- [ ] **Step 6: Commit**
```bash
git add adapter/matrix/store.py adapter/matrix/handlers/agent.py adapter/matrix/handlers/__init__.py adapter/matrix/handlers/settings.py tests/adapter/matrix/test_agent_handler.py
git commit -m "feat: add matrix agent selection command"
```
---
### Task 4: Bind Rooms Correctly And Block Stale Chats
**Files:**
- Modify: `adapter/matrix/bot.py`
- Modify: `adapter/matrix/handlers/chat.py`
- Modify: `adapter/matrix/handlers/context_commands.py`
- Modify: `tests/adapter/matrix/test_dispatcher.py`
- Modify: `tests/adapter/matrix/test_context_commands.py`
- [ ] **Step 1: Write the failing dispatcher and context-command tests**
```python
# tests/adapter/matrix/test_dispatcher.py
@pytest.mark.asyncio
async def test_bot_replies_with_agent_prompt_when_user_has_no_selected_agent():
runtime = build_runtime(platform=MockPlatformClient())
client = SimpleNamespace(user_id="@bot:example.org", room_send=AsyncMock())
bot = MatrixBot(client, runtime)
await set_room_meta(runtime.store, "!room:example.org", {"chat_id": "C1", "matrix_user_id": "@alice:example.org"})
await bot.on_room_message(SimpleNamespace(room_id="!room:example.org"), SimpleNamespace(sender="@alice:example.org", body="hello"))
client.room_send.assert_awaited_once()
assert "выбери агента" in client.room_send.call_args.args[2]["body"].lower()
@pytest.mark.asyncio
async def test_new_chat_requires_selected_agent_and_binds_room_meta():
client = SimpleNamespace(
room_create=AsyncMock(return_value=SimpleNamespace(room_id="!r2:example")),
room_put_state=AsyncMock(),
)
runtime = build_runtime(platform=MockPlatformClient(), client=client)
await set_user_meta(runtime.store, "u1", {"space_id": "!space:example", "next_chat_index": 2, "selected_agent_id": "agent-2"})
result = await runtime.dispatcher.dispatch(
IncomingCommand(user_id="u1", platform="matrix", chat_id="C1", command="new", args=["Research"])
)
room_meta = await get_room_meta(runtime.store, "!r2:example")
assert room_meta["agent_id"] == "agent-2"
assert "Создан чат" in result[0].text
```
```python
# tests/adapter/matrix/test_context_commands.py
@pytest.mark.asyncio
async def test_load_selection_calls_platform_with_local_chat_id():
platform = MatrixCommandPlatform()
runtime = build_runtime(platform=platform)
await runtime.chat_mgr.get_or_create("u1", "C1", "matrix", "!room:example.org", "Chat 1")
await set_room_meta(runtime.store, "!room:example.org", {"chat_id": "C1", "matrix_user_id": "u1", "platform_chat_id": "41", "agent_id": "agent-2"})
client = SimpleNamespace(user_id="@bot:example.org", room_send=AsyncMock())
bot = MatrixBot(client, runtime)
await set_load_pending(runtime.store, "u1", "!room:example.org", {"saves": [{"name": "session-a", "created_at": "2026-04-17T00:00:00+00:00"}]})
await bot.on_room_message(SimpleNamespace(room_id="!room:example.org"), SimpleNamespace(sender="u1", body="1"))
platform.send_message.assert_awaited_once_with("u1", "C1", LOAD_PROMPT.format(name="session-a"))
```
- [ ] **Step 2: Run the dispatcher and context-command tests to verify they fail**
Run: `uv run pytest tests/adapter/matrix/test_dispatcher.py tests/adapter/matrix/test_context_commands.py -q`
Expected: FAIL because the current runtime still injects `platform_chat_id` into normal messages and `!new` does not require or persist `agent_id`.
- [ ] **Step 3: Implement room binding and stale-room checks in runtime**
```python
# adapter/matrix/bot.py
from adapter.matrix.store import (
get_selected_agent_id,
get_room_meta,
next_platform_chat_id,
set_platform_chat_id,
set_room_agent_id,
)
async def _ensure_active_room_target(self, room_id: str, user_id: str) -> tuple[dict | None, OutgoingMessage | None]:
    """Check that ``room_id`` may receive messages for ``user_id``.

    Returns ``(room_meta, blocking_message)``. The message is ``None`` when
    dispatch may proceed. It is set when the user has no selected agent, or
    when the room is bound to an agent other than the selected one. A room
    that has metadata but no ``agent_id`` is bound to the selected agent
    (and given a ``platform_chat_id`` if missing) before returning.
    """
    store = self.runtime.store
    meta = await get_room_meta(store, room_id)
    chosen_agent = await get_selected_agent_id(store, user_id)

    if not chosen_agent:
        prompt = OutgoingMessage(chat_id=room_id, text="Сначала выбери агента через !agent.")
        return meta, prompt
    if meta is None:
        return None, None

    bound_agent = meta.get("agent_id")
    if bound_agent:
        if bound_agent == chosen_agent:
            return meta, None
        stale = OutgoingMessage(chat_id=room_id, text="Этот чат привязан к старому агенту. Используй !new.")
        return meta, stale

    # Unbound room: attach it to the selected agent, then re-read the
    # metadata so the caller sees the stored result.
    await set_room_agent_id(store, room_id, chosen_agent)
    if not meta.get("platform_chat_id"):
        await set_platform_chat_id(store, room_id, await next_platform_chat_id(store))
    return await get_room_meta(store, room_id), None
```
```python
# adapter/matrix/bot.py
local_chat_id = await resolve_chat_id(self.runtime.store, room.room_id, sender)
dispatch_chat_id = local_chat_id
if not body.startswith("!"):
room_meta, blocking = await self._ensure_active_room_target(room.room_id, sender)
if blocking is not None:
await self._send_all(room.room_id, [blocking])
return
incoming = from_room_event(event, room_id=room.room_id, chat_id=dispatch_chat_id)
```
- [ ] **Step 4: Require selected agent for `!new` and persist room `agent_id`**
```python
# adapter/matrix/handlers/chat.py
from adapter.matrix.store import get_selected_agent_id
selected_agent_id = await get_selected_agent_id(store, event.user_id)
if not selected_agent_id:
return [OutgoingMessage(chat_id=event.chat_id, text="Сначала выбери агента через !agent.")]
await set_room_meta(
store,
room_id,
{
"room_type": "chat",
"chat_id": chat_id,
"display_name": room_name,
"matrix_user_id": event.user_id,
"space_id": space_id,
"platform_chat_id": platform_chat_id,
"agent_id": selected_agent_id,
},
)
```
```python
# adapter/matrix/bot.py
room_meta = await get_room_meta(self.runtime.store, room_id)
local_chat_id = room_meta.get("chat_id", room_id) if room_meta else room_id
await self.runtime.platform.send_message(
user_id,
local_chat_id,
LOAD_PROMPT.format(name=name),
)
```
- [ ] **Step 5: Run the dispatcher and context-command tests to verify they pass**
Run: `uv run pytest tests/adapter/matrix/test_dispatcher.py tests/adapter/matrix/test_context_commands.py -q`
Expected: PASS
- [ ] **Step 6: Commit**
```bash
git add adapter/matrix/bot.py adapter/matrix/handlers/chat.py adapter/matrix/handlers/context_commands.py tests/adapter/matrix/test_dispatcher.py tests/adapter/matrix/test_context_commands.py
git commit -m "feat: bind matrix rooms to selected agents"
```
---
### Task 5: Prove Durable Restart State And Sequence Persistence
**Files:**
- Create: `tests/adapter/matrix/test_restart_persistence.py`
- Modify: `adapter/matrix/store.py`
- Modify: `README.md`
- [ ] **Step 1: Write the failing restart-persistence tests**
```python
# tests/adapter/matrix/test_restart_persistence.py
import pytest
from adapter.matrix.store import (
get_selected_agent_id,
next_platform_chat_id,
set_room_meta,
set_selected_agent_id,
)
from core.store import SQLiteStore
@pytest.mark.asyncio
async def test_selected_agent_and_room_binding_survive_store_recreation(tmp_path):
db_path = tmp_path / "matrix.db"
store = SQLiteStore(str(db_path))
await set_selected_agent_id(store, "u1", "agent-2")
await set_room_meta(
store,
"!room:example.org",
{"chat_id": "C1", "matrix_user_id": "u1", "platform_chat_id": "41", "agent_id": "agent-2"},
)
reopened = SQLiteStore(str(db_path))
assert await get_selected_agent_id(reopened, "u1") == "agent-2"
assert (await reopened.get("matrix_room:!room:example.org"))["agent_id"] == "agent-2"
assert (await reopened.get("matrix_room:!room:example.org"))["platform_chat_id"] == "41"
@pytest.mark.asyncio
async def test_platform_chat_sequence_survives_store_recreation(tmp_path):
db_path = tmp_path / "matrix.db"
store = SQLiteStore(str(db_path))
assert await next_platform_chat_id(store) == "1"
assert await next_platform_chat_id(store) == "2"
reopened = SQLiteStore(str(db_path))
assert await next_platform_chat_id(reopened) == "3"
```
- [ ] **Step 2: Run the restart-persistence tests to verify they fail**
Run: `uv run pytest tests/adapter/matrix/test_restart_persistence.py -q`
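Expected: FAIL if the `selected_agent_id` helpers from Task 3 are not in place yet or `next_platform_chat_id` does not persist its counter through the store; if both already hold, the tests pass and simply make the persistence behavior explicit.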
- [ ] **Step 3: Make sequence persistence explicit and document the restart boundary**
```python
# adapter/matrix/store.py
PLATFORM_CHAT_SEQ_KEY = "matrix_platform_chat_seq"
async def next_platform_chat_id(store: StateStore) -> str:
    """Allocate the next platform chat id and persist the advanced counter.

    The counter is kept in ``store`` under ``PLATFORM_CHAT_SEQ_KEY`` and
    starts at 1 when nothing is stored. The read-increment-write sequence
    runs while holding ``_PLATFORM_CHAT_SEQ_LOCK``.
    """
    async with _PLATFORM_CHAT_SEQ_LOCK:
        stored = await store.get(PLATFORM_CHAT_SEQ_KEY)
        counter_state = stored if stored else {}
        current = int(counter_state.get("next_platform_chat_index", 1))
        await store.set(PLATFORM_CHAT_SEQ_KEY, {"next_platform_chat_index": current + 1})
    return str(current)
```
```markdown
# README.md
- Matrix durable state lives in `lambda_matrix.db` and `matrix_store`
- normal restart is supported only when those paths survive container recreation
- staged attachments and pending confirmations are intentionally not restored
```
- [ ] **Step 4: Run the restart-persistence tests to verify they pass**
Run: `uv run pytest tests/adapter/matrix/test_restart_persistence.py -q`
Expected: PASS
- [ ] **Step 5: Run the combined verification sweep**
Run: `uv run pytest tests/adapter/matrix/test_agent_registry.py tests/adapter/matrix/test_routed_platform.py tests/adapter/matrix/test_agent_handler.py tests/adapter/matrix/test_dispatcher.py tests/adapter/matrix/test_context_commands.py tests/adapter/matrix/test_restart_persistence.py tests/platform/test_real.py -q`
Expected: PASS
- [ ] **Step 6: Commit**
```bash
git add adapter/matrix/store.py README.md tests/adapter/matrix/test_restart_persistence.py
git commit -m "test: cover matrix restart state persistence"
```
---
## Self-Review
### Spec coverage
- Multi-agent registry: Task 1
- Shared `PlatformClient` preserved via routing facade: Task 2
- `!agent` UX and durable `selected_agent_id`: Task 3
- Unbound room activation, `!new`, stale room rejection: Task 4
- Restart durability for user state, room state, and `PLATFORM_CHAT_SEQ_KEY`: Task 5
### Placeholder scan
- No `TODO`, `TBD`, or “implement later” markers remain.
- Each task includes exact file paths, tests, commands, and minimal code snippets.
### Type consistency
- `selected_agent_id` lives in user metadata throughout the plan.
- `agent_id` and `platform_chat_id` live in room metadata throughout the plan.
- `RoutedPlatformClient` keeps the existing `PlatformClient` method names intact.

View file

@ -1,8 +1,11 @@
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import os
import re
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
from pathlib import Path from pathlib import Path
from urllib.parse import urljoin, urlsplit, urlunsplit
import structlog import structlog
@ -21,6 +24,11 @@ from sdk.upstream_agent_api import AgentApi, MsgEventSendFile, MsgEventTextChunk
logger = structlog.get_logger(__name__) logger = structlog.get_logger(__name__)
def _ws_debug_enabled() -> bool:
    """Report whether WebSocket debug logging is switched on.

    Reads the ``SURFACES_DEBUG_WS`` environment variable on every call.
    ``1``, ``true``, ``yes`` and ``on`` (case-insensitive, surrounding
    whitespace ignored) enable it; any other value, or an unset variable,
    disables it.
    """
    truthy_flags = ("1", "true", "yes", "on")
    setting = os.environ.get("SURFACES_DEBUG_WS", "").strip().lower()
    return setting in truthy_flags
class RealPlatformClient(PlatformClient): class RealPlatformClient(PlatformClient):
def __init__( def __init__(
self, self,
@ -31,11 +39,20 @@ class RealPlatformClient(PlatformClient):
agent_api_cls=AgentApi, agent_api_cls=AgentApi,
) -> None: ) -> None:
self._agent_id = agent_id self._agent_id = agent_id
self._agent_base_url = agent_base_url self._raw_agent_base_url = agent_base_url
self._agent_base_url = self._normalize_agent_base_url(agent_base_url)
self._agent_api_cls = agent_api_cls self._agent_api_cls = agent_api_cls
self._prototype_state = prototype_state self._prototype_state = prototype_state
self._platform = platform self._platform = platform
self._chat_send_locks: dict[str, asyncio.Lock] = {} self._chat_send_locks: dict[str, asyncio.Lock] = {}
if _ws_debug_enabled():
logger.warning(
"agent_client_initialized",
agent_id=self._agent_id,
platform=self._platform,
raw_base_url=self._raw_agent_base_url,
normalized_base_url=self._agent_base_url,
)
@property @property
def agent_id(self) -> str: def agent_id(self) -> str:
@ -171,12 +188,28 @@ class RealPlatformClient(PlatformClient):
yield event yield event
def _build_chat_api(self, chat_id: str): def _build_chat_api(self, chat_id: str):
if _ws_debug_enabled():
logger.warning(
"agent_chat_api_build",
agent_id=self._agent_id,
chat_id=str(chat_id),
normalized_base_url=self._agent_base_url,
ws_url=urljoin(self._agent_base_url, f"v1/agent_ws/{chat_id}/"),
)
return self._agent_api_cls( return self._agent_api_cls(
agent_id=self._agent_id, agent_id=self._agent_id,
base_url=self._agent_base_url, base_url=self._agent_base_url,
chat_id=str(chat_id), chat_id=str(chat_id),
) )
@staticmethod
def _normalize_agent_base_url(base_url: str) -> str:
parsed = urlsplit(base_url)
path = re.sub(r"(?:/v1)?/agent_ws(?:/[^/]+)?/?$", "", parsed.path.rstrip("/"))
if path:
path = f"{path}/"
return urlunsplit((parsed.scheme, parsed.netloc, path, "", ""))
@staticmethod @staticmethod
async def _close_chat_api(chat_api) -> None: async def _close_chat_api(chat_api) -> None:
close = getattr(chat_api, "close", None) close = getattr(chat_api, "close", None)

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

View file

@ -211,7 +211,7 @@ async def test_invite_event_is_idempotent_per_user():
assert client.join.await_count == 2 assert client.join.await_count == 2
assert client.room_create.await_count == 2 assert client.room_create.await_count == 2
client.room_send.assert_awaited_once() assert client.room_send.await_count == 2
async def test_bot_ignores_its_own_messages(): async def test_bot_ignores_its_own_messages():
@ -348,7 +348,8 @@ async def test_bot_downloads_matrix_file_to_configured_agent_workspace(tmp_path,
base_url="http://lambda.coredump.ru:7000/agent_17/", base_url="http://lambda.coredump.ru:7000/agent_17/",
workspace_path=str(tmp_path / "agents" / "17"), workspace_path=str(tmp_path / "agents" / "17"),
) )
] ],
user_agents={"@alice:example.org": "agent-17"},
) )
await set_room_meta( await set_room_meta(
runtime.store, runtime.store,
@ -381,7 +382,7 @@ async def test_bot_downloads_matrix_file_to_configured_agent_workspace(tmp_path,
staged = await get_staged_attachments( staged = await get_staged_attachments(
runtime.store, "!chat17:example.org", "@alice:example.org" runtime.store, "!chat17:example.org", "@alice:example.org"
) )
assert staged[0]["workspace_path"].startswith("incoming/") assert staged[0]["workspace_path"] == "report.pdf"
assert ( assert (
tmp_path / "agents" / "17" / staged[0]["workspace_path"] tmp_path / "agents" / "17" / staged[0]["workspace_path"]
).read_bytes() == b"%PDF-1.7" ).read_bytes() == b"%PDF-1.7"
@ -389,7 +390,7 @@ async def test_bot_downloads_matrix_file_to_configured_agent_workspace(tmp_path,
async def test_bot_uploads_agent_output_from_configured_agent_workspace(tmp_path, monkeypatch): async def test_bot_uploads_agent_output_from_configured_agent_workspace(tmp_path, monkeypatch):
monkeypatch.setenv("SURFACES_WORKSPACE_DIR", str(tmp_path / "agents")) monkeypatch.setenv("SURFACES_WORKSPACE_DIR", str(tmp_path / "agents"))
output_file = tmp_path / "agents" / "17" / "output" / "result.txt" output_file = tmp_path / "agents" / "17" / "result.txt"
output_file.parent.mkdir(parents=True) output_file.parent.mkdir(parents=True)
output_file.write_text("ready", encoding="utf-8") output_file.write_text("ready", encoding="utf-8")
runtime = build_runtime(platform=MockPlatformClient()) runtime = build_runtime(platform=MockPlatformClient())
@ -401,7 +402,8 @@ async def test_bot_uploads_agent_output_from_configured_agent_workspace(tmp_path
base_url="http://lambda.coredump.ru:7000/agent_17/", base_url="http://lambda.coredump.ru:7000/agent_17/",
workspace_path=str(tmp_path / "agents" / "17"), workspace_path=str(tmp_path / "agents" / "17"),
) )
] ],
user_agents={"@alice:example.org": "agent-17"},
) )
await set_room_meta( await set_room_meta(
runtime.store, runtime.store,
@ -429,7 +431,7 @@ async def test_bot_uploads_agent_output_from_configured_agent_workspace(tmp_path
type="document", type="document",
filename="result.txt", filename="result.txt",
mime_type="text/plain", mime_type="text/plain",
workspace_path="output/result.txt", workspace_path="result.txt",
) )
], ],
) )

View file

@ -4,29 +4,12 @@ from pathlib import Path
from types import SimpleNamespace from types import SimpleNamespace
from adapter.matrix.files import ( from adapter.matrix.files import (
build_agent_incoming_path, build_agent_workspace_path,
build_workspace_attachment_path,
download_matrix_attachment, download_matrix_attachment,
) )
from core.protocol import Attachment from core.protocol import Attachment
def test_build_workspace_attachment_path_scopes_by_surface_user_and_room(tmp_path: Path):
rel_path, abs_path = build_workspace_attachment_path(
workspace_root=tmp_path,
matrix_user_id="@alice:example.org",
room_id="!room:example.org",
filename="report.pdf",
timestamp="20260420-153000",
)
assert (
rel_path
== "surfaces/matrix/alice_example.org/room_example.org/inbox/20260420-153000-report.pdf"
)
assert abs_path == tmp_path / rel_path
async def test_download_matrix_attachment_persists_file_and_returns_workspace_path(tmp_path: Path): async def test_download_matrix_attachment_persists_file_and_returns_workspace_path(tmp_path: Path):
async def download(url: str): async def download(url: str):
assert url == "mxc://server/id" assert url == "mxc://server/id"
@ -49,40 +32,46 @@ async def test_download_matrix_attachment_persists_file_and_returns_workspace_pa
timestamp="20260420-153000", timestamp="20260420-153000",
) )
assert saved.workspace_path is not None assert saved.workspace_path == "report.pdf"
assert saved.workspace_path.endswith("20260420-153000-report.pdf") assert (tmp_path / "report.pdf").read_bytes() == b"%PDF-1.7"
assert (tmp_path / saved.workspace_path).read_bytes() == b"%PDF-1.7"
def test_build_workspace_attachment_path_keeps_room_safe_agents_relative_contract(tmp_path: Path): def test_build_agent_workspace_path_uses_agent_workspace_volume(tmp_path: Path):
rel_path, abs_path = build_workspace_attachment_path( rel_path, abs_path = build_agent_workspace_path(
workspace_root=tmp_path / "agents" / "7",
matrix_user_id="@alice+bob:example.org",
room_id="!room/ops:example.org",
filename="quarterly status (final).pdf",
timestamp="20260420-153000",
)
assert rel_path == (
"surfaces/matrix/alice_bob_example.org/room_ops_example.org/inbox/"
"20260420-153000-quarterly_status_final_.pdf"
)
assert not Path(rel_path).is_absolute()
assert abs_path == tmp_path / "agents" / "7" / rel_path
def test_build_agent_incoming_path_uses_agent_workspace_volume(tmp_path: Path):
rel_path, abs_path = build_agent_incoming_path(
workspace_root=tmp_path / "agents" / "17", workspace_root=tmp_path / "agents" / "17",
filename="quarterly status.pdf", filename="quarterly status.pdf",
timestamp="20260428-110000",
) )
assert rel_path == "incoming/20260428-110000-quarterly_status.pdf" assert rel_path == "quarterly status.pdf"
assert abs_path == tmp_path / "agents" / "17" / rel_path assert abs_path == tmp_path / "agents" / "17" / rel_path
async def test_download_matrix_attachment_uses_agent_workspace_incoming_dir(tmp_path: Path): def test_build_agent_workspace_path_uses_windows_style_copy_index(tmp_path: Path):
workspace_root = tmp_path / "agents" / "17"
workspace_root.mkdir(parents=True)
(workspace_root / "report.pdf").write_bytes(b"old")
(workspace_root / "report (1).pdf").write_bytes(b"older")
rel_path, abs_path = build_agent_workspace_path(
workspace_root=workspace_root,
filename="report.pdf",
)
assert rel_path == "report (2).pdf"
assert abs_path == workspace_root / "report (2).pdf"
def test_build_agent_workspace_path_sanitizes_to_basename(tmp_path: Path):
    """A filename with parent-directory segments and `:`/`?` maps to a sanitized basename.

    The resulting absolute path is expected to sit directly inside the agent
    workspace root.
    """
    agent_root = tmp_path / "agents" / "17"
    sanitized = "quarterly_ status_.pdf"

    rel_path, abs_path = build_agent_workspace_path(
        workspace_root=agent_root,
        filename="../../quarterly: status?.pdf",
    )

    assert (rel_path, abs_path) == (sanitized, agent_root / sanitized)
async def test_download_matrix_attachment_uses_agent_workspace_root(tmp_path: Path):
async def download(url: str): async def download(url: str):
assert url == "mxc://server/id" assert url == "mxc://server/id"
return SimpleNamespace(body=b"%PDF-1.7") return SimpleNamespace(body=b"%PDF-1.7")
@ -101,5 +90,5 @@ async def test_download_matrix_attachment_uses_agent_workspace_incoming_dir(tmp_
timestamp="20260428-110000", timestamp="20260428-110000",
) )
assert saved.workspace_path == "incoming/20260428-110000-report.pdf" assert saved.workspace_path == "report.pdf"
assert (tmp_path / "agents" / "17" / saved.workspace_path).read_bytes() == b"%PDF-1.7" assert (tmp_path / "agents" / "17" / saved.workspace_path).read_bytes() == b"%PDF-1.7"

View file

@ -7,7 +7,7 @@ from nio.api import RoomVisibility
from adapter.matrix.bot import build_runtime from adapter.matrix.bot import build_runtime
from adapter.matrix.handlers.auth import handle_invite from adapter.matrix.handlers.auth import handle_invite
from adapter.matrix.store import get_room_meta, get_user_meta, set_user_meta from adapter.matrix.store import get_room_meta, get_user_meta, set_room_meta, set_user_meta
from sdk.mock import MockPlatformClient from sdk.mock import MockPlatformClient
@ -100,6 +100,53 @@ async def test_mat02_invite_idempotent():
assert client.room_create.await_count == 2 assert client.room_create.await_count == 2
async def test_existing_user_invite_reinvites_space_and_active_chats():
    """A user with stored space metadata who invites the bot again is re-invited.

    The bot must not create any new room; instead it invites the user back
    into the stored space and the stored chat room, and sends a message.
    """
    user_id = "@alice:example.org"
    space_id = "!space:example.org"
    chat_room_id = "!chat1:example.org"

    # Seed the local state as if the user had already been onboarded.
    runtime = build_runtime(platform=MockPlatformClient())
    await set_user_meta(
        runtime.store,
        user_id,
        {"space_id": space_id, "next_chat_index": 2},
    )
    chat_meta = {
        "room_type": "chat",
        "chat_id": "C1",
        "display_name": "Чат 1",
        "matrix_user_id": user_id,
        "space_id": space_id,
        "platform_chat_id": "1",
        "agent_id": "agent-1",
    }
    await set_room_meta(runtime.store, chat_room_id, chat_meta)
    await runtime.chat_mgr.get_or_create(
        user_id=user_id,
        chat_id="C1",
        platform="matrix",
        surface_ref=chat_room_id,
        name="Чат 1",
    )

    client = _make_client()
    dm_room = SimpleNamespace(room_id="!dm:example.org", display_name="Alice")
    invite_event = SimpleNamespace(sender=user_id, membership="invite")

    await handle_invite(
        client,
        dm_room,
        invite_event,
        runtime.platform,
        runtime.store,
        runtime.auth_mgr,
        runtime.chat_mgr,
    )

    client.room_create.assert_not_awaited()
    for target_room in (space_id, chat_room_id):
        client.room_invite.assert_any_await(target_room, user_id)
    client.room_send.assert_awaited()
async def test_mat03_no_hardcoded_c1(): async def test_mat03_no_hardcoded_c1():
runtime = build_runtime(platform=MockPlatformClient()) runtime = build_runtime(platform=MockPlatformClient())
await set_user_meta(runtime.store, "@alice:example.org", {"next_chat_index": 7}) await set_user_meta(runtime.store, "@alice:example.org", {"next_chat_index": 7})

View file

@ -4,6 +4,7 @@ import importlib
from types import SimpleNamespace from types import SimpleNamespace
from unittest.mock import AsyncMock from unittest.mock import AsyncMock
from adapter.matrix.agent_registry import AgentDefinition, AgentRegistry
from adapter.matrix.bot import MatrixBot, build_runtime from adapter.matrix.bot import MatrixBot, build_runtime
from adapter.matrix.reconciliation import reconcile_startup_state from adapter.matrix.reconciliation import reconcile_startup_state
from adapter.matrix.store import get_room_meta, get_user_meta, set_room_meta, set_user_meta from adapter.matrix.store import get_room_meta, get_user_meta, set_room_meta, set_user_meta
@ -124,6 +125,55 @@ async def test_reconcile_startup_state_is_idempotent_with_existing_local_state()
assert chats[0].chat_id == "C3" assert chats[0].chat_id == "C3"
async def test_reconcile_updates_default_agent_assignment_after_user_is_configured():
    """Reconciliation moves a default-assigned room to the user's configured agent.

    The room starts with ``agent_assignment == "default"``; after the registry
    maps the user to ``agent-alice`` the stored metadata must switch to that
    agent while keeping the original ``platform_chat_id``.
    """
    user_id = "@alice:example.org"
    bot_id = "@bot:example.org"
    space_id = "!space:example.org"
    chat_room_id = "!chat3:example.org"

    runtime = build_runtime(platform=MockPlatformClient())
    agent_definitions = [
        AgentDefinition("agent-default", "Default"),
        AgentDefinition("agent-alice", "Alice"),
    ]
    runtime.registry = AgentRegistry(
        agent_definitions,
        user_agents={user_id: "agent-alice"},
    )

    space_room = _room(space_id, "Lambda - Alice", [bot_id, user_id])
    chat_room = _room(
        chat_room_id,
        "Чат 3",
        [bot_id, user_id],
        parents=(space_id,),
    )
    client = SimpleNamespace(
        user_id=bot_id,
        rooms={space_id: space_room, chat_room_id: chat_room},
    )

    stored_meta = {
        "room_type": "chat",
        "chat_id": "C3",
        "display_name": "Чат 3",
        "matrix_user_id": user_id,
        "space_id": space_id,
        "platform_chat_id": "42",
        "agent_id": "agent-default",
        "agent_assignment": "default",
    }
    await set_room_meta(runtime.store, chat_room_id, stored_meta)

    await reconcile_startup_state(client, runtime)

    room_meta = await get_room_meta(runtime.store, chat_room_id)
    assert room_meta is not None
    expected = {
        "agent_id": "agent-alice",
        "agent_assignment": "configured",
        "platform_chat_id": "42",
    }
    assert {key: room_meta[key] for key in expected} == expected
async def test_reconciliation_prevents_lazy_bootstrap_for_existing_room(): async def test_reconciliation_prevents_lazy_bootstrap_for_existing_room():
runtime = build_runtime(platform=MockPlatformClient()) runtime = build_runtime(platform=MockPlatformClient())
client = SimpleNamespace( client = SimpleNamespace(

View file

@ -185,6 +185,24 @@ async def test_real_platform_client_send_message_uses_direct_agent_api_per_chat(
assert await prototype_state.get_last_tokens_used_for_context("chat-7") == 0 assert await prototype_state.get_last_tokens_used_for_context("chat-7") == 0
@pytest.mark.asyncio
async def test_real_platform_client_preserves_path_base_url_without_trailing_slash():
    """A path-style base URL given without a trailing slash reaches the agent API with one."""
    configured_url = "http://lambda.coredump.ru:7000/agent_17"
    factory = FakeAgentApiFactory()
    client = RealPlatformClient(
        agent_id="agent-17",
        agent_base_url=configured_url,
        agent_api_cls=factory,
        prototype_state=PrototypeStateStore(),
        platform="matrix",
    )

    await client.send_message("@alice:example.org", "41", "hello")

    expected_call = ("agent-17", "http://lambda.coredump.ru:7000/agent_17/", "41")
    assert factory.created_calls == [expected_call]
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_real_platform_client_forwards_attachments_to_chat_api(): async def test_real_platform_client_forwards_attachments_to_chat_api():
agent_api = FakeAgentApiFactory(chat_api_cls=AttachmentTrackingChatAgentApi) agent_api = FakeAgentApiFactory(chat_api_cls=AttachmentTrackingChatAgentApi)
@ -213,15 +231,15 @@ async def test_real_platform_client_forwards_attachments_to_chat_api():
def test_attachment_paths_normalize_workspace_roots_to_relative_paths(): def test_attachment_paths_normalize_workspace_roots_to_relative_paths():
attachments = [ attachments = [
Attachment(workspace_path="/workspace/output/report.pdf"), Attachment(workspace_path="/workspace/report.pdf"),
Attachment(workspace_path="/agents/7/output/report.csv"), Attachment(workspace_path="/agents/7/report.csv"),
Attachment(workspace_path="surfaces/matrix/alice/room/inbox/note.txt"), Attachment(workspace_path="note.txt"),
] ]
assert RealPlatformClient._attachment_paths(attachments) == [ assert RealPlatformClient._attachment_paths(attachments) == [
"output/report.pdf", "report.pdf",
"output/report.csv", "report.csv",
"surfaces/matrix/alice/room/inbox/note.txt", "note.txt",
] ]
@ -257,9 +275,12 @@ async def test_real_platform_client_preserves_send_file_events_in_sync_result(mo
@pytest.mark.parametrize( @pytest.mark.parametrize(
("location", "expected_workspace_path"), ("location", "expected_workspace_path"),
[ [
("/workspace/output/report.pdf", "output/report.pdf"), ("/workspace/report.pdf", "report.pdf"),
("/agents/7/output/report.pdf", "output/report.pdf"), ("/agents/7/report.pdf", "report.pdf"),
("surfaces/matrix/alice/room/inbox/report.pdf", "surfaces/matrix/alice/room/inbox/report.pdf"), (
"surfaces/matrix/alice/room/inbox/report.pdf",
"surfaces/matrix/alice/room/inbox/report.pdf",
),
], ],
) )
def test_attachment_from_send_file_event_normalizes_shared_volume_paths( def test_attachment_from_send_file_event_normalizes_shared_volume_paths(

View file

@ -0,0 +1,22 @@
from tools.check_matrix_agents import build_agent_ws_url
def test_build_agent_ws_url_preserves_path_prefix_without_trailing_slash():
    """The `/agent_17` path prefix survives when the base URL has no trailing slash."""
    ws_url = build_agent_ws_url("http://lambda.coredump.ru:7000/agent_17", "41")

    assert ws_url == "http://lambda.coredump.ru:7000/agent_17/v1/agent_ws/41/"
def test_build_agent_ws_url_preserves_path_prefix_with_trailing_slash():
    """The `/agent_17/` path prefix survives when the base URL already ends in a slash."""
    ws_url = build_agent_ws_url("http://lambda.coredump.ru:7000/agent_17/", "41")

    assert ws_url == "http://lambda.coredump.ru:7000/agent_17/v1/agent_ws/41/"
def test_build_agent_ws_url_accepts_existing_agent_ws_url():
    """A full agent_ws URL for another chat is rewritten to the requested chat id."""
    existing_ws_url = "http://lambda.coredump.ru:7000/agent_17/v1/agent_ws/0/"

    ws_url = build_agent_ws_url(existing_ws_url, "41")

    assert ws_url == "http://lambda.coredump.ru:7000/agent_17/v1/agent_ws/41/"

View file

@ -39,6 +39,21 @@ def test_dockerfile_production_build_does_not_require_local_external_tree():
assert "uv pip install --system --ignore-requires-python" not in dockerfile assert "uv pip install --system --ignore-requires-python" not in dockerfile
def test_dockerfile_installs_agent_api_after_final_uv_sync():
    """In both build stages the agent_api install line must come after `uv sync`."""
    sync_step = "RUN uv sync --no-dev --frozen"
    dockerfile = (ROOT / "Dockerfile").read_text(encoding="utf-8")

    # Development stage text: everything between the two stage headers.
    after_development_header = dockerfile.split("FROM base AS development", maxsplit=1)[1]
    development = after_development_header.split("FROM base AS production", maxsplit=1)[0]
    # Production stage text: everything after its header.
    production = dockerfile.split("FROM base AS production", maxsplit=1)[1]

    assert development.index(sync_step) < development.index(
        "pip install --no-cache-dir --ignore-requires-python -e /agent_api/"
    )
    assert production.index(sync_step) < production.index(
        "git+https://git.lambda.coredump.ru/platform/agent_api.git"
    )
def test_dockerignore_excludes_local_only_and_runtime_artifacts(): def test_dockerignore_excludes_local_only_and_runtime_artifacts():
dockerignore = (ROOT / ".dockerignore").read_text(encoding="utf-8") dockerignore = (ROOT / ".dockerignore").read_text(encoding="utf-8")
@ -60,3 +75,28 @@ def test_agent_registry_example_documents_multi_agent_volume_contract():
for index, agent in enumerate(agents): for index, agent in enumerate(agents):
assert agent["base_url"].endswith(f"/agent_{index}/") assert agent["base_url"].endswith(f"/agent_{index}/")
assert agent["workspace_path"] == f"/agents/{index}" assert agent["workspace_path"] == f"/agents/{index}"
def test_smoke_compose_models_deploy_like_proxy_and_surface_checker():
    """The smoke compose file declares the checker, the proxy and two agents."""
    services = _compose("docker-compose.smoke.yml")["services"]
    required = {"surface-smoke", "agent-proxy", "agent-0", "agent-1"}

    assert required.issubset(services)
    assert "tools.check_matrix_agents" in services["surface-smoke"]["command"]
    assert services["agent-proxy"]["ports"] == ["${SMOKE_PROXY_PORT:-7000}:7000"]
def test_smoke_timeout_override_routes_one_agent_to_no_status_stub():
    """The timeout override compose file declares the proxy and the no-status stub."""
    services = _compose("docker-compose.smoke.timeout.yml")["services"]

    assert {"agent-proxy", "agent-no-status"}.issubset(services)
def test_smoke_registry_targets_local_proxy_routes():
    """Every agent in the smoke registry points at its route on the local proxy."""
    registry_file = ROOT / "config" / "matrix-agents.smoke.yaml"
    registry = yaml.safe_load(registry_file.read_text(encoding="utf-8"))

    base_urls = [agent["base_url"] for agent in registry["agents"]]

    assert base_urls == [
        "http://agent-proxy:7000/agent_0/",
        "http://agent-proxy:7000/agent_1/",
    ]

1
tools/__init__.py Normal file
View file

@ -0,0 +1 @@
"""Operational tools for surfaces-bot."""

View file

@ -0,0 +1,197 @@
from __future__ import annotations
import argparse
import asyncio
import json
import os
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from urllib.parse import urljoin
import aiohttp
from adapter.matrix.agent_registry import AgentDefinition, load_agent_registry
from sdk.real import RealPlatformClient
@dataclass
class AgentCheckResult:
    """Outcome of one smoke check against a single agent WebSocket endpoint."""

    # Which agent was probed and with which chat id.
    agent_id: str
    label: str
    chat_id: str

    # Base URL actually used and the WebSocket URL derived from it.
    base_url: str
    ws_url: str

    # Verdict, plus the stage at which the check finished.
    ok: bool
    stage: str

    # Elapsed time of the check in milliseconds.
    latency_ms: int

    # Failure description; empty unless one was recorded.
    error: str = ""
    # Type of the last relevant message received, when there was one.
    response_type: str = ""
def build_agent_ws_url(base_url: str, chat_id: str) -> str:
    """Return the agent WebSocket endpoint URL for ``chat_id`` under ``base_url``.

    The base URL is first passed through
    ``RealPlatformClient._normalize_agent_base_url``; the relative path
    ``v1/agent_ws/<chat_id>/`` is then resolved against that result.
    """
    endpoint = f"v1/agent_ws/{chat_id}/"
    return urljoin(RealPlatformClient._normalize_agent_base_url(base_url), endpoint)
def _message_type(payload: str) -> str:
try:
data = json.loads(payload)
except json.JSONDecodeError:
return ""
value = data.get("type")
return value if isinstance(value, str) else ""
async def _receive_text(ws: aiohttp.ClientWebSocketResponse, timeout: float) -> str:
    """Wait up to ``timeout`` seconds for the next frame and return it as text.

    Raises:
        RuntimeError: if the frame is an ERROR frame or any other non-TEXT frame.
    """
    frame = await asyncio.wait_for(ws.receive(), timeout=timeout)
    kind = frame.type
    if kind == aiohttp.WSMsgType.ERROR:
        raise RuntimeError(f"websocket error: {ws.exception()}")
    if kind != aiohttp.WSMsgType.TEXT:
        raise RuntimeError(f"unexpected websocket message type: {kind.name}")
    return str(frame.data)
async def check_agent(
    agent: AgentDefinition,
    *,
    fallback_base_url: str,
    chat_id: str,
    timeout: float,
    message: str | None,
) -> AgentCheckResult:
    """Probe one agent's WebSocket endpoint and report the outcome.

    Connects to the URL built from the agent's own ``base_url`` (or
    ``fallback_base_url`` when the agent has none) and waits for a first
    message whose type is ``STATUS``. When ``message`` is given it is then sent
    as a ``USER_MESSAGE`` and events are read until ``AGENT_EVENT_END``
    (success), ``ERROR``, or an event without a usable type (failure).

    Args:
        agent: Registry entry to check.
        fallback_base_url: Base URL used when ``agent.base_url`` is empty.
        chat_id: Chat id placed in the WebSocket URL.
        timeout: Seconds allowed for connecting and for each received message.
        message: Optional text to send after ``STATUS``; falsy means the check
            stops after ``STATUS``.

    Returns:
        An ``AgentCheckResult``. Failures are reported through the result
        (``ok=False`` plus ``stage``/``error``) rather than raised.
    """
    base_url = agent.base_url or fallback_base_url
    ws_url = build_agent_ws_url(base_url, chat_id) if base_url else ""
    started = time.perf_counter()

    def result(ok: bool, stage: str, error: str = "", response_type: str = "") -> AgentCheckResult:
        # Latency is taken at the moment the result is produced.
        return AgentCheckResult(
            agent_id=agent.agent_id,
            label=agent.label,
            chat_id=chat_id,
            base_url=base_url,
            ws_url=ws_url,
            ok=ok,
            stage=stage,
            latency_ms=int((time.perf_counter() - started) * 1000),
            error=error,
            response_type=response_type,
        )

    if not base_url:
        return result(False, "config", "missing base_url and AGENT_BASE_URL")

    try:
        client_timeout = aiohttp.ClientTimeout(
            total=timeout,
            connect=timeout,
            sock_connect=timeout,
            sock_read=timeout,
        )
        async with aiohttp.ClientSession(timeout=client_timeout) as session:
            async with session.ws_connect(ws_url, heartbeat=30) as ws:
                raw_status = await _receive_text(ws, timeout)
                status_type = _message_type(raw_status)
                if status_type != "STATUS":
                    return result(
                        False,
                        "status",
                        f"expected STATUS, got {raw_status[:200]}",
                        status_type,
                    )
                if not message:
                    return result(True, "status", response_type=status_type)

                payload = {
                    "type": "USER_MESSAGE",
                    "text": message,
                    "attachments": [],
                }
                await ws.send_str(json.dumps(payload))
                # Events of any other type are skipped until a terminal one arrives.
                while True:
                    raw_event = await _receive_text(ws, timeout)
                    event_type = _message_type(raw_event)
                    if event_type == "ERROR":
                        return result(False, "message", raw_event[:200], event_type)
                    if event_type == "AGENT_EVENT_END":
                        return result(True, "message", response_type=event_type)
                    if not event_type:
                        return result(False, "message", f"invalid JSON event: {raw_event[:200]}")
    except asyncio.TimeoutError:
        # asyncio.TimeoutError is only an alias of the built-in TimeoutError
        # from Python 3.11 on; naming it explicitly keeps timeouts classified
        # as "timeout" on older interpreters too.
        return result(False, "timeout", f"no response within {timeout:g}s")
    except Exception as exc:
        # Deliberate catch-all: every other failure becomes a "connect" row
        # instead of aborting the whole run. Some exceptions stringify to "",
        # so fall back to the class name to keep the report informative.
        return result(False, "connect", str(exc) or type(exc).__name__)
def _select_agents(
agents: tuple[AgentDefinition, ...],
selected: set[str],
) -> list[AgentDefinition]:
if not selected:
return list(agents)
return [agent for agent in agents if agent.agent_id in selected]
async def run_checks(args: argparse.Namespace) -> list[AgentCheckResult]:
    """Check every selected agent from the registry, at most ``args.concurrency`` at a time.

    Each agent gets ``args.chat_id`` when it is set, otherwise
    ``args.chat_id_base`` plus the agent's position in the selection. The
    fallback base URL comes from ``args.base_url`` or, when that is empty, the
    ``AGENT_BASE_URL`` environment variable.

    Returns:
        One result per selected agent, in selection order.

    Raises:
        SystemExit: if no agent matches the selection, or if
            ``args.concurrency`` is below 1.
    """
    registry = load_agent_registry(args.config)
    selected = _select_agents(registry.agents, set(args.agent))
    if not selected:
        raise SystemExit("no matching agents selected")
    # A semaphore created with 0 permits is never released, so every check
    # would block forever; negative values make Semaphore raise ValueError.
    if args.concurrency < 1:
        raise SystemExit("--concurrency must be at least 1")

    fallback_base_url = args.base_url or os.environ.get("AGENT_BASE_URL", "")
    semaphore = asyncio.Semaphore(args.concurrency)

    async def run_one(index: int, agent: AgentDefinition) -> AgentCheckResult:
        chat_id = str(args.chat_id if args.chat_id is not None else args.chat_id_base + index)
        async with semaphore:
            return await check_agent(
                agent,
                fallback_base_url=fallback_base_url,
                chat_id=chat_id,
                timeout=args.timeout,
                message=args.message,
            )

    return await asyncio.gather(*(run_one(index, agent) for index, agent in enumerate(selected)))
def print_table(results: list[AgentCheckResult]) -> None:
    """Print one aligned, space-separated line per check result to stdout.

    The trailing column shows the response type when there is one, otherwise
    the error text.
    """
    for entry in results:
        verdict = "FAIL" if not entry.ok else "OK"
        tail = entry.response_type or entry.error
        columns = (
            format(verdict, "4"),
            format(entry.agent_id, "20"),
            format(entry.stage, "8"),
            f"{entry.latency_ms:5}ms",
            f"chat={entry.chat_id}",
            f"url={entry.ws_url}",
            f"{tail}",
        )
        print(" ".join(columns))
def parse_args() -> argparse.Namespace:
    """Parse the smoke checker's command-line options from ``sys.argv``."""
    cli = argparse.ArgumentParser(
        description="Smoke-check Matrix agent WebSocket endpoints from matrix-agents.yaml."
    )
    # Declared as data so the full option surface is visible at a glance;
    # registration order (and therefore --help order) is preserved.
    option_specs = (
        ("--config", {"type": Path, "default": Path("config/matrix-agents.yaml")}),
        ("--agent", {"action": "append", "default": [], "help": "Agent id to check"}),
        ("--base-url", {"default": "", "help": "Fallback base URL when an agent has none"}),
        ("--timeout", {"type": float, "default": 10.0}),
        ("--concurrency", {"type": int, "default": 5}),
        ("--chat-id", {"type": int, "default": None, "help": "Use one explicit chat id"}),
        ("--chat-id-base", {"type": int, "default": 900000}),
        ("--message", {"default": None, "help": "Optional test message after STATUS"}),
        ("--json", {"action": "store_true", "help": "Print machine-readable JSON"}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def main() -> int:
    """Run the checks, print the report, and return the process exit code.

    Returns:
        ``0`` when every check passed, ``1`` otherwise.
    """
    args = parse_args()
    results = asyncio.run(run_checks(args))

    if not args.json:
        print_table(results)
    else:
        serialized = [asdict(result) for result in results]
        print(json.dumps(serialized, ensure_ascii=False, indent=2))

    any_failed = any(not result.ok for result in results)
    return 1 if any_failed else 0


if __name__ == "__main__":
    raise SystemExit(main())

33
tools/no_status_agent.py Normal file
View file

@ -0,0 +1,33 @@
from __future__ import annotations
import argparse
import asyncio
from aiohttp import web
async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
    """Accept the WebSocket upgrade, then stay silent for an hour before returning."""
    idle_seconds = 3600
    ws = web.WebSocketResponse()
    await ws.prepare(request)
    # Nothing is ever sent on the socket: the handler only sleeps.
    await asyncio.sleep(idle_seconds)
    return ws
def parse_args() -> argparse.Namespace:
    """Parse the stub server's ``--host`` and ``--port`` options from ``sys.argv``."""
    cli = argparse.ArgumentParser(
        description="WebSocket stub that accepts connections but sends no STATUS."
    )
    option_specs = (
        ("--host", {"default": "127.0.0.1"}),
        ("--port", {"type": int, "default": 8000}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
def main() -> None:
    """Serve the silent WebSocket stub on the host and port given on the command line."""
    options = parse_args()
    stub = web.Application()
    stub.router.add_get("/v1/agent_ws/{chat_id}/", websocket_handler)
    web.run_app(stub, host=options.host, port=options.port)


if __name__ == "__main__":
    main()