wip: 05-mvp-deployment paused at task 0/0
This commit is contained in:
parent
7e5f9c20a0
commit
6369721876
2 changed files with 93 additions and 42 deletions
|
|
@ -1,86 +1,114 @@
|
||||||
{
|
{
|
||||||
"version": "1.0",
|
"version": "1.0",
|
||||||
"timestamp": "2026-04-28T18:39:43.064Z",
|
"timestamp": "2026-04-30T15:03:14Z",
|
||||||
"phase": "05",
|
"phase": "05",
|
||||||
"phase_name": "MVP Deployment",
|
"phase_name": "MVP deployment",
|
||||||
"phase_dir": ".planning/phases/05-mvp-deployment",
|
"phase_dir": ".planning/phases/05-mvp-deployment",
|
||||||
"plan": 4,
|
"plan": 0,
|
||||||
"task": 0,
|
"task": 0,
|
||||||
"total_tasks": 0,
|
"total_tasks": 0,
|
||||||
"status": "paused",
|
"status": "paused",
|
||||||
"completed_tasks": [
|
"completed_tasks": [
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"name": "Finalize multi-agent surface image handoff",
|
"name": "Fix path-based base_url normalization and add WS debug visibility",
|
||||||
"status": "done",
|
"status": "done",
|
||||||
"commit": "5b53788"
|
"commit": "7e5f9c2"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"name": "Publish Docker image for the Matrix surface",
|
"name": "Add Matrix room recovery, reinvite flow, and default-agent warning behavior",
|
||||||
"status": "done",
|
"status": "done",
|
||||||
"artifact": "mput1/surfaces-bot:latest",
|
"commit": "7e5f9c2"
|
||||||
"digest": "sha256:26ba3a49290ab7c1cf0fa97f3de3fefdc70b59df7e6f1e0c2255728f8e2369be"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3,
|
"id": 3,
|
||||||
"name": "Verify multi-agent file-volume routing contract",
|
"name": "Switch user file handling to workspace-root filenames with copy-style collision suffixes",
|
||||||
"status": "done",
|
"status": "done",
|
||||||
"evidence": "tests cover /agents/17/incoming and /agents/17/output routing"
|
"commit": "7e5f9c2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"name": "Verify recent routing incident cause",
|
||||||
|
"status": "done",
|
||||||
|
"progress": "Confirmed that config lookup is exact-MXID based; mismatch in homeserver suffix caused fallback to the first agent."
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"remaining_tasks": [
|
"remaining_tasks": [
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 5,
|
||||||
"name": "Platform team integrates the published surface image into their 25-30 agent deployment",
|
"name": "Build and publish a fresh production image with the current workspace-root attachment contract",
|
||||||
"status": "external"
|
"status": "not_started"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 6,
|
||||||
"name": "Run a real platform smoke test with production Matrix credentials, matrix-agents.yaml, and shared /agents volume",
|
"name": "Send the new digest to platform and request Matrix bot redeploy",
|
||||||
"status": "not_started"
|
"status": "not_started"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"blockers": [
|
"blockers": [
|
||||||
{
|
{
|
||||||
"description": "Full production verification depends on the platform team's real 25-30 agent orchestration and volume mounts.",
|
"description": "Platform redeploy is still required after the next image publish.",
|
||||||
"type": "external",
|
"type": "external",
|
||||||
"workaround": "Use docker-compose.fullstack.yml only as local E2E harness; production uses mput1/surfaces-bot:latest plus platform-managed agents."
|
"workaround": "None until a fresh digest is published."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "Old Phase 04 planning files still contain placeholder content.",
|
||||||
|
"type": "technical",
|
||||||
|
"workaround": "Ignore for the current deploy task; clean later as planning debt."
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"human_actions_pending": [
|
"human_actions_pending": [
|
||||||
{
|
{
|
||||||
"action": "Send platform the image tag, digest, deploy docs, and matrix-agents.yaml contract",
|
"action": "Use exact Matrix MXIDs in user_agents, including the real homeserver suffix.",
|
||||||
"context": "The bot is published as a single surface container; platform supplies agents, base_url values, and /agents/N volume mounts.",
|
"context": "Routing fallback to the first agent occurs whenever the config key does not exactly match the sender.",
|
||||||
"blocking": true
|
"blocking": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"action": "Platform prepares production config/matrix-agents.yaml",
|
"action": "Redeploy matrix-bot after the new image is published.",
|
||||||
"context": "Each external agent needs agent_id, base_url, and workspace_path such as /agents/17.",
|
"context": "Config edits alone need a container restart; the file-contract code change needs a new image first.",
|
||||||
"blocking": true
|
"blocking": true
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"decisions": [
|
"decisions": [
|
||||||
{
|
{
|
||||||
"decision": "Ship one generic Matrix surface image, not a compose stack with 25-30 agents.",
|
"decision": "Keep fallback to the first agent for users missing from user_agents.",
|
||||||
"rationale": "The platform owns agent lifecycle/orchestration; the surface only needs base_url and workspace_path per agent.",
|
"rationale": "Platform wanted that behavior to remain available, but with explicit user warning.",
|
||||||
"phase": "05"
|
"phase": "05"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"decision": "Make SURFACES_BOT_IMAGE explicit and document the published mput1/surfaces-bot image.",
|
"decision": "Require exact Matrix MXID matching in user_agents.",
|
||||||
"rationale": "Docker Hub push access is namespace-specific; hardcoding mrkan0 caused insufficient_scope.",
|
"rationale": "Current routing is deterministic and simple; no fuzzy matching or homeserver aliasing was introduced.",
|
||||||
"phase": "05"
|
"phase": "05"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"decision": "Keep docker-compose.fullstack.yml as internal E2E only.",
|
"decision": "Use workspace-root filenames for incoming user files and Windows-style copy suffixes on collision.",
|
||||||
"rationale": "It validates the bot plus one local agent, but is not a model of production multi-agent orchestration.",
|
"rationale": "Platform requested removal of incoming/outgoing directory split and timestamp-prefixed names.",
|
||||||
"phase": "05"
|
"phase": "05"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"uncommitted_files": [
|
"uncommitted_files": [
|
||||||
".planning/HANDOFF.json",
|
".planning/HANDOFF.json",
|
||||||
".planning/phases/05-mvp-deployment/.continue-here.md"
|
".planning/STATE.md",
|
||||||
|
".planning/phases/05-mvp-deployment/.continue-here.md",
|
||||||
|
"README.md",
|
||||||
|
"adapter/matrix/agent_registry.py",
|
||||||
|
"adapter/matrix/bot.py",
|
||||||
|
"adapter/matrix/files.py",
|
||||||
|
"adapter/matrix/handlers/auth.py",
|
||||||
|
"adapter/matrix/handlers/chat.py",
|
||||||
|
"adapter/matrix/reconciliation.py",
|
||||||
|
"adapter/matrix/routed_platform.py",
|
||||||
|
"config/matrix-agents.example.yaml",
|
||||||
|
"docs/deploy-architecture.md",
|
||||||
|
"sdk/real.py",
|
||||||
|
"tests/adapter/matrix/test_dispatcher.py",
|
||||||
|
"tests/adapter/matrix/test_files.py",
|
||||||
|
"tests/adapter/matrix/test_invite_space.py",
|
||||||
|
"tests/adapter/matrix/test_reconciliation.py",
|
||||||
|
"tests/platform/test_real.py",
|
||||||
|
"tests/test_deploy_handoff.py"
|
||||||
],
|
],
|
||||||
"next_action": "Resume by coordinating platform integration: confirm they use mput1/surfaces-bot:latest, mount /agents, provide config/matrix-agents.yaml, then run a real Matrix smoke test.",
|
"next_action": "Build and publish a fresh production image from the current worktree, then send the digest to the platform for redeploy.",
|
||||||
"context_notes": "Phase 05 implementation and handoff commit 5b53788 are pushed. The Docker image was successfully built and pushed by the user as mput1/surfaces-bot:latest with digest sha256:26ba3a49290ab7c1cf0fa97f3de3fefdc70b59df7e6f1e0c2255728f8e2369be. Existing unrelated .planning dirt and a local jpg remain in the worktree and were intentionally not included in the handoff commit."
|
"context_notes": "Current runtime logic appears correct. The last reported routing bug was traced to config mismatch between the real Matrix sender and the user_agents key. Do not reuse the previously published recovery image for deployment because it does not include the final workspace-root file contract."
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,37 +3,60 @@ phase: 05-mvp-deployment
|
||||||
phase_name: MVP deployment
|
phase_name: MVP deployment
|
||||||
task: 0
|
task: 0
|
||||||
total_tasks: 0
|
total_tasks: 0
|
||||||
status: completed
|
status: paused
|
||||||
last_updated: 2026-04-28T21:07:17Z
|
last_updated: 2026-04-30T15:03:14Z
|
||||||
---
|
---
|
||||||
|
|
||||||
<current_state>
|
<current_state>
|
||||||
Phase 05 deployment handoff is complete. Image rebuilt for linux/amd64 and handoff text prepared for platform team.
|
Phase 05 code changes are in place, but the latest workspace-root attachment contract is not yet published in a new production image. The last debugging step of the 2026-04-30 session confirmed that the user-to-agent config itself was fine except for one exact-MXID mismatch: the homeserver suffix in `user_agents` did not match the real Matrix sender, so fallback to the first agent was expected.
|
||||||
</current_state>
|
</current_state>
|
||||||
|
|
||||||
<completed_work>
|
<completed_work>
|
||||||
|
|
||||||
- Rebuilt image for linux/amd64 (was arm64 only): `mput1/surfaces-bot:latest`
|
- Fixed the path-based `base_url` normalization bug that caused WS connects to drop route prefixes.
|
||||||
- Updated deploy handoff digest in .continue-here.md
|
- Added WS lifecycle debug logging behind `SURFACES_DEBUG_WS=1`.
|
||||||
- Prepared deployment checklist text for platform
|
- Added Matrix routing/recovery behavior:
|
||||||
|
- warning users when they are not listed in `user_agents`
|
||||||
|
- preserving room bindings across config updates
|
||||||
|
- re-inviting users back into their Space and active rooms after leave
|
||||||
|
- `!new` from the entry/DM room to create a fresh working chat
|
||||||
|
- Reworked attachment handling so user files now go directly into the agent workspace root with Windows-style collision suffixes like `file (1).pdf`.
|
||||||
|
- Updated docs and tests to match the new root-workspace file contract.
|
||||||
|
- Verified that the recent “still goes to default agent” report was caused by exact MXID mismatch in config, not by YAML parsing or runtime routing logic.
|
||||||
|
- Published earlier images:
|
||||||
|
- `mput1/surfaces-bot:debug-ws-20260429`
|
||||||
|
- `mput1/surfaces-bot:matrix-recovery-20260429`
|
||||||
</completed_work>
|
</completed_work>
|
||||||
|
|
||||||
<remaining_work>
|
<remaining_work>
|
||||||
|
|
||||||
- Platform needs to pull image and deploy
|
- Build and publish a new production image that includes the latest workspace-root attachment changes.
|
||||||
- Awaiting smoke test confirmation from platform side
|
- Give the platform the new digest and ask them to redeploy the Matrix bot container.
|
||||||
|
- Optionally run local smoke/fullstack validation once more before publishing if extra confidence is needed.
|
||||||
</remaining_work>
|
</remaining_work>
|
||||||
|
|
||||||
<decisions_made>
|
<decisions_made>
|
||||||
|
|
||||||
- Rebuild for amd64 to match platform's production environment
|
- Keep the fallback to the first agent when a user is missing from `user_agents`.
|
||||||
|
- Require exact Matrix MXID match in `user_agents`; no fuzzy matching or homeserver normalization was added.
|
||||||
|
- Warn the user in-band when default-agent fallback is used.
|
||||||
|
- Keep room identity and `platform_chat_id` stable across config updates.
|
||||||
|
- Require a container restart for config changes; no image rebuild is needed for `matrix-agents.yaml` edits alone.
|
||||||
|
- Remove `incoming/` and timestamp prefixes from the attachment contract.
|
||||||
|
- Save uploaded user files directly at the workspace root and resolve collisions with copy-style suffixes.
|
||||||
</decisions_made>
|
</decisions_made>
|
||||||
|
|
||||||
<blockers>
|
<blockers>
|
||||||
|
|
||||||
- None — implementation complete, awaiting platform deployment
|
- No code blocker.
|
||||||
|
- External dependency: platform redeploy after the next image publish.
|
||||||
|
- Historical debt: placeholder summary/plan artifacts still exist in old Phase 04 files and were not cleaned during this session.
|
||||||
</blockers>
|
</blockers>
|
||||||
|
|
||||||
|
<context>
|
||||||
|
The current codebase should route correctly if the deployed config uses the exact real Matrix sender IDs, e.g. `@user:matrix.lambda.coredump.ru`. The next likely mistake during resume would be sending the platform the wrong image digest: the currently published recovery image (`mput1/surfaces-bot:matrix-recovery-20260429`) predates the latest file-contract change. Resume by building a fresh image from the current worktree, not by reusing the old digest.
|
||||||
|
</context>
|
||||||
|
|
||||||
<next_action>
|
<next_action>
|
||||||
Await platform deployment confirmation. No further implementation work needed until platform reports issues or requests changes.
|
Rebuild the production image from the current worktree, publish it, and send the new digest to the platform for redeploy.
|
||||||
</next_action>
|
</next_action>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue