wip: deployment architecture research — Phase 05 ready to plan

- docs/deploy-architecture.md: full deployment topology, agent API, file transfer via shared volume
- .planning/HANDOFF.json + .continue-here.md: session state for Phase 05 planning
This commit is contained in:
Mikhail Putilovskij 2026-04-27 21:46:27 +03:00
parent c34db0e6c0
commit 8ffbe7b6b3
4 changed files with 308 additions and 29 deletions

View file

@ -1,38 +1,100 @@
{
"version": "1.0",
"timestamp": "2026-04-24T12:16:09.301Z",
"phase": "04",
"phase_name": "matrix-mvp-shared-agent-context-and-context-management",
"phase_dir": "04-matrix-mvp-shared-agent-context-and-context-management-comma",
"plan": null,
"task": null,
"total_tasks": null,
"status": "paused",
"timestamp": "2026-04-27T18:44:51.832Z",
"phase": "05",
"phase_name": "deployment",
"phase_dir": null,
"plan": 0,
"task": 0,
"total_tasks": 0,
"status": "pre-planning",
"completed_tasks": [
{"id": 1, "name": "docker-compose config mount + MATRIX_AGENT_REGISTRY_PATH", "status": "done"},
{"id": 2, "name": "debug logging in sdk/real.py (_stream_agent_events)", "status": "done"},
{"id": 3, "name": "debug logging in platform-agent service.py", "status": "done"}
{
"id": 1,
"name": "Research platform repos (agent, agent_api, master)",
"status": "done",
"commit": null
},
{
"id": 2,
"name": "Clarify deployment topology with platform team",
"status": "done",
"commit": null
},
{
"id": 3,
"name": "Create docs/deploy-architecture.md",
"status": "done",
"commit": null
}
],
"remaining_tasks": [
{"id": 4, "name": "run docker compose up --build and get platform-agent logs with stream_event lines", "status": "not_started"},
{"id": 5, "name": "analyze logs: content_type and langgraph_node to find where first chunk is lost", "status": "not_started"},
{"id": 6, "name": "fix in service.py based on findings (filter by node, handle list content, or capture subagent output)", "status": "not_started"}
{"id": 4, "name": "Merge feat/matrix-direct-agent-prototype → main", "status": "not_started"},
{"id": 5, "name": "Plan Phase 05 (deployment)", "status": "not_started"},
{"id": 6, "name": "Execute Phase 05", "status": "not_started"}
],
"blockers": [
{
"description": "agent_api #9-clientside-tool-call убирает attachments и MsgEventSendFile — если смержат до деплоя, сломает file transfer",
"type": "external",
"workaround": "Используем master пока #9 не merged. Уточнить у Азамата сроки."
},
{
"description": "AGENT_ID и COMPOSIO_API_KEY значения для каждого агента — нужны от платформы",
"type": "human_action",
"workaround": "Запросить у Азамата перед деплоем"
}
],
"blockers": [],
"human_actions_pending": [
{"action": "run docker compose up --build and reproduce the alphabet/image truncation bug", "context": "Need platform-agent logs with DEBUG level to see stream_event lines", "blocking": true}
{
"action": "Получить значения AGENT_ID и COMPOSIO_API_KEY для каждого агента от платформы",
"context": "Composio смержен в main platform-agent, теперь обязателен",
"blocking": true
},
{
"action": "Уточнить у Азамата сроки мержа agent_api #9 (убирает attachments/MsgEventSendFile)",
"context": "Мы строим file transfer на этих фичах из master",
"blocking": false
},
{
"action": "Уточнить: chat_id=0 для всех или используем разные chat_id для C1/C2/C3",
"context": "Платформа показала пример с одним AgentApi на агента без явного chat_id",
"blocking": false
}
],
"decisions": [
{"decision": "Bug is in platform-agent service.py __astream, not in surfaces bot", "rationale": "Logs show first text chunk already truncated at index=0 level", "phase": "04"},
{"decision": "deepagents uses dispatcher+subagent architecture", "rationale": "create_deep_agent wraps SubAgentMiddleware with general-purpose subagent", "phase": "04"},
{"decision": "astream_events v2 processes on_chat_model_stream from ALL nodes without filtering", "rationale": "service.py has no namespace/node filtering", "phase": "04"}
{
"decision": "Один инстанс Matrix-бота на всех пользователей, один агент-контейнер на пользователя",
"rationale": "Подтверждено платформой. Reverse proxy на lambda.coredump.ru:7000 роутит по пути /agent_N/",
"phase": "pre-05"
},
{
"decision": "Файлы через shared volume /agents/, не через API",
"rationale": "Surface и агент видят один volume. Surface пишет файл → передаёт путь в attachments. Агент эмитит MsgEventSendFile → Surface читает файл и шлёт в Matrix",
"phase": "pre-05"
},
{
"decision": "Используем agent_api master (с attachments и MsgEventSendFile), не ветку #9",
"rationale": "master стабильный, #9 в разработке и убирает нужные нам фичи",
"phase": "pre-05"
},
{
"decision": "Конфиг: два словаря — user_id→agent_id и agent_id→{base_url, workspace_path}",
"rationale": "Платформа подтвердила статический маппинг для MVP без Master",
"phase": "pre-05"
},
{
"decision": "Master (platform-master feat/storage) не используем для MVP",
"rationale": "Ещё в разработке. Используем статический конфиг. При готовности Master — мигрируем.",
"phase": "pre-05"
}
],
"uncommitted_files": [
"sdk/real.py (debug logging added)",
"docker-compose.yml (config volume mount added)",
"config/matrix-agents.example.yaml (label names updated)",
"external/platform-agent/src/agent/service.py (debug logging added, in submodule)"
"docs/deploy-architecture.md",
"docs/superpowers/plans/2026-04-24-matrix-multi-agent-routing-and-restart-state.md",
"config/matrix-agents.yaml",
".planning/STATE.md"
],
"next_action": "Run: docker compose up --build. Send a message that triggers the bug (e.g. 'Напомни алфавит' after sending an image). Look for stream_event lines in platform-agent-1 logs. Check content_type and langgraph_node values for truncated responses.",
"context_notes": "Investigating first-chunk truncation bug in Matrix bot responses. The bug appears when agent uses tools (image analysis) OR when images are in context. Platform-agent uses deepagents framework (dispatcher+subagent pattern). The hypothesis is that on_chat_model_stream events from multiple graph nodes are all forwarded as MsgEventTextChunk without filtering, OR that chunk.content is sometimes a list instead of str causing validation issues. Added logging to confirm. The fix will likely be in service.py: either filter by langgraph_node or handle list content type."
"next_action": "Запустить /gsd-plan-phase 05 для планирования фазы деплоя. Прочитать docs/deploy-architecture.md перед планированием.",
"context_notes": "Phase 04 полностью завершена, ветка feat/matrix-direct-agent-prototype готова к merge. Этот сеанс был посвящён архитектуре деплоя — исследовали платформу, обсуждали с командой. Всё что знаем про деплой — в docs/deploy-architecture.md. Phase 05 = деплой: обновить конфиг, sdk/real.py, добавить file transfer в Matrix адаптер, написать docker-compose."
}