Compare commits


30 commits

Author SHA1 Message Date
David Shvarts
6fe484c44c ref #13: in-memory metadata repositories (S02) 2026-04-07 20:58:30 +03:00
Azamat
5381c997e2 add storage foundation contracts 2026-04-07 19:31:50 +03:00
Azamat
0ca0bac9bf [feat] add tasks 2026-04-07 19:11:51 +03:00
Azamat
3293bccc5b [feat] update readme.md 2026-04-03 09:49:14 +03:00
Azamat
c5b6a84a4b add sandbox rollback regression tests 2026-04-03 02:29:18 +03:00
Azamat
9b6c7908ad fix sandbox create rollback gap 2026-04-03 02:18:54 +03:00
Azamat
b4a2a9ceea add sandbox observability failure tests 2026-04-03 02:04:51 +03:00
Azamat
02770bce7d fix sandbox replace trace identity 2026-04-03 01:55:12 +03:00
Azamat
dff28efecf add sandbox observability tests 2026-04-03 01:34:10 +03:00
Azamat
8d3a080d45 instrument sandbox docker runtime 2026-04-03 01:15:23 +03:00
Azamat
4cdf6e45de instrument sandbox usecases 2026-04-03 00:56:37 +03:00
Azamat
a86e1ee8c7 add sandbox observability contracts 2026-04-03 00:37:35 +03:00
Azamat
e9ef178b15 [feat] add docker in docker support 2026-04-03 00:16:19 +03:00
Azamat
4cb3c5410c [fix] lock del idle 2026-04-02 23:52:36 +03:00
Azamat
50af62b3fb [fix] restart gap 2026-04-02 23:39:25 +03:00
Azamat
770af1fe76 [feat] change str id type to UUID 2026-04-02 23:09:04 +03:00
Azamat
e629e34c4d ref #10: [fix] enforce UUID chat ids
Normalize chat ids to a single UUID form so locks, repository keys, and mount paths cannot diverge through path-like aliases.
2026-04-02 22:35:50 +03:00
Azamat
44f1549d80 [feat] add test race condition & cleanup resilience 2026-04-02 21:47:55 +03:00
Azamat
539f019f39 [feat] remove example user 2026-04-02 21:26:23 +03:00
Azamat
776b513858 [fix] cleanup task to other thread 2026-04-02 21:17:21 +03:00
Azamat
f5d13feaf9 [fix] race condition 2026-04-02 20:56:26 +03:00
Azamat
fb974fff1e ref #9: [feat] add tests 2026-04-02 20:28:14 +03:00
Azamat
3a7973accd ref #8: [feat] add config for docker daemon 2026-04-02 14:09:41 +03:00
Azamat
d2506e0c63 ref #8: [feat] add http endpoint 2026-04-02 13:41:41 +03:00
Azamat
bae540427a ref #7: [feat] add cleanup task 2026-04-02 13:27:44 +03:00
Azamat
33ebcb1a82 ref #6: [feat] add impl in memory session repository 2026-04-02 13:12:34 +03:00
Azamat
87c789b7fe ref #5: [feat] add docker impl 2026-04-02 12:55:38 +03:00
Azamat
3448266c1d close #4: [feat] add config 2026-04-02 12:20:16 +03:00
Azamat
7b3f82e805 ref #3: [feat] add context and tasks for master-service 2026-04-02 12:10:43 +03:00
Azamat
f0c4988b44 [feat] add context and tasks for master-service 2026-04-02 11:57:20 +03:00
49 changed files with 6461 additions and 195 deletions

.gitignore

@@ -14,3 +14,5 @@ wheels/
!docs
!AGENTS.md
!tasks.md
opencode.json


@@ -41,16 +41,16 @@
- Keep HTTP models and middleware inside `adapter/http/fastapi/`
## Workflow
- Use `tasks.md` for planning
- Use dir `tasks/` for planning
- Do not use Beads
- Do not use `bd`
- Use `uv` for Python commands and dependency management
- Do not create commits on your own
- Work on one task at a time
- Prefer delegation for implementation
- Delegate only one task at a time
- After one task return to the user with result verification and next options
- Wait for the user before the next task commit or fix
- After implementation, run `Code-Reviewer` agent
- Pass errors to the `test-engineer` agent to capture them
- Delegate fixing the errors to the `Feature-Developer` agent
- Repeat the cycle until no errors remain
- Ensure all tests pass
## Makefile
- `make install` install deps with `uv`
@@ -83,3 +83,6 @@
- Prefer explicit wiring over magic
- Do not expand scope without user approval
- Do not `from __future__ import annotations`
- Do not `from importlib import import_module`
- Do not `importlib`
- Do not `cast`


@@ -31,7 +31,7 @@ test:
uv run pytest -v
lint:
uv run ruff check .
uv run ruff check . --fix
typecheck:
uv run mypy .

README.md

@@ -1,36 +1,268 @@
This is a template for a Python service built on clean architecture, with a replaceable web layer, typed config, explicit dependency wiring, and observability through ports.
# master-service
## What this project is
`master-service` is a control-plane service for sandbox containers running an AI agent.
It starts and reuses one sandbox per chat, attaches working volumes, restores state after a restart, and exposes a minimal HTTP API under `/api/v1`.
- A small reference service with `domain/`, `usecase/`, `repository/`, and `adapter/` layers
- A template for FastAPI services where FastAPI stays only in the outer HTTP adapter
- A project where the config is assembled from `config/app.yaml`, `.env`, and env vars into a single tree of dataclass configs
- A project where repositories and usecases are created once at application startup in the composition root
- A project where logs, metrics, and traces are hidden behind interfaces and can run through `stdout`, a file, or an OpenTelemetry runtime
Note: the local `config/app.yaml` historically still contains the template name `web-python-skelet`.
If you want `/health` and the OTel service name to show `master-service` locally as well, override:
- `APP_NAME=master-service`
- `APP_OTEL_SERVICE_NAME=master-service`
## Core ideas
The service is implemented in Python with Clean Architecture:
- `domain/` — entities and domain errors
- `usecase/` — application scenarios and ports
- `repository/` — repository implementations
- `adapter/` — HTTP, config, DI, Docker runtime, and observability
- Clean Architecture and SOLID boundaries
- Dependencies point inward only
- Thin adapter layers and explicit dependency assembly
- Replaceable HTTP layer
- Observability without OpenTelemetry leaking into the inner layers
## What it can do today
The current sandbox MVP covers:
- `GET /api/v1/health`
- `POST /api/v1/create` with `chat_id: UUID`
- one active sandbox per chat
- reuse of the active sandbox until its TTL expires
- background cleanup of expired sandboxes
- startup reconciliation via Docker labels after a service restart
- chat mount `rw`, dependencies mount `ro`, lambda-tools mount `ro`
- logs, metrics, and traces through the `Logger`, `Metrics`, and `Tracer` ports
Out of scope for now:
- auth and access control
- p2p/WebSocket lease
- workspace/chat CRUD API
- a central DB, artifacts, S3, quotas, and retention policy
## How the project is organized
- FastAPI lives only in the outer adapter layer
- Docker lives only in the outer adapter layer
- the config is assembled from `config/app.yaml`, `.env`, and env vars into a single dataclass tree
- repositories and usecases are created once at startup in `adapter/di/container.py`
- observability does not leak into the inner layers through the OpenTelemetry SDK
## Layout
- `domain/` — core model and domain errors
- `usecase/` — use cases and interfaces
- `repository/` — in-memory and other repository implementations
- `adapter/config/` — typed config models and loader
- `adapter/docker/` — Docker sandbox runtime
- `adapter/observability/` — logger/metrics/tracer runtime factory
- `adapter/otel/` — OpenTelemetry adapters
- `adapter/di/` — composition root
- `adapter/http/fastapi/` — app, middleware, schemas, routers
- `adapter/sandbox/` — sandbox reconciliation logic
- `config/` — YAML config files
- `docs/` — ADRs and project guides
## Quick start
### Requirements
- Python 3.13
- `uv`
- a local Docker daemon
- the `APP_API_TOKEN` and `APP_SIGNING_KEY` secrets
### Installation
```bash
make install
```
### Local run
```bash
APP_API_TOKEN=local-api-token APP_SIGNING_KEY=local-signing-key make run
```
The application starts on `http://0.0.0.0:8123` and exposes the versioned API under `/api/v1`.
This brings up the API itself, but a successful `POST /api/v1/create` locally also needs a working sandbox runtime:
- the Docker daemon must be reachable at `docker.base_url`
- the `sandbox.image` image must exist locally
- the `sandbox.dependencies_host_path` and `sandbox.lambda_tools_host_path` directories must exist
With the default `config/app.yaml` that means:
```bash
mkdir -p var/sandbox/dependencies var/sandbox/lambda-tools
docker image inspect ai-agent:latest >/dev/null
```
If you do not have an `ai-agent:latest` image ready, it is easier to start with the Docker Compose smoke path below.
After startup the service is available at:
- `http://127.0.0.1:8123/api/v1/health`
Health check:
```bash
curl http://127.0.0.1:8123/api/v1/health
```
Create or reuse a sandbox:
```bash
curl -X POST http://127.0.0.1:8123/api/v1/create \
-H 'Content-Type: application/json' \
-d '{"chat_id":"11111111-1111-1111-1111-111111111111"}'
```
Example response:
```json
{
"session_id": "3701cfe3-e05e-48af-8385-442dcd954ca2",
"chat_id": "11111111-1111-1111-1111-111111111111",
"container_id": "64d839c6007de9396ee08ad4af4a22a59a6410ec5f4892a9277a87eb49c3ff5d",
"status": "running",
"expires_at": "2026-04-02T21:11:38.292893Z"
}
```
## Running with Docker Compose
A `docker-compose.yml` is provided for a local smoke run.
It brings up:
- `app`
- `docker-engine` in Docker-in-Docker mode
- `otel-collector`
The `app` service gets its compose-specific config from:
- `config/docker-compose.yml`
Start:
```bash
make compose-up
```
Check:
```bash
make compose-ps
make compose-logs
```
Stop:
```bash
make compose-down
```
Notes:
- `config/docker-compose.yml` currently sets `sandbox.image: nginx:1.27-alpine` for the smoke check
- for a real agent runtime, replace `sandbox.image` with the image of your sandbox/agent container
- in compose, the auth env vars are required for the startup config, but the current MVP API does not yet verify the request token
## Configuration
### Config sources
The config is assembled in this order:
1. the base YAML from `config/app.yaml`
2. values from `.env`
3. process env vars on top of `.env`
That is, env vars have the highest priority.
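This precedence behaves like a plain dict merge where later layers win — a simplified sketch, not the loader's actual implementation:

```python
def merge_layers(*layers: dict[str, str]) -> dict[str, str]:
    # Later layers override earlier ones: YAML -> .env -> process env.
    merged: dict[str, str] = {}
    for layer in layers:
        merged.update(layer)
    return merged


yaml_layer = {'APP_NAME': 'web-python-skelet', 'APP_HTTP_PORT': '8123'}
dotenv_layer = {'APP_NAME': 'master-service'}
process_env = {'APP_HTTP_PORT': '9000'}

config = merge_layers(yaml_layer, dotenv_layer, process_env)
# APP_NAME comes from .env, APP_HTTP_PORT from the process env
assert config == {'APP_NAME': 'master-service', 'APP_HTTP_PORT': '9000'}
```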
### Required secrets
Always required:
- `APP_API_TOKEN`
- `APP_SIGNING_KEY`
Today these are startup config, not active request auth for `/api/v1/create` and `/api/v1/health`.
That is, in the current MVP the token does not need to be passed in HTTP headers when calling these endpoints.
### Main YAML sections
`config/app.yaml` and `config/docker-compose.yml` contain these sections:
- `app`
- `http`
- `logging`
- `metrics`
- `tracing`
- `otel`
- `docker`
- `sandbox`
- `security`
### Useful env overrides
The most commonly useful ones:
#### General
- `APP_NAME`
- `APP_ENV`
- `APP_HTTP_HOST`
- `APP_HTTP_PORT`
#### Logging and observability
- `APP_LOGGING_LEVEL`
- `APP_LOGGING_OUTPUT`
- `APP_LOGGING_FORMAT`
- `APP_LOGGING_FILE_PATH`
- `APP_METRICS_ENABLED`
- `APP_TRACING_ENABLED`
- `APP_OTEL_SERVICE_NAME`
- `APP_OTEL_LOGS_ENDPOINT`
- `APP_OTEL_METRICS_ENDPOINT`
- `APP_OTEL_TRACES_ENDPOINT`
#### Docker runtime
- `APP_DOCKER_BASE_URL`
#### Sandbox
- `APP_SANDBOX_IMAGE`
- `APP_SANDBOX_TTL_SECONDS`
- `APP_SANDBOX_CLEANUP_INTERVAL_SECONDS`
- `APP_SANDBOX_CHATS_ROOT`
- `APP_SANDBOX_DEPENDENCIES_HOST_PATH`
- `APP_SANDBOX_LAMBDA_TOOLS_HOST_PATH`
- `APP_SANDBOX_CHAT_MOUNT_PATH`
- `APP_SANDBOX_DEPENDENCIES_MOUNT_PATH`
- `APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH`
#### Security
- `APP_API_TOKEN_HEADER`
- `APP_API_TOKEN`
- `APP_SIGNING_KEY`
### What matters in the sandbox config
- `docker.base_url` — Docker daemon address
- `sandbox.image` — sandbox container image
- `sandbox.ttl_seconds` — sandbox TTL
- `sandbox.cleanup_interval_seconds` — cleanup loop frequency
- `sandbox.chats_root` — root of the chat directories
- `sandbox.dependencies_host_path` — host path for the dependency cache
- `sandbox.lambda_tools_host_path` — host path for the read-only lambda-tools
- `sandbox.chat_mount_path` — path inside the sandbox for the chat volume
- `sandbox.dependencies_mount_path` — path inside the sandbox for the dependency cache
- `sandbox.lambda_tools_mount_path` — path inside the sandbox for lambda-tools
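Put together, these keys might look like this in YAML (illustrative values only; check `config/app.yaml` for the real defaults):

```yaml
docker:
  base_url: unix:///var/run/docker.sock
sandbox:
  image: ai-agent:latest
  ttl_seconds: 900
  cleanup_interval_seconds: 60
  chats_root: var/sandbox/chats
  dependencies_host_path: var/sandbox/dependencies
  lambda_tools_host_path: var/sandbox/lambda-tools
  chat_mount_path: /workspace/chat
  dependencies_mount_path: /workspace/dependencies
  lambda_tools_mount_path: /workspace/lambda-tools
```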
## Main commands
- `make install` — install dependencies with `uv`
- `make run` — local run
- `make run-otel` — run with OTel endpoints taken from env
- `make test` — `pytest`
- `make lint` — `ruff`
- `make typecheck` — `mypy`
- `make pre-commit` — lint + typecheck + test
- `make compose-build` — build the compose images
- `make compose-up` — bring up the local stack
- `make compose-down` — stop the stack
- `make compose-logs` — tail the logs
- `make compose-ps` — show service status
## Documentation
### Guides
- [Project rules and constraints for the agent](AGENTS.md)
- [Project code style for the AI agent](docs/CODESTYLE.md)
- [Project code style](docs/CODESTYLE.md)
- [Clean architecture, SOLID, DIP, Protocol, and repositories](docs/CLEAN_ARCHITECTURE_RU.md)
- [Logs, metrics, and traces in this project](docs/OBSERVABILITY_RU.md)
- [How clean architecture is implemented here](docs/PROJECT_GUIDE_RU.md)
@@ -43,43 +275,24 @@ APP_API_TOKEN=local-api-token APP_SIGNING_KEY=local-signing-key make run
- [003 Observability Via Interfaces](docs/003-observability-via-interfaces.md)
- [004 Versioned HTTP API](docs/004-versioned-http-api.md)
- [005 Early FastAPI OTel Instrumentation](docs/005-fastapi-otel-early-instrumentation.md)
- [006 MVP Docker Sandbox Orchestration](docs/006-mvp-docker-sandbox-orchestration.md)
- [007 Startup Sandbox Reconciliation](docs/007-startup-sandbox-reconciliation.md)
- [008 Sandbox Lifecycle Observability](docs/008-sandbox-lifecycle-observability.md)
## Project layout
## For the AI agent
- `domain/` - core entities and domain errors
- `usecase/` - application scenarios and ports
- `repository/` - repository implementations
- `adapter/config/` - typed config loading and models
- `adapter/observability/` - runtime selection for logger, metrics, and tracer
- `adapter/otel/` - OpenTelemetry adapters
- `adapter/di/` - composition root and singleton wiring
- `adapter/http/fastapi/` - HTTP schemas, dependencies, middleware, and routers
- `config/` - YAML config for the application and the local OTel collector
If you are changing this project as an AI agent, read these first:
## For AI
1. [AGENTS.md](AGENTS.md)
2. [docs/CODESTYLE.md](docs/CODESTYLE.md)
3. [docs/PROJECT_GUIDE_RU.md](docs/PROJECT_GUIDE_RU.md)
4. [docs/CLEAN_ARCHITECTURE_RU.md](docs/CLEAN_ARCHITECTURE_RU.md)
5. [docs/OBSERVABILITY_RU.md](docs/OBSERVABILITY_RU.md)
6. the relevant ADRs in `docs/`
7. [tasks.md](tasks.md)
If you are an AI agent about to change something in this project, first read the documents in this order:
1. [Project rules and agent constraints](AGENTS.md) - the mandatory rules for working in this repository
2. [Project code style for the AI agent](docs/CODESTYLE.md) - layer boundaries, code style, and dependency rules
3. [How clean architecture is implemented here](docs/PROJECT_GUIDE_RU.md) - a practical map of the project and typical change scenarios
4. [Clean architecture, SOLID, DIP, Protocol, and repositories](docs/CLEAN_ARCHITECTURE_RU.md) - the basic architectural principles and examples
5. [Logs, metrics, and traces in this project](docs/OBSERVABILITY_RU.md) - read before any changes to observability, middleware, or runtime wiring
6. [ADRs in `docs/`](docs/001-composition-root-and-lifetimes.md) - read the relevant decisions before changing the architecture or startup wiring
7. [Task plan and work history](tasks.md) - understand what is done, what is deferred, and which constraints were recorded
Before starting work:
- Determine which layer the change belongs to: `domain/`, `usecase/`, `repository/`, or `adapter/`
- Make sure dependencies point inward only
- Do not pull FastAPI or OpenTelemetry into the inner layers
- Study the existing code in the target directory first, then make changes
- If the task touches an architectural decision, check the ADRs and project rules first
## Running and commands
- `APP_API_TOKEN` and `APP_SIGNING_KEY` are required for a local run
- `make run` starts the application locally
- `make run-otel` starts the application with local OTel endpoints from env vars
- `make pre-commit` runs `ruff`, `mypy`, and `pytest`
- `make compose-up` brings up the application and a local LGTM stack via Docker Compose
Key rules:
- first determine the layer of the change
- dependencies point inward only
- do not pull FastAPI or OpenTelemetry into the inner layers
- check architectural decisions against the ADRs


@@ -8,10 +8,12 @@ from dotenv import dotenv_values
from .model import (
AppConfig,
AppSectionConfig,
DockerConfig,
HttpConfig,
LoggingConfig,
MetricsConfig,
OtelConfig,
SandboxConfig,
SecurityConfig,
TracingConfig,
)
@@ -38,6 +40,8 @@ def load_config(
logging_section = _section(yaml_data, 'logging')
metrics_section = _section(yaml_data, 'metrics')
tracing_section = _section(yaml_data, 'tracing')
docker_section = _section(yaml_data, 'docker')
sandbox_section = _section(yaml_data, 'sandbox')
security_section = _section(yaml_data, 'security')
logging_output = _yaml_or_env_choice(
@@ -128,6 +132,16 @@ def load_config(
enable_metrics=metrics_enabled,
enable_tracing=tracing_enabled,
),
docker=DockerConfig(
base_url=_yaml_or_env_str(
docker_section,
'base_url',
'docker.base_url',
env_values,
'APP_DOCKER_BASE_URL',
)
),
sandbox=_load_sandbox_config(sandbox_section, env_values),
security=SecurityConfig(
token_header=_yaml_or_env_str(
security_section,
@@ -221,6 +235,77 @@ def _optional_section(data: Mapping[str, object], name: str) -> dict[str, object
return section
def _load_sandbox_config(
section: Mapping[str, object],
env: Mapping[str, str],
) -> SandboxConfig:
return SandboxConfig(
image=_yaml_or_env_str(
section,
'image',
'sandbox.image',
env,
'APP_SANDBOX_IMAGE',
),
ttl_seconds=_yaml_or_env_int(
section,
'ttl_seconds',
'sandbox.ttl_seconds',
env,
'APP_SANDBOX_TTL_SECONDS',
),
cleanup_interval_seconds=_yaml_or_env_int(
section,
'cleanup_interval_seconds',
'sandbox.cleanup_interval_seconds',
env,
'APP_SANDBOX_CLEANUP_INTERVAL_SECONDS',
),
chats_root=_yaml_or_env_str(
section,
'chats_root',
'sandbox.chats_root',
env,
'APP_SANDBOX_CHATS_ROOT',
),
dependencies_host_path=_yaml_or_env_str(
section,
'dependencies_host_path',
'sandbox.dependencies_host_path',
env,
'APP_SANDBOX_DEPENDENCIES_HOST_PATH',
),
lambda_tools_host_path=_yaml_or_env_str(
section,
'lambda_tools_host_path',
'sandbox.lambda_tools_host_path',
env,
'APP_SANDBOX_LAMBDA_TOOLS_HOST_PATH',
),
chat_mount_path=_yaml_or_env_str(
section,
'chat_mount_path',
'sandbox.chat_mount_path',
env,
'APP_SANDBOX_CHAT_MOUNT_PATH',
),
dependencies_mount_path=_yaml_or_env_str(
section,
'dependencies_mount_path',
'sandbox.dependencies_mount_path',
env,
'APP_SANDBOX_DEPENDENCIES_MOUNT_PATH',
),
lambda_tools_mount_path=_yaml_or_env_str(
section,
'lambda_tools_mount_path',
'sandbox.lambda_tools_mount_path',
env,
'APP_SANDBOX_LAMBDA_TOOLS_MOUNT_PATH',
),
)
def _load_otel_config(
data: Mapping[str, object],
env: Mapping[str, str],


@@ -40,6 +40,24 @@ class OtelConfig:
metric_export_interval: int
@dataclass(frozen=True, slots=True)
class DockerConfig:
base_url: str
@dataclass(frozen=True, slots=True)
class SandboxConfig:
image: str
ttl_seconds: int
cleanup_interval_seconds: int
chats_root: str
dependencies_host_path: str
lambda_tools_host_path: str
chat_mount_path: str
dependencies_mount_path: str
lambda_tools_mount_path: str
@dataclass(frozen=True, slots=True)
class SecurityConfig:
token_header: str
@@ -55,4 +73,6 @@ class AppConfig:
metrics: MetricsConfig
tracing: TracingConfig
otel: OtelConfig
docker: DockerConfig
sandbox: SandboxConfig
security: SecurityConfig


@@ -1,24 +1,32 @@
from collections.abc import Mapping
from dataclasses import dataclass, field
from datetime import UTC, datetime, timedelta
from pathlib import Path
import docker
from docker import DockerClient
from adapter.config.loader import load_config
from adapter.config.model import AppConfig
from adapter.docker.runtime import DockerSandboxRuntime
from adapter.observability.factory import build_observability
from adapter.observability.runtime import ObservabilityRuntime
from domain.user import User
from repository.user import InMemoryUserRepository
from usecase.user import GetUser
from adapter.sandbox.reconciliation import SandboxSessionReconciler
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import Clock
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox
@dataclass(frozen=True, slots=True)
class AppRepositories:
user: InMemoryUserRepository
sandbox_session: InMemorySandboxSessionRepository
@dataclass(frozen=True, slots=True)
class AppUsecases:
get_user: GetUser
create_sandbox: CreateSandbox
cleanup_expired_sandboxes: CleanupExpiredSandboxes
@dataclass(slots=True)
@@ -27,16 +35,30 @@ class AppContainer:
observability: ObservabilityRuntime
repositories: AppRepositories
usecases: AppUsecases
sandbox_reconciler: SandboxSessionReconciler = field(repr=False)
_docker_client: DockerClient = field(repr=False)
_is_shutdown: bool = field(default=False, init=False, repr=False)
def shutdown(self) -> None:
if self._is_shutdown:
return
try:
self.observability.shutdown()
finally:
self._is_shutdown = True
self._is_shutdown = True
errors: list[Exception] = []
for action in (self._docker_client.close, self.observability.shutdown):
try:
action()
except Exception as exc:
errors.append(exc)
if errors:
raise ExceptionGroup('shutdown failed', errors)
class SystemClock(Clock):
def now(self) -> datetime:
return datetime.now(tz=UTC)
def build_container(
@@ -54,17 +76,46 @@ def build_container(
)
observability = build_observability(app_config)
clock = SystemClock()
docker_client = docker.DockerClient(base_url=app_config.docker.base_url)
user_repository = InMemoryUserRepository(
observability.tracer, [User(id='123', email='aza@gglamer.ru', name='gglamer')]
sandbox_repository = InMemorySandboxSessionRepository()
sandbox_locker = ProcessLocalSandboxLifecycleLocker()
sandbox_runtime = DockerSandboxRuntime(
app_config.sandbox,
docker_client,
observability.metrics,
observability.tracer,
)
repositories = AppRepositories(user=user_repository)
sandbox_reconciler = SandboxSessionReconciler(
state_source=sandbox_runtime,
registry=sandbox_repository,
logger=observability.logger,
metrics=observability.metrics,
tracer=observability.tracer,
)
repositories = AppRepositories(sandbox_session=sandbox_repository)
usecases = AppUsecases(
get_user=GetUser(
repository=user_repository,
create_sandbox=CreateSandbox(
repository=sandbox_repository,
locker=sandbox_locker,
runtime=sandbox_runtime,
clock=clock,
logger=observability.logger,
metrics=observability.metrics,
tracer=observability.tracer,
)
ttl=timedelta(seconds=app_config.sandbox.ttl_seconds),
),
cleanup_expired_sandboxes=CleanupExpiredSandboxes(
repository=sandbox_repository,
locker=sandbox_locker,
runtime=sandbox_runtime,
clock=clock,
logger=observability.logger,
metrics=observability.metrics,
tracer=observability.tracer,
),
)
return AppContainer(
@@ -72,4 +123,6 @@ def build_container(
observability=observability,
repositories=repositories,
usecases=usecases,
sandbox_reconciler=sandbox_reconciler,
_docker_client=docker_client,
)


adapter/docker/runtime.py

@@ -0,0 +1,315 @@
import time
from datetime import datetime
from pathlib import Path
from uuid import UUID
from docker import DockerClient
from docker.errors import DockerException, NotFound
from docker.types import Mount
from adapter.config.model import SandboxConfig
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from usecase.interface import Metrics, SandboxRuntime, Span, Tracer
SANDBOX_LABELS = ('session_id', 'chat_id', 'expires_at')
class DockerSandboxRuntime(SandboxRuntime):
def __init__(
self,
config: SandboxConfig,
client: DockerClient,
metrics: Metrics,
tracer: Tracer,
) -> None:
self._config = config
self._client = client
self._metrics = metrics
self._tracer = tracer
def create(
self,
*,
session_id: UUID,
chat_id: UUID,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
started_at = time.perf_counter()
result = 'error'
with self._tracer.start_span(
'adapter.docker.create_sandbox',
attrs={
'chat.id': str(chat_id),
'session.id': str(session_id),
},
) as span:
try:
try:
chat_path = self._chat_path(chat_id)
dependencies_path = self._readonly_host_path(
self._config.dependencies_host_path
)
lambda_tools_path = self._readonly_host_path(
self._config.lambda_tools_host_path
)
chat_path.mkdir(parents=True, exist_ok=True)
container = self._client.containers.run(
self._config.image,
detach=True,
labels=self._labels(session_id, chat_id, expires_at),
mounts=self._mounts(
chat_path,
dependencies_path,
lambda_tools_path,
),
)
except (DockerException, OSError, ValueError) as exc:
raise SandboxStartError(str(chat_id)) from exc
container_id = str(getattr(container, 'id', '')).strip()
if not container_id:
raise SandboxStartError(str(chat_id))
result = 'created'
span.set_attribute('container.id', container_id)
span.set_attribute('sandbox.result', result)
return SandboxSession(
session_id=session_id,
chat_id=chat_id,
container_id=container_id,
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
)
except Exception as exc:
span.set_attribute('sandbox.result', result)
span.record_error(exc)
self._metrics.increment(
'sandbox.runtime.error.total',
attrs=_runtime_error_metric_attrs('create', _error_type(exc)),
)
raise
finally:
self._metrics.record(
'sandbox.runtime.create.duration_ms',
_duration_ms(started_at),
attrs=_runtime_metric_attrs('create', result),
)
def stop(self, container_id: str) -> None:
started_at = time.perf_counter()
result = 'error'
with self._tracer.start_span(
'adapter.docker.stop_sandbox',
attrs={'container.id': container_id},
) as span:
try:
container = self._client.containers.get(container_id)
_set_span_container_attrs(span, container)
container.stop()
result = 'stopped'
span.set_attribute('sandbox.result', result)
except NotFound:
result = 'not_found'
span.set_attribute('sandbox.result', result)
return
except DockerException as exc:
span.set_attribute('sandbox.result', result)
span.record_error(exc)
self._metrics.increment(
'sandbox.runtime.error.total',
attrs=_runtime_error_metric_attrs('stop', type(exc).__name__),
)
raise SandboxError('sandbox_stop_failed') from exc
finally:
self._metrics.record(
'sandbox.runtime.stop.duration_ms',
_duration_ms(started_at),
attrs=_runtime_metric_attrs('stop', result),
)
def list_active_sessions(self) -> list[SandboxSession]:
started_at = time.perf_counter()
result = 'error'
with self._tracer.start_span(
'adapter.docker.list_active_sandboxes',
) as span:
try:
try:
containers = self._client.containers.list(
filters={'label': list(SANDBOX_LABELS)}
)
except DockerException as exc:
raise SandboxError('sandbox_list_failed') from exc
sessions: list[SandboxSession] = []
for container in containers:
session = self._session_from_container(container)
if session is None:
continue
sessions.append(session)
result = 'listed'
span.set_attribute('sandbox.container_count', len(containers))
span.set_attribute('sandbox.active_count', len(sessions))
span.set_attribute('sandbox.result', result)
return sessions
except Exception as exc:
span.set_attribute('sandbox.result', result)
span.record_error(exc)
self._metrics.increment(
'sandbox.runtime.error.total',
attrs=_runtime_error_metric_attrs('list_active', _error_type(exc)),
)
raise
finally:
self._metrics.record(
'sandbox.runtime.list_active.duration_ms',
_duration_ms(started_at),
attrs=_runtime_metric_attrs('list_active', result),
)
def _labels(
self,
session_id: UUID,
chat_id: UUID,
expires_at: datetime,
) -> dict[str, str]:
return {
'session_id': str(session_id),
'chat_id': str(chat_id),
'expires_at': expires_at.isoformat(),
}
def _mounts(
self,
chat_path: Path,
dependencies_path: Path,
lambda_tools_path: Path,
) -> list[Mount]:
return [
Mount(
target=self._config.chat_mount_path,
source=str(chat_path),
type='bind',
),
Mount(
target=self._config.dependencies_mount_path,
source=str(dependencies_path),
type='bind',
read_only=True,
),
Mount(
target=self._config.lambda_tools_mount_path,
source=str(lambda_tools_path),
type='bind',
read_only=True,
),
]
def _chat_path(self, chat_id: UUID) -> Path:
chats_root = self._host_path(self._config.chats_root)
chat_path = (chats_root / str(chat_id)).resolve(strict=False)
if not chat_path.is_relative_to(chats_root):
raise ValueError('invalid chat path')
return chat_path
def _readonly_host_path(self, path_value: str) -> Path:
host_path = self._host_path(path_value)
if not host_path.exists():
raise ValueError('invalid host path')
return host_path
def _session_from_container(self, container: object) -> SandboxSession | None:
container_id = str(getattr(container, 'id', '')).strip()
labels = getattr(container, 'labels', None)
if not container_id or not isinstance(labels, dict):
return None
try:
session_id = UUID(labels['session_id'])
chat_id = UUID(labels['chat_id'])
created_at = self._container_created_at(container)
expires_at = _parse_datetime(labels['expires_at'])
except (KeyError, TypeError, ValueError):
return None
return SandboxSession(
session_id=session_id,
chat_id=chat_id,
container_id=container_id,
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
)
def _container_created_at(self, container: object) -> datetime:
attrs = getattr(container, 'attrs', None)
if not isinstance(attrs, dict):
reload_container = getattr(container, 'reload', None)
if callable(reload_container):
reload_container()
attrs = getattr(container, 'attrs', None)
if not isinstance(attrs, dict):
raise ValueError('invalid container attrs')
raw_created_at = attrs.get('Created')
if not isinstance(raw_created_at, str):
raise ValueError('invalid created_at')
return _parse_datetime(raw_created_at)
def _host_path(self, path_value: str) -> Path:
return Path(path_value).expanduser().resolve(strict=False)
def _parse_datetime(value: str) -> datetime:
normalized = f'{value[:-1]}+00:00' if value.endswith('Z') else value
return datetime.fromisoformat(normalized)
def _duration_ms(started_at: float) -> float:
return (time.perf_counter() - started_at) * 1000
def _runtime_metric_attrs(operation: str, result: str) -> dict[str, str]:
return {
'operation': operation,
'result': result,
}
def _runtime_error_metric_attrs(
operation: str,
error_type: str,
) -> dict[str, str]:
return {
'operation': operation,
'error.type': error_type,
}
def _error_type(error: Exception) -> str:
if isinstance(error.__cause__, Exception):
return type(error.__cause__).__name__
return type(error).__name__
def _set_span_container_attrs(span: Span, container: object) -> None:
labels = getattr(container, 'labels', None)
if not isinstance(labels, dict):
return
session_id = labels.get('session_id')
if isinstance(session_id, str) and session_id:
span.set_attribute('session.id', session_id)
chat_id = labels.get('chat_id')
if isinstance(chat_id, str) and chat_id:
span.set_attribute('chat.id', chat_id)


@@ -1,5 +1,7 @@
from collections.abc import Callable
import asyncio
from collections.abc import Awaitable, Callable
from fastapi import FastAPI
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from adapter.config.loader import load_config
@@ -8,9 +10,10 @@ from adapter.di.container import AppContainer, build_container
from adapter.http.fastapi.dependencies import APP_CONFIG_STATE, APP_CONTAINER_STATE
from adapter.http.fastapi.middleware import register_middleware
from adapter.http.fastapi.routers.v1.router import router as v1_router
from fastapi import FastAPI
API_V1_PREFIX = '/api/v1'
APP_CLEANUP_TASK_STATE = 'cleanup_task'
APP_CLEANUP_STOP_STATE = 'cleanup_stop'
def create_app(config: AppConfig | None = None) -> FastAPI:
@@ -22,6 +25,7 @@ def create_app(config: AppConfig | None = None) -> FastAPI:
app = FastAPI(title=app_config.app.name)
setattr(app.state, APP_CONFIG_STATE, app_config)
setattr(app.state, APP_CONTAINER_STATE, container)
app.add_event_handler('startup', _build_startup_handler(app, container))
app.add_event_handler('shutdown', _build_shutdown_handler(app, container))
register_middleware(app, app_config)
app.include_router(v1_router, prefix=API_V1_PREFIX)
@@ -43,19 +47,112 @@
raise
def _build_startup_handler(
app: FastAPI,
container: AppContainer,
) -> Callable[[], Awaitable[None]]:
async def startup() -> None:
task = _get_cleanup_task(app)
if task is not None and not task.done():
return
await asyncio.to_thread(container.sandbox_reconciler.execute)
stop_event = asyncio.Event()
setattr(app.state, APP_CLEANUP_STOP_STATE, stop_event)
setattr(
app.state,
APP_CLEANUP_TASK_STATE,
asyncio.create_task(
_run_cleanup_loop(container, stop_event),
name='sandbox_cleanup',
),
)
return startup
def _build_shutdown_handler(
app: FastAPI,
container: AppContainer,
) -> Callable[[], Awaitable[None]]:
async def shutdown() -> None:
errors: list[Exception] = []
try:
await _stop_cleanup_loop(app)
except Exception as exc:
errors.append(exc)
try:
_uninstrument_app(app)
except Exception as exc:
errors.append(exc)
try:
container.shutdown()
except Exception as exc:
errors.append(exc)
if errors:
raise ExceptionGroup('app shutdown failed', errors)
return shutdown
async def _run_cleanup_loop(
container: AppContainer,
stop_event: asyncio.Event,
) -> None:
interval = container.config.sandbox.cleanup_interval_seconds
while not stop_event.is_set():
try:
await asyncio.to_thread(
container.usecases.cleanup_expired_sandboxes.execute
)
except Exception as exc:
container.observability.logger.error(
'sandbox_cleanup_failed',
attrs={
'error': type(exc).__name__,
},
)
try:
await asyncio.wait_for(stop_event.wait(), timeout=interval)
except asyncio.TimeoutError:
continue
async def _stop_cleanup_loop(app: FastAPI) -> None:
stop_event = _get_cleanup_stop_event(app)
if stop_event is not None:
stop_event.set()
task = _get_cleanup_task(app)
try:
if task is not None:
await task
finally:
setattr(app.state, APP_CLEANUP_TASK_STATE, None)
setattr(app.state, APP_CLEANUP_STOP_STATE, None)
def _get_cleanup_task(app: FastAPI) -> asyncio.Task[None] | None:
task = getattr(app.state, APP_CLEANUP_TASK_STATE, None)
if isinstance(task, asyncio.Task):
return task
return None
def _get_cleanup_stop_event(app: FastAPI) -> asyncio.Event | None:
stop_event = getattr(app.state, APP_CLEANUP_STOP_STATE, None)
if isinstance(stop_event, asyncio.Event):
return stop_event
return None
def _uninstrument_app(app: FastAPI) -> None:
if _is_instrumented(app):
FastAPIInstrumentor.uninstrument_app(app)

View file

@ -1,8 +1,7 @@
from fastapi import Depends, Request
from adapter.di.container import AppContainer
from usecase.user import GetUser
from usecase.sandbox import CreateSandbox
APP_CONTAINER_STATE = 'container'
APP_CONFIG_STATE = 'config'
@ -10,10 +9,12 @@ APP_CONFIG_STATE = 'config'
def get_container(request: Request) -> AppContainer:
container = getattr(request.app.state, APP_CONTAINER_STATE, None)
if not isinstance(container, AppContainer):
raise RuntimeError('container unavailable')
return container
def get_get_user(container: AppContainer = Depends(get_container)) -> GetUser:
return container.usecases.get_user
def get_create_sandbox(
container: AppContainer = Depends(get_container),
) -> CreateSandbox:
return container.usecases.create_sandbox

View file

@ -1,8 +1,9 @@
from time import perf_counter
from fastapi import FastAPI, Request, Response
from adapter.config.model import AppConfig
from adapter.http.fastapi.dependencies import get_container
def register_middleware(app: FastAPI, config: AppConfig) -> None:

View file

@ -1,10 +1,19 @@
from fastapi import APIRouter, Depends, HTTPException, status
from adapter.di.container import AppContainer
from adapter.http.fastapi.dependencies import (
get_container,
get_create_sandbox,
)
from adapter.http.fastapi.schemas import (
CreateSandboxRequest,
ErrorResponse,
HealthResponse,
SandboxSessionResponse,
)
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession
from usecase.sandbox import CreateSandbox, CreateSandboxCommand
router = APIRouter()
@ -22,19 +31,40 @@ def health(container: AppContainer = Depends(get_container)) -> HealthResponse:
)
@router.post(
'/create',
response_model=SandboxSessionResponse,
responses={
status.HTTP_503_SERVICE_UNAVAILABLE: {'model': ErrorResponse},
status.HTTP_500_INTERNAL_SERVER_ERROR: {'model': ErrorResponse},
},
status_code=status.HTTP_200_OK,
)
def create_sandbox(
request: CreateSandboxRequest,
usecase: CreateSandbox = Depends(get_create_sandbox),
) -> SandboxSessionResponse:
try:
session = usecase.execute(CreateSandboxCommand(chat_id=request.chat_id))
except SandboxStartError as exc:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=str(exc),
) from exc
except SandboxError as exc:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=str(exc),
) from exc
return _to_sandbox_session_response(session)
def _to_sandbox_session_response(session: SandboxSession) -> SandboxSessionResponse:
return SandboxSessionResponse(
session_id=session.session_id,
chat_id=session.chat_id,
container_id=session.container_id,
status=session.status.value,
expires_at=session.expires_at,
)

View file

@ -1,4 +1,7 @@
from datetime import datetime
from uuid import UUID
from pydantic import BaseModel, ConfigDict
class HealthResponse(BaseModel):
@ -7,10 +10,18 @@ class HealthResponse(BaseModel):
env: str
class CreateSandboxRequest(BaseModel):
model_config = ConfigDict(extra='forbid')
chat_id: UUID
class SandboxSessionResponse(BaseModel):
session_id: UUID
chat_id: UUID
container_id: str
status: str
expires_at: datetime
class ErrorResponse(BaseModel):

View file

@ -20,6 +20,14 @@ class NoopMetrics:
) -> None:
return None
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None:
return None
class NoopSpan:
def set_attribute(self, name: str, value: AttrValue) -> None:

View file

@ -5,12 +5,21 @@ from opentelemetry.metrics import Counter, Histogram, Meter
from usecase.interface import Attrs
class _GaugeAdapter:
def __init__(self, gauge: object) -> None:
self._gauge = gauge
def set(self, value: int | float, attributes: object = None) -> None:
getattr(self._gauge, 'set')(value, attributes=attributes)
class OtelMetrics:
def __init__(self, meter: Meter) -> None:
self._meter = meter
self._lock = Lock()
self._counters: dict[str, Counter] = {}
self._histograms: dict[str, Histogram] = {}
self._gauges: dict[str, _GaugeAdapter] = {}
def increment(
self,
@ -34,6 +43,17 @@ class OtelMetrics:
attributes=None if attrs is None else dict(attrs),
)
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None:
self._gauge(name).set(
value,
attributes=None if attrs is None else dict(attrs),
)
def _counter(self, name: str) -> Counter:
counter = self._counters.get(name)
if counter is not None:
@ -57,3 +77,15 @@ class OtelMetrics:
histogram = self._meter.create_histogram(name)
self._histograms[name] = histogram
return histogram
def _gauge(self, name: str) -> _GaugeAdapter:
gauge = self._gauges.get(name)
if gauge is not None:
return gauge
with self._lock:
gauge = self._gauges.get(name)
if gauge is None:
gauge = _GaugeAdapter(self._meter.create_gauge(name))
self._gauges[name] = gauge
return gauge

View file

View file

@ -0,0 +1,57 @@
from dataclasses import dataclass
from typing import Protocol
from uuid import UUID
from domain.sandbox import SandboxSession
from usecase.interface import Logger, Metrics, Tracer
class SandboxSessionStateSource(Protocol):
def list_active_sessions(self) -> list[SandboxSession]: ...
class SandboxSessionRegistry(Protocol):
def replace_all(self, sessions: list[SandboxSession]) -> None: ...
def count_active(self) -> int: ...
@dataclass(frozen=True, slots=True)
class SandboxSessionReconciler:
state_source: SandboxSessionStateSource
registry: SandboxSessionRegistry
logger: Logger
metrics: Metrics
tracer: Tracer
def execute(self) -> list[SandboxSession]:
with self.tracer.start_span(
'adapter.sandbox.reconcile_sessions',
) as span:
try:
sessions_by_chat_id: dict[UUID, SandboxSession] = {}
discovered_sessions = self.state_source.list_active_sessions()
span.set_attribute('sandbox.discovered_count', len(discovered_sessions))
for session in sorted(
discovered_sessions,
key=lambda item: item.created_at,
):
sessions_by_chat_id[session.chat_id] = session
sessions = list(sessions_by_chat_id.values())
self.registry.replace_all(sessions)
active_count = self.registry.count_active()
self.metrics.set('sandbox.active.count', active_count)
span.set_attribute('sandbox.active_count', active_count)
span.set_attribute('sandbox.result', 'reconciled')
self.logger.info(
'sandbox_reconciled',
attrs={
'session_count': active_count,
},
)
return sessions
except Exception as exc:
span.set_attribute('sandbox.result', 'error')
span.record_error(exc)
raise

View file

@ -24,5 +24,19 @@ otel:
traces_endpoint: http://localhost:4318/v1/traces
metric_export_interval: 1000
docker:
base_url: unix:///var/run/docker.sock
sandbox:
image: ai-agent:latest
ttl_seconds: 300
cleanup_interval_seconds: 60
chats_root: var/sandbox/chats
dependencies_host_path: var/sandbox/dependencies
lambda_tools_host_path: var/sandbox/lambda-tools
chat_mount_path: /workspace/chat
dependencies_mount_path: /opt/dependencies
lambda_tools_mount_path: /opt/lambda-tools
security:
token_header: X-API-Token

config/docker-compose.yml Normal file
View file

@ -0,0 +1,42 @@
app:
name: master-service
env: docker-compose
http:
host: 0.0.0.0
port: 8123
logging:
level: INFO
output: otel
format: json
metrics:
enabled: true
tracing:
enabled: true
otel:
service_name: master-service
logs_endpoint: http://otel-collector:4318/v1/logs
metrics_endpoint: http://otel-collector:4318/v1/metrics
traces_endpoint: http://otel-collector:4318/v1/traces
metric_export_interval: 1000
docker:
base_url: tcp://docker-engine:2375
sandbox:
image: nginx:1.27-alpine
ttl_seconds: 300
cleanup_interval_seconds: 60
chats_root: /var/lib/master-sandbox/chats
dependencies_host_path: /var/lib/master-dependencies
lambda_tools_host_path: /var/lib/master-lambda-tools
chat_mount_path: /workspace/chat
dependencies_mount_path: /opt/dependencies
lambda_tools_mount_path: /opt/lambda-tools
security:
token_header: X-API-Token

View file

@ -4,22 +4,43 @@ services:
context: .
dockerfile: Dockerfile
target: run
user: root
depends_on:
- otel-collector
docker-engine:
condition: service_healthy
otel-collector:
condition: service_started
environment:
APP_API_TOKEN: ${APP_API_TOKEN:?APP_API_TOKEN is required}
APP_SIGNING_KEY: ${APP_SIGNING_KEY:?APP_SIGNING_KEY is required}
APP_ENV: docker
APP_HTTP_HOST: 0.0.0.0
APP_HTTP_PORT: '8123'
APP_LOGGING_OUTPUT: otel
APP_METRICS_ENABLED: 'true'
APP_TRACING_ENABLED: 'true'
APP_OTEL_LOGS_ENDPOINT: http://otel-collector:4318/v1/logs
APP_OTEL_METRICS_ENDPOINT: http://otel-collector:4318/v1/metrics
APP_OTEL_TRACES_ENDPOINT: http://otel-collector:4318/v1/traces
APP_API_TOKEN: local-api-token
APP_SIGNING_KEY: local-signing-key
ports:
- '127.0.0.1:8123:8123'
volumes:
- ./config/docker-compose.yml:/app/config/app.yaml:ro
- sandbox-data:/var/lib/master-sandbox
- sandbox-dependencies:/var/lib/master-dependencies:ro
- sandbox-tools:/var/lib/master-lambda-tools:ro
docker-engine:
image: docker:28-dind
privileged: true
environment:
DOCKER_TLS_CERTDIR: ''
command:
- --host=tcp://0.0.0.0:2375
healthcheck:
test:
- CMD
- docker
- info
interval: 5s
timeout: 5s
retries: 12
volumes:
- docker-data:/var/lib/docker
- sandbox-data:/var/lib/master-sandbox
- sandbox-dependencies:/var/lib/master-dependencies
- sandbox-tools:/var/lib/master-lambda-tools
otel-collector:
image: grafana/otel-lgtm:latest
@ -29,4 +50,8 @@ services:
- lgtm-data:/data
volumes:
docker-data:
lgtm-data:
sandbox-data:
sandbox-dependencies:
sandbox-tools:

View file

@ -0,0 +1,19 @@
# 006 MVP Docker Sandbox Orchestration
Context
- The service needs a first MVP for sandbox orchestration behind `/api/v1/create`.
- The first version must stay small, avoid auth, and preserve clean architecture boundaries.
Decision
- Use Docker as the outer runtime adapter for sandbox start and stop operations.
- Keep sandbox entities and errors in `domain/` and orchestration ports in `usecase/`.
- Use an in-memory session repository for the MVP instead of a central database.
- Keep one active sandbox per `chat_id` and reuse it until TTL expiry.
- Set default sandbox TTL to 300 seconds.
- Mount chat storage as `rw`, dependencies as `ro`, and lambda-tools as `ro`.
- Run expired sandbox cleanup as an in-process background loop in the HTTP app lifecycle.
Consequences
- Inner layers stay free from Docker and FastAPI details.
- The MVP is single-instance oriented and not yet suitable for multi-node coordination.
- Repository and runtime can be replaced later without changing usecase contracts.
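The reuse-until-TTL rule can be sketched as a small in-memory repository. The names here (`InMemorySessionRepository`, `get_active`, `put`) are illustrative, not the actual adapter in this repository:

```python
import threading
from datetime import datetime
from uuid import UUID


class InMemorySessionRepository:
    """One active session per chat_id; expired entries are dropped on read."""

    def __init__(self) -> None:
        self._lock = threading.Lock()  # guard against concurrent requests
        self._sessions: dict[UUID, object] = {}

    def get_active(self, chat_id: UUID, now: datetime):
        with self._lock:
            session = self._sessions.get(chat_id)
            if session is None:
                return None
            if session.expires_at <= now:
                # TTL passed: forget the session so the caller starts a new one
                del self._sessions[chat_id]
                return None
            return session

    def put(self, chat_id: UUID, session) -> None:
        with self._lock:
            self._sessions[chat_id] = session
```

A create usecase would call `get_active` first and only start a new container when it returns `None`, which is what keeps one sandbox per `chat_id`.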

View file

@ -0,0 +1,17 @@
# 007 Startup Sandbox Reconciliation
Context
- Active sandboxes outlive the process because Docker keeps containers running across master-service restarts.
- The in-memory session repository is rebuilt on each start and otherwise loses running sandbox state.
Decision
- Reconcile sandbox state during app startup before the cleanup loop starts serving requests.
- Read running Docker containers through sandbox labels `session_id`, `chat_id`, and `expires_at`.
- Rebuild the in-memory registry from the reconciled sessions and prefer the newest session per `chat_id`.
- Let the normal cleanup flow handle reconciled sessions that are already expired.
- Do not stop healthy sandbox containers during service shutdown; shutdown only stops background control-plane work and closes local resources.
Consequences
- A restarted master-service reuses existing sandboxes instead of starting duplicates for the same chat.
- Startup now depends on Docker state access and should fail fast if runtime state cannot be listed.
- The reconciliation rule stays local to outer layers and does not leak Docker into usecases.

View file

@ -0,0 +1,18 @@
# 008 Sandbox lifecycle observability
## Context
- FR-034 requires metrics for active sandboxes, startup latency, and cleanup
- Issue #11 requires tracing of the sandbox usecases and Docker adapter steps
- Inner layers must only know the observability ports
## Decision
- The sandbox lifecycle usecases use only `Logger`, `Metrics`, `Tracer`
- `Metrics` gains `set(...)` for current-state signals
- `sandbox.active.count` is derived from the session registry via `count_active()`
- M19 adds only the contracts and adapter support for future lifecycle signals
- M20 and M21 will separately add spans and runtime metrics to the usecases and the Docker adapter
## Consequences
- The OTel gauge stays in the outer adapter and does not leak into inner layers
- The active sandbox count is synchronized after create, cleanup, and reconciliation
- Tests can verify observability through fake ports without a real OTel backend
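The fake-port approach can look like the sketch below; `FakeMetrics` mirrors only the `Metrics` methods visible in this diff (`increment`, `set`) and records calls instead of exporting them:

```python
class FakeMetrics:
    """Test double for the Metrics port: records values in plain dicts."""

    def __init__(self) -> None:
        self.counters: dict[str, float] = {}
        self.gauges: dict[str, float] = {}

    def increment(self, name, value=1, attrs=None) -> None:
        self.counters[name] = self.counters.get(name, 0) + value

    def set(self, name, value, attrs=None) -> None:
        self.gauges[name] = value
```

A reconciler test can then assert that `fake.gauges['sandbox.active.count']` equals the registry's `count_active()` result, with no OTel backend involved.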

View file

@ -0,0 +1,17 @@
# 009 Storage foundation
## Context
- v1 storage slice needs workspace, chat and file flows before durable DB
- trusted caller passes `user_id`, and one workspace belongs to one user
- chat content must live outside sandbox lifecycle and survive sandbox restart
## Decision
- metadata repositories are in-memory for the first storage slice
- `Workspace`, `Chat` and `ChatFile` are first-class domain entities
- filesystem access stays behind storage ports in outer layers
- sandbox later integrates through chat metadata and storage ports, not raw path math in usecases
## Consequences
- metadata is lost on restart in this phase
- storage usecases and HTTP API can be built before durable persistence
- later durable metadata can replace in-memory adapters behind the same ports

View file

@ -0,0 +1,17 @@
# 010 Chat history policy
## Context
- v1 keeps chat history in filesystem, not in central DB
- chat metadata must not depend on parsing history content
- each chat already maps to an isolated working directory
## Decision
- each chat owns one `history.md` inside its chat directory
- `history.md` is created with chat layout initialization
- chat metadata stores identity and lifecycle fields separately from history content
- history read and write stay behind storage ports in outer layers
## Consequences
- history survives sandbox restart with chat storage
- metadata and content evolve independently
- later migration to another history backend can keep the same chat identity model
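The layout-initialization step can be sketched as follows; `init_chat_layout` is an illustrative name, and the only assumption is that chat directories are named by `chat_id` under a configured root:

```python
from pathlib import Path
from uuid import UUID

HISTORY_FILE_NAME = 'history.md'  # same constant as domain/chat.py


def init_chat_layout(chats_root: Path, chat_id: UUID) -> Path:
    """Create the chat directory and its empty history.md, idempotently."""
    chat_dir = chats_root / str(chat_id)
    chat_dir.mkdir(parents=True, exist_ok=True)
    history = chat_dir / HISTORY_FILE_NAME
    if not history.exists():
        history.write_text('', encoding='utf-8')
    return chat_dir
```

Re-running initialization must never truncate an existing `history.md`, which is why the write is guarded by the existence check.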

docs/master.md Normal file
View file

@ -0,0 +1,125 @@
# Master-service requirements
Source: `meetings/meeting_1_document.md` plus clarifications from the team lead
## Purpose
Master-service is the control plane of the AI-agent platform. It manages sandbox containers running AI-Agent, user workspaces, chats, user files, access checks, and the issuance of temporary p2p access to a container.
After a sandbox starts and authorization succeeds, Master must remain a control plane rather than a permanent data-plane proxy: working traffic to the agent must flow directly over the issued p2p channel.
## Responsibility boundaries
### In scope
- sandbox orchestration
- workspace and chat volume management
- storage of user chats and files
- artifact storage and delivery
- access checks
- issuing and revoking p2p access
- tracking sessions, TTLs, and cleanup processes
- storage of service metadata
### Out of scope
- Telegram/Discord/Slack integrations and their transport logic
- AI-Agent business logic inside the sandbox
- LLM-level protection against prompt injection
- implementations of specific lambda-tools and browser tools
## Core entities
- **User** — workspace owner
- **Workspace** — per-user storage with a soft 10 GB quota
- **Chat** — isolated working area inside a workspace
- **SandboxSession** — active AI-Agent runtime for a specific chat
- **AccessLease** — temporary right to p2p access to a sandbox
- **ChatFile** — file attached to a chat
- **Artifact** — result produced by the agent and prepared for external delivery
- **DependencyBundle** — preloaded set of tool/runtime dependencies
- **LambdaToolBundle** — platform-wide read-only tool set
## Functional requirements
### 1. Workspace and chat storage management
- **FR-001** Master must create a user's workspace on first access
- **FR-002** Master must enforce a soft workspace quota of 10 GB per user without physically reserving space
- **FR-003** Master must create, fetch, list, and delete a user's chats
- **FR-004** Each chat must have its own isolated directory with files and history
- **FR-005** In the first version, chat message history must be stored next to the chat in a `history.md` file
- **FR-006** Master must save user attachments into the corresponding chat directory before sandbox launch or while the sandbox is running
- **FR-007** Master must provide operations to list, read metadata of, delete, and purge chat files
### 2. Sandbox orchestration
- **FR-008** Master must start an AI-Agent sandbox container on the first message or when no active sandbox session exists
- **FR-009** Master must reuse an active sandbox session until its idle TTL expires
- **FR-010** Master must stop a sandbox after a period of inactivity; the initial TTL is about 1 hour, but the parameter must be configurable
- **FR-011** After a sandbox stops, chat and workspace data must be preserved and reused on the next start
- **FR-012** Master must be able to recreate a sandbox on another node as long as the data is intact and the required volumes are available
### 3. Volume and runtime resource mounting
- **FR-013** When starting a sandbox, Master must mount the current chat's volume
- **FR-014** When starting a sandbox, Master must mount the dependency cache volume
- **FR-015** When starting a sandbox, Master must mount the lambda-tools volume in read-only mode
- **FR-016** If user-tools are used, Master must mount them so the agent can only execute them, not modify them
- **FR-017** Master must check whether the required dependency bundles exist on the current node and, if missing, trigger a download from internal storage
### 4. Access and p2p connectivity
- **FR-018** Master must authenticate internal requests from the integration layer and other trusted services
- **FR-019** Master must authorize access by the user/workspace/chat/sandbox combination
- **FR-020** After a successful access check, Master must issue temporary p2p connection parameters for the sandbox
- **FR-021** Issued p2p access must be time-limited and bound to a specific sandbox session
- **FR-022** Master must be able to revoke p2p access when a sandbox stops, permissions change, or a session ends
- **FR-023** Master must not proxy ongoing traffic to the agent after issuing p2p access, except for control operations
### 5. Storage of chats, files, and artifacts
- **FR-024** Master must store central metadata about users, chats, sandbox sessions, leases, files, and artifacts
- **FR-025** Master must separate chat metadata in the central DB from the actual message history in `history.md`
- **FR-026** Master must support uploading artifacts to S3-compatible object storage
- **FR-027** Master must store the link between an artifact, a user, a chat, and the external delivery method
- **FR-028** After confirmed delivery, an artifact must be marked for deletion or deleted per policy
### 6. Lifecycle and cleanup
- **FR-029** Master must track `last_read` and/or `last_update` for chat files
- **FR-030** Chat files must be deleted automatically after 3 days of inactivity
- **FR-031** Master must support deleting an entire workspace after prolonged account inactivity; the period must be configurable
- **FR-032** Cleanup must be safe: data of an active sandbox or of a chat with a valid lease must never be deleted
### 7. Observability and audit
- **FR-033** Master must log sandbox creation and shutdown, volume mounting, access issuance, and cleanup operations
- **FR-034** Master must collect metrics on active sandboxes, startup time, start errors, cleanup, and storage usage
- **FR-035** Master must provide an audit trail for access operations on chat/file/artifact resources
## Non-functional requirements
- **NFR-001** Master must be a control-plane service and must not contain AI-Agent business logic
- **NFR-002** Data of different users must be isolated at the level of paths, mount policy, and access policy
- **NFR-003** A repeated request to create a chat or start a sandbox must not duplicate resources when the idempotency key is the same
- **NFR-004** TTL, cleanup, quota, and lease policies must be set via config
- **NFR-005** The design must allow horizontal scaling of Master and recreating sandboxes independently of a specific service instance
- **NFR-006** A sandbox failure must not lose chat history or user files
- **NFR-007** All security-sensitive operations must be traceable and observable
## Explicit first-version constraints
- a single master-service is responsible for orchestration and storage metadata
- message history is stored in `history.md` inside the chat directory
- the workspace quota is computed from actual file size, without hard reservation
- heavy tools and browser automation are not split into a separate service until it becomes necessary
- messenger integrations are treated as external clients of master-service
## Open questions
- which transport is used for p2p access: raw TCP, WebSocket, gRPC, or something else — resolved: WebSocket
- which inactivity cleanup period for a whole account is the product default
- whether a separate API is needed for downloading chat files externally, or metadata plus an object/file gateway is enough
- whether artifact delivery is explicitly confirmed by the external integration, or cleanup runs only on TTL
- whether one sandbox session must map to exactly one chat, or multiplexing several chats onto one runtime is acceptable

domain/chat.py Normal file
View file

@ -0,0 +1,35 @@
from dataclasses import dataclass
from datetime import datetime
from uuid import UUID
HISTORY_FILE_NAME = 'history.md'
@dataclass(frozen=True, slots=True)
class ChatAttachmentName:
value: str
def __post_init__(self) -> None:
if not self.value or self.value in {'.', '..'}:
raise ValueError('invalid attachment name')
if '/' in self.value or '\\' in self.value:
raise ValueError('invalid attachment name')
if self.value == HISTORY_FILE_NAME:
raise ValueError('reserved attachment name')
@dataclass(frozen=True, slots=True)
class Chat:
chat_id: UUID
workspace_id: UUID
created_at: datetime
@dataclass(frozen=True, slots=True)
class ChatFile:
file_id: UUID
chat_id: UUID
name: ChatAttachmentName
content_type: str | None
size_bytes: int
created_at: datetime

View file

@ -1,3 +1,6 @@
from uuid import UUID
class DomainError(Exception):
pass
@ -16,3 +19,61 @@ class UserConflictError(UserError):
def __init__(self, email: str) -> None:
super().__init__('user_conflict')
self.email = email
class WorkspaceError(DomainError):
pass
class WorkspaceNotFoundError(WorkspaceError):
def __init__(self, workspace_id: UUID) -> None:
super().__init__('workspace_not_found')
self.workspace_id = workspace_id
class WorkspaceQuotaExceededError(WorkspaceError):
def __init__(self, workspace_id: UUID) -> None:
super().__init__('workspace_quota_exceeded')
self.workspace_id = workspace_id
class ChatError(DomainError):
pass
class ChatNotFoundError(ChatError):
def __init__(self, chat_id: UUID) -> None:
super().__init__('chat_not_found')
self.chat_id = chat_id
class ChatHasActiveSandboxError(ChatError):
def __init__(self, chat_id: UUID) -> None:
super().__init__('chat_has_active_sandbox')
self.chat_id = chat_id
class ChatFileError(DomainError):
pass
class ChatFileNotFoundError(ChatFileError):
def __init__(self, file_id: UUID) -> None:
super().__init__('chat_file_not_found')
self.file_id = file_id
class SandboxError(DomainError):
pass
class SandboxStartError(SandboxError):
def __init__(self, chat_id: str) -> None:
super().__init__('sandbox_start_failed')
self.chat_id = chat_id
class SandboxAlreadyRunningError(SandboxError):
def __init__(self, chat_id: str) -> None:
super().__init__('sandbox_already_running')
self.chat_id = chat_id

domain/sandbox.py Normal file
View file

@ -0,0 +1,22 @@
from dataclasses import dataclass
from datetime import datetime
from enum import Enum
from uuid import UUID
class SandboxStatus(str, Enum):
STARTING = 'starting'
RUNNING = 'running'
STOPPING = 'stopping'
STOPPED = 'stopped'
FAILED = 'failed'
@dataclass(frozen=True, slots=True)
class SandboxSession:
session_id: UUID
chat_id: UUID
container_id: str
status: SandboxStatus
created_at: datetime
expires_at: datetime

domain/workspace.py Normal file
View file

@ -0,0 +1,17 @@
from dataclasses import dataclass
from datetime import datetime
from uuid import UUID
@dataclass(frozen=True, slots=True)
class Workspace:
workspace_id: UUID
user_id: UUID
created_at: datetime
@dataclass(frozen=True, slots=True)
class WorkspaceUsage:
workspace_id: UUID
used_bytes: int
quota_bytes: int

View file

@ -0,0 +1,405 @@
# Meeting 1 — Platform Architecture Design
**Date:** 2026-03-21 (estimated from transcript context)
**Duration:** ~1:30:18 (00:00 - 1:30:18)
**Participants:** Azamat N (lead architect, screen-sharing), Egor (Егор), Yaroslav (Ярослав), David Shvarts (Давид Шварц), Andrey Bakhtiozin (Бахтиозин Андрей)
**Platform:** DION video conferencing
**Collaborative tool:** Excalidraw (shared whiteboard)
## Summary
The Lambda Lab 3.0 team held their first architecture planning meeting to design the AI-agent platform. Azamat presented a draft architecture he had developed with Egor over the preceding two days, walking through the system on a shared Excalidraw whiteboard. The platform consists of two main applications: a **Master** service (orchestrator managing sessions, containers, and context) and an **AI-Agent** service (running inside isolated Docker containers with a per-user workspace). The team discussed the workspace structure (10GB per user with chat-scoped files and history), three categories of tools (built-in, lambda/platform, and user-generated), dependency management via mount points, artifact storage via S3, and message history storage options. Key side topics included security concerns around prompt injection and file leakage, the agent-browser tool for web interaction, infrastructure decisions (Forgejo for Git hosting, Matrix for team communication), and upcoming deadlines — specifically presenting this architecture to Kalinin the next day.
The architecture is explicitly described as a first draft created in two days, meant to establish a direction rather than be final. The team assigned responsibilities: Azamat handles Forgejo repository setup, wiki documentation, and architecture presentation preparation. The meeting also touched on developer tooling — using Claude/OpenCode with agent profiles, CLAUDE.md-style configuration files, and CI/CD pipelines for documentation.
## Discussion
### Setup and Technical Issues
**[00:00 — 02:14]**
The meeting opened with participants joining the DION video call and resolving recording/connection issues. Azamat briefly disconnected and reconnected. Five participants were present: Azamat N, Egor, Yaroslav, David Shvarts, and Andrey Bakhtiozin.
**Visual context:** DION video conferencing interface showing participant tiles with initials and names.
---
### Platform Architecture Overview
**[02:17 — 04:50]**
Azamat shared his Excalidraw screen and began presenting the architecture he had drafted with Egor. He explained the overall platform goal: creating AI agents accessible through various integrations (Telegram, Discord, Slack), where the agent lives in an isolated environment.
The architecture has two main applications:
1. **Master** — orchestrator that manages agent sessions. When a user writes in Telegram, the message hits the Master, which creates a session and asks Docker to spin up a container with the AI-Agent.
2. **AI-Agent** — the second application, living inside an isolated Docker container. Once the container is running, communication happens directly between the messaging integration and the agent container (peer-to-peer), bypassing the Master.
> **Azamat** @ 02:37: "Our task here is to build, through these diagrams, a platform that connects to various integrations like Telegram, Discord, and so on."
> **Azamat** @ 02:51: "Our task is specifically to build an agent that lives in an isolated environment."
**Visual context:** Excalidraw diagram showing Stream (Telegram, Discord, Slack) -> Docker (multiple AI-Agent instances) -> Master (S3, DB). Arrows indicate data flow between components.
---
### Workspace Structure and Projects
**[05:15 — 07:28]**
Azamat described the workspace allocated per user — 10GB of storage. Within this workspace, users can create projects (later renamed to "chats"), each containing files the agent interacts with (PDFs, images, documents, videos). This is analogous to ChatGPT's project feature.
> **Azamat** @ 05:15: "We allocate him 10 gigabytes of storage. That's it. And within those 10 gigabytes he can do whatever he wants."
> **Azamat** @ 07:14: "Just like you can create projects in ChatGPT, the user will have the same thing here."
**Visual context:** Excalidraw diagram showing "Workspace 10GB" containing P1 (file1.pdf, file2.png), P2 (file3.docx), P3 (video.mp4).
---
### Tool Categories
**[07:30 — 12:35]**
The team established three categories of tools:
**1. Built-in tools** (embedded in agent code):
- `web-search` — web search capability
- `fetch-url` — fetch a web page (e.g., `http://simple-file.ru/index.html`)
- `bash` — command-line interaction (cd, ls, git, python, etc.)
**2. Lambda tools (platform-wide, read-only):**
- Written by other teams within the organization
- Example: `markitdown` — converts various formats (PDF, DOCX, HTML) to Markdown
- Must support `--help`, `--json`/`--agent` flags for agent-friendly interaction
- Must support `man-agent <tool>` for machine-readable documentation
- Read-only, mounted into every agent container
**3. User tools (per-user, execute-only):**
- Agent can write scripts to solve specific user tasks
- Example: a simple task-tracker with `add` and `close` commands, backed by sqlite.db
- Execute-only permissions (`+x only`) for security
> **Azamat** @ 10:38: "When we call some tool like this, it should have something like help, and ideally also --json, so that the agent understands it."
**Visual context:** Excalidraw showing AI-Agent sandbox with built-in tools listed, "Сторонние тулы RO" (third-party tools read-only) section, and "User-tool +x only" section. Workspace with P1/P2/P3 projects below.
---
### Security Discussion: User Tools
**[15:58 — 16:30]**
Egor raised a security concern: if user tools are writable by the agent, a malicious prompt injection could cause the agent to modify user tools, inject code like `print(token)`, and exfiltrate credentials stored in environment variables.
> **Egor** @ 15:58: "Because that would make it possible to pick up a malicious prompt somewhere, go into the UserTools folder, put some print(token) in there, and leak the tokens out."
The team agreed that user tools should have execute-only permissions and that this area needs further discussion.
---
### Dependency Management and Mount Points
**[17:37 — 23:47]**
The team discussed how dependencies for tools (Python libraries, etc.) would be managed. Since downloading gigabytes of dependencies on every container launch is impractical, they decided to:
- **Pre-mount dependencies** on the host machine
- Check if dependencies exist on the current node at container launch
- If not present, download from their own S3/cloud storage (in the same data center) rather than external sources
- Store as mounted volumes, not inside containers
Mount points defined:
- `Mount cur-chat` — current chat/project files
- `Mount dependencies` — pre-cached tool dependencies
- `Mount lambda-tools` — shared platform tools (RO)
> **Azamat** @ 18:15: "We need to store them somewhere, because pulling a gigabyte at 100 kilobits per second on every launch is not something we want."
**Visual context:** Updated Excalidraw diagram showing mount points connected to the AI-Agent workspace, with arrows from host machine storage to container mounts.
---
### S3 Storage for Artifacts
**[16:46 — 17:36]**
S3 is used for artifact storage — files the agent generates (reports, presentations, images) that need to be sent back to users via Telegram or other messengers. The agent uploads artifacts to S3, and they are delivered to the user through the messaging integration.
> **Azamat** @ 21:32: "Just a file dump. If you want to know how to work with it: you simply upload a file there over HTTP."
> **David** @ 22:08: "Think of it as Google Drive, only ours."
---
### Projects vs. Chats Naming
**[36:07 — 37:50]**
Confusion arose around the "project" naming because in ChatGPT, a project contains multiple chats. The team decided to rename "projects" to **"chats"** (C1, C2, C3) to avoid confusion. Each chat has its own files and history.db. The question of supporting multiple chats per project was deferred.
> **Azamat** @ 36:29: "You know, basically, just like chats in ChatGPT, except we were calling them projects."
> **Egor** @ 37:30: "Confusing naming here."
---
### Message History Storage
**[30:55 — 35:32]**
The team debated where to store message history. Options discussed:
1. **Central database** — rejected because querying by project_id across scattered chunks is inefficient
2. **Per-chat file** (history.db) inside the workspace — accepted as the initial approach
Egor argued that per-chat SQLite files are more efficient since you always need the full history for a chat, not individual messages. The database fields discussed: `project_id`, `timestamp`, `msg`.
> **Egor** @ 34:45: "Every query to the DB has to walk the index and find all the records with that project id, and they are scattered across different chunks."
> **Azamat** @ 36:04: "I remembered files like memory.md and soul.md. Some people do it that way."
**Visual context:** Database schema shown with fields: session_id/project_id, user_id, timestamp, msg.
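The per-chat `history.db` approach can be sketched with stdlib `sqlite3`, using the fields from the discussion. The `project_id` column is omitted here since each file is already scoped to one chat; the table and function names are assumptions:

```python
import sqlite3
import time


def open_history(db_path: str) -> sqlite3.Connection:
    # One SQLite file per chat: reading a chat's history is a single
    # sequential scan of a small local file, no cross-chat index walk.
    conn = sqlite3.connect(db_path)
    conn.execute(
        """CREATE TABLE IF NOT EXISTS history (
               id INTEGER PRIMARY KEY AUTOINCREMENT,
               timestamp REAL NOT NULL,
               msg TEXT NOT NULL
           )"""
    )
    return conn


def append_message(conn: sqlite3.Connection, msg: str) -> None:
    conn.execute("INSERT INTO history (timestamp, msg) VALUES (?, ?)",
                 (time.time(), msg))
    conn.commit()


def full_history(conn: sqlite3.Connection) -> list[str]:
    # The agent always needs the whole history for a chat, so a full
    # ordered read is the common (and cheap) access pattern.
    return [row[0] for row in
            conn.execute("SELECT msg FROM history ORDER BY id")]
```

This matches Egor's efficiency argument: the cost of loading history is proportional to one chat's file, not to an index scan over all chats.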
---
### File Lifecycle and Cleanup
**[37:50 — 42:12]**
The team discussed file retention policy within chats:
- **Decision:** Files inside chats live for **3 days** by default
- Track `last_read`/`last_update` timestamps
- After 3 days without access, files are auto-deleted
- If a user needs a deleted file, the bot asks them to re-send it
- For inactive accounts: delete the entire workspace after N months of inactivity (Telegram-style automatic account deletion)
- Artifacts generated by the agent are deleted after being sent to the user via messenger
> **Azamat** @ 40:20: "Say we store files for three days, meaning if nobody touches them for three days."
> **Azamat** @ 48:33: "Let's just do it like Telegram. If a profile is inactive for 3 months, it deletes the account."
---
### Responsibilities and Team Boundaries
**[43:00 — 43:43]**
The team explicitly marked areas of responsibility on the Excalidraw diagram. The green-highlighted sections (Telegram/Discord/Slack integrations and Stream) are **not their responsibility** — those belong to "Путиловский-team" (Putilovsky's team). Lambda Lab 3.0 owns the Master, AI-Agent, workspace, and tools infrastructure.
**Visual context:** Diagram annotated with "Путиловский-team" labels on the Stream/integration components.
---
### Machine-Level Architecture
**[44:29 — 47:01]**
Egor suggested visualizing the machine-level architecture: a Linux base, shared environment layer with lambda-tools, then multiple Agent-AI containers above, each with their own environment and code layer. This shows how lambda tools are shared across all agents on a machine, while each agent has its own isolated workspace.
**Visual context:** New Excalidraw section labeled "Машина" (Machine) showing: Linux -> Env -> lambda-tools -> multiple Agent-AI instances.
---
### Storage Overcommit Discussion
**[49:16 — 51:32]**
Azamat suggested that the 10GB per user wouldn't be physically reserved — similar to VPS overcommit, they'd simply check that each user's directory doesn't exceed 10GB. If a machine runs out of space, Kubernetes can migrate workloads to another node. The team agreed this is acceptable for the initial version and deferred detailed capacity planning.
> **Azamat** @ 49:26: "We'll just create them on disk and check that a user's folder is no bigger than 10 gigabytes."
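The overcommit check could be as simple as measuring the directory on demand; this walk-based sketch is an assumption (a real deployment might prefer filesystem quotas):

```python
import os

WORKSPACE_QUOTA_BYTES = 10 * 1024**3  # 10 GB, not physically reserved


def workspace_usage_bytes(root: str) -> int:
    # Overcommit model: no pre-allocation, just sum file sizes under the
    # user's workspace directory when a check is needed.
    total = 0
    for dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            path = os.path.join(dirpath, name)
            if os.path.isfile(path):
                total += os.path.getsize(path)
    return total


def over_quota(root: str, quota: int = WORKSPACE_QUOTA_BYTES) -> bool:
    return workspace_usage_bytes(root) > quota
```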
---
### Container Lifecycle and TTL
**[55:44 — 56:10]**
When a user sends their first message, a container spins up. The container has a **TTL** — if there are no messages for about an hour, the container is killed. When the user returns, a new container is launched with the same mounted workspace data, so everything persists.
> **Azamat** @ 55:55: "It has some TTL. Like, if there are no messages for an hour, the container dies."
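Relaunching a container with the same mounted workspace could look like the sketch below, which only builds the argument dict a Docker SDK call such as `client.containers.run(image="...", detach=True, **config)` would take. All paths and label keys are assumptions; the mount policy (chat data `rw`, shared dependencies and lambda-tools `ro`) follows the meeting:

```python
from datetime import datetime, timedelta, timezone


def sandbox_run_config(chat_id: str, chats_root: str,
                       ttl_seconds: int = 3600) -> dict:
    # TTL is recorded as a label so a cleanup loop can find expired
    # containers; remounting the same host paths is what makes the
    # workspace persist across container restarts.
    expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds)
    return {
        "volumes": {
            f"{chats_root}/{chat_id}": {"bind": "/workspace/chat", "mode": "rw"},
            "/srv/dependencies": {"bind": "/opt/dependencies", "mode": "ro"},
            "/srv/lambda-tools": {"bind": "/opt/lambda-tools", "mode": "ro"},
        },
        "labels": {"chat_id": chat_id, "expires_at": expires_at.isoformat()},
    }
```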
---
### Testing Strategy
**[52:40 — 53:25]**
The question of testing without integrations was raised. Since the Telegram/Discord/Slack integration is another team's responsibility, testing the AI-Agent would require generating mock events. Azamat suggested using AI tools to quickly generate test client instruments.
---
### Repository Structure
**[53:28 — 54:17]**
The team decided on **two repositories**: one for Master, one for AI-Agent. Both will be hosted on Forgejo (their self-hosted Git platform). The architecture document and repository structure are due by Friday (per Kalinin's report).
> **Azamat** @ 53:54: "It's written in the report: by Friday the team is to deliver the repository structure and the architecture document."
---
### Infrastructure: Forgejo, Matrix, Communication
**[57:06 — 1:03:34]**
- **Forgejo** — self-hosted Git service at lambda.coredump.ru, used instead of GitHub for code privacy and Docker registry access
- **Matrix** — team communication platform, registration at element.lambda.coredump.ru (E2E encryption disabled on server). To be used as backup if Telegram gets blocked.
- The team discussed registering on both platforms
> **Azamat** @ 1:01:01: "Our own personal GitHub."
**Visual context:** Forgejo landing page showing "MAI Lambda Lab 3.0" with self-hosted Git repository features. Telegram chat showing Matrix registration instructions.
---
### Agent-Browser Tool
**[1:12:11 — 1:15:32]**
The team discussed the agent-browser tool — a CLI tool for browser automation. They reviewed the GitHub repository for an open-source agent-browser (23.6k stars, Rust-based, ~1MB binary). Key concern: Chromium dependency (~800MB + RAM usage).
**Decision:** Start with running the browser tool locally inside the container. If it proves too heavy, move to a separate API-based service.
> **Azamat** @ 1:14:51: "Let's say by default, well, let's make the decision right now that it goes over the API."
**Visual context:** GitHub repository page for agent-browser showing quick-start commands, installation instructions, and browser automation commands.
---
### Scripting and Tool Implementation
**[1:16:00 — 1:19:07]**
Discussion about how agents should implement tools at runtime. The team considered Lua scripting as a lightweight alternative to Python for agent-generated scripts, avoiding dependency issues. The idea is that the agent writes small scripts to process data without needing to call the LLM again for each step.
> **Azamat** @ 1:18:43: "Python is simpler, because you'll already have Python running."
---
### Security: Prompt Injection and Data Leakage
**[1:19:27 — 1:23:58]**
Extended discussion about security risks:
- **Prompt injection** — malicious prompts embedded in web pages or files could instruct the agent to exfiltrate user data
- **File leakage** — sensitive files (e.g., NDA documents, movie scripts) uploaded by users could be leaked by a compromised agent
- **Defense options discussed:**
- Agent creates an execution plan before acting, which can be reviewed
- User tools should have execute-only permissions
- LLM-level defense is "useless" per Egor
- **Decision:** Security is not Lambda Lab 3.0's sole responsibility — there's another team doing "LLM research" that should add **security metrics** and test defense mechanisms
> **Egor** @ 1:23:28: "And defending at the LLM level is useless."
> **Azamat** @ 1:23:50: "Come up with a security metric."
---
### Tomorrow's Presentation and Next Steps
**[1:24:33 — 1:30:18]**
The team confirmed that the Excalidraw architecture diagram is the main presentation material for the next day's meeting with Kalinin. Azamat will:
- Set up the Forgejo repository
- Create wiki documentation
- Set up the architecture document
- Share Forgejo registration links
The meeting ended with Azamat demonstrating his OpenCode setup — showing agent profiles (code-refactorer, bug-hunter-fixer, worker, documentation-generator, feature-developer, code-explorer, test-engineer, code-reviewer) and system-design.md configuration files that define how AI coding agents interact with the project.
> **Azamat** @ 1:25:09: "Regarding tomorrow's presentation, we're going to show this diagram somehow, right? Not just explain it with hand-waving?"
**Visual context:** Terminal showing OpenCode CLI with Claude Opus 4.6 model, agent profiles, system-design.md configuration file showing subagent delegation rules.
---
## Decisions Made
| # | Decision | Context | Timestamp |
|---|----------|---------|-----------|
| 1 | Platform consists of two applications: Master (orchestrator) and AI-Agent (sandboxed) | Core architecture decision | 03:01 |
| 2 | 10GB workspace per user with overcommit (not physically reserved) | Storage allocation strategy | 05:15, 49:26 |
| 3 | Three tool categories: built-in (in agent code), lambda (platform-wide RO), user (per-user +x only) | Tool taxonomy | 07:30 |
| 4 | Tools must support `--help`, `--json`/`--agent` flags and `man-agent` command | Tool interface contract | 10:38 |
| 5 | User tools have execute-only permissions for security | Security measure | 16:19 |
| 6 | Dependencies pre-mounted from host, cached in S3 | Performance optimization | 18:15 |
| 7 | Rename "projects" to "chats" (C1, C2, C3) | Naming clarity | 37:40 |
| 8 | Message history stored as per-chat history.db files inside workspace | Storage simplicity | 33:14 |
| 9 | Files inside chats auto-delete after 3 days of inactivity | File lifecycle policy | 40:20 |
| 10 | Inactive accounts deleted after N months (Telegram-style) | Account lifecycle | 48:33 |
| 11 | Container TTL ~1 hour without messages | Resource management | 55:55 |
| 12 | Two repositories: one for Master, one for AI-Agent | Code organization | 54:00 |
| 13 | Use Forgejo for Git hosting | Infrastructure | 57:30 |
| 14 | Use Matrix as backup communication (Element client) | Team communication | 1:00:37 |
| 15 | Agent-browser starts as local tool, move to API if too heavy | Browser tool strategy | 1:14:51 |
| 16 | Security is a shared responsibility — LLM metrics team should add security benchmarks | Security ownership | 1:23:48 |
## Action Items
| # | Owner | Task | Deadline | Timestamp |
|---|-------|------|----------|-----------|
| 1 | Azamat | Create Forgejo repository structure for Master and AI-Agent | Today (meeting day) | 1:00:12 |
| 2 | Azamat | Set up wiki and upload architecture documentation | Today | 1:00:19 |
| 3 | Azamat | Share Forgejo registration link with the team | Today | 1:01:24 |
| 4 | Azamat | Create Matrix chat for the team | Today | 1:03:09 |
| 5 | Azamat | Prepare architecture presentation for Kalinin | Tomorrow | 1:24:33 |
| 6 | Team | Register on Forgejo (lambda.coredump.ru) | Today | 1:01:06 |
| 7 | Team | Register on Matrix (element.lambda.coredump.ru) | Today | 1:00:37 |
| 8 | Team | Define tool interface contract (--help, --json, man-agent) | Not specified | 11:15 |
| 9 | Team | Set up dev-environment configuration (CLAUDE.md-style) | Not specified | 1:26:22 |
| 10 | Other team | Define security metrics for LLM safety | Not specified | 1:23:50 |
## Key Quotes
> "Our task is specifically to build an agent that lives in an isolated environment, while still being able to use some tools." — Azamat, 02:51
> "We allocate 10 gigabytes of storage to them. Simple as that. And within those 10 gigabytes they can do whatever they want." — Azamat, 05:15
> "Because that would make it possible to pick up a malicious prompt somewhere, go into the UserTools folder, put some print(token) in there, and leak the tokens out." — Egor, 15:58
> "Every query to the DB has to walk the index and find all the records with that project id, and they are scattered across different chunks." — Egor, 34:45
> "Think of it as Google Drive, only ours." — David, 22:08
> "And defending at the LLM level is useless." — Egor, 1:23:28
> "You understand, this is just a first architecture thrown together in two days. So that we at least start thinking in this direction." — Azamat, 50:14
## Visual Artifacts
### Architecture Diagram (Excalidraw — primary artifact)
The main diagram evolved throughout the meeting on Excalidraw and shows the following system:
**Top-level architecture:**
- **Stream layer** (Путиловский-team responsibility): Telegram, Discord, Slack integrations
- **Docker layer**: Multiple AI-Agent containers
- **Master**: Orchestrator connected to DB and S3
- **S3**: Artifact/file storage
- **DB**: Session management, metadata
**AI-Agent Sandbox (per-user):**
- **Workspace 10GB** containing:
- Chats: C1 (file1.pdf, file2.png, history.db), C2 (file3.docx, history.db), C3 (video.mp4, history.db)
- User-tool (+x only)
- **Mount points:**
- `Mount cur-chat` — current chat data
- `Mount dependencies` — cached tool dependencies
- `Mount lambda-tools` — shared platform tools
- **Built-in tools:** web-search, fetch-url, bash
- **Lambda tools (RO):** markitdown (with --help, --json/--agent, man-agent interface)
**Machine-level view:**
- Linux base
- Shared Env layer
- Lambda-tools (shared)
- Multiple Agent-AI instances, each with own workspace
### ChatGPT Projects Reference
**[36:30]** Azamat showed ChatGPT's project interface to illustrate the concept of projects containing chats with attached files.
### Forgejo Instance
**[57:30]** The team's Forgejo instance at "MAI Lambda Lab 3.0" — self-hosted Git repository service.
### Agent-Browser GitHub Repository
**[1:13:30]** agent-browser repository (23.6k stars, 58 releases, v0.21.2, Rust-based CLI tool for browser automation).
### OpenCode Agent Setup
**[1:28:00 — 1:29:30]** Azamat's terminal showing OpenCode CLI with Claude Opus 4.6, multiple agent profiles (@code-refactorer, @bug-hunter-fixer, @worker, @documentation-generator, @feature-developer, @code-explorer, @test-engineer, @code-reviewer), and system-design.md configuration for agent behavior.
## Open Questions
1. **How exactly should the tool interface contract work?** The `--help`, `--json`/`--agent` flags and `man-agent` command were proposed but not finalized.
2. **Should user tools be writable by the agent or only loaded from external sources?** Security implications were raised but not fully resolved.
3. **What happens when a heavy tool (like agent-browser) requires more resources than the container allows?** The "start local, move to API" approach was agreed on but details remain.
4. **How to handle the scenario where an agent needs a library that isn't pre-installed?** The scripting discussion suggested Python is available, but library management wasn't resolved.
5. **Security architecture:** Who is responsible for defending against prompt injection attacks? The LLM metrics team was suggested, but no formal agreement was reached.
6. **Memory and context management:** SOUL.md/MEMORY.md-style files were mentioned but not designed.
7. **What should the Kubernetes setup look like?** Mentioned as needed "in the second half of spring" but not planned.
8. **Detailed flow for file lifecycle:** When exactly are S3 artifacts cleaned up? The "3-day rule" applies to workspace files, but S3 cleanup timing remains unclear.
9. **Browser Use vs. agent-browser:** The relationship with Durevich's team regarding browser integration needs clarification.
## Source
Original transcript: `meeting_1.txt`

```diff
@@ -5,6 +5,7 @@ description = ""
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
+    "docker>=7.1.0",
     "fastapi>=0.116.1",
     "opentelemetry-api>=1.31.1",
     "opentelemetry-exporter-otlp-proto-http>=1.31.1",
@@ -21,6 +22,7 @@ dev = [
     "mypy>=1.18.2",
     "pytest>=8.4.2",
     "ruff>=0.13.1",
+    "types-docker>=7.1.0.20260402",
     "types-pyyaml>=6.0.12.20250915",
 ]
```

`repository/chat.py` (new file, 24 lines):

```python
from uuid import UUID

from domain.chat import Chat
from usecase.interface import ChatRepository


class InMemoryChatRepository(ChatRepository):
    def __init__(self) -> None:
        self._by_id: dict[UUID, Chat] = {}

    def get(self, chat_id: UUID) -> Chat | None:
        return self._by_id.get(chat_id)

    def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]:
        return sorted(
            (c for c in self._by_id.values() if c.workspace_id == workspace_id),
            key=lambda c: (c.created_at, c.chat_id),
        )

    def save(self, chat: Chat) -> None:
        self._by_id[chat.chat_id] = chat

    def delete(self, chat_id: UUID) -> None:
        self._by_id.pop(chat_id, None)
```

`repository/chat_file.py` (new file, 57 lines):

```python
from uuid import UUID

from domain.chat import ChatAttachmentName, ChatFile
from usecase.interface import ChatFileRepository


class InMemoryChatFileRepository(ChatFileRepository):
    def __init__(self) -> None:
        self._by_id: dict[UUID, ChatFile] = {}
        self._by_chat_and_name: dict[tuple[UUID, str], UUID] = {}

    def get(self, file_id: UUID) -> ChatFile | None:
        return self._by_id.get(file_id)

    def get_by_chat_id_and_name(
        self,
        chat_id: UUID,
        name: ChatAttachmentName,
    ) -> ChatFile | None:
        fid = self._by_chat_and_name.get((chat_id, name.value))
        if fid is None:
            return None
        return self._by_id.get(fid)

    def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]:
        return sorted(
            (f for f in self._by_id.values() if f.chat_id == chat_id),
            key=lambda f: (f.created_at, f.file_id),
        )

    def save(self, chat_file: ChatFile) -> None:
        key = (chat_file.chat_id, chat_file.name.value)
        existing_at_key = self._by_chat_and_name.get(key)
        if existing_at_key is not None and existing_at_key != chat_file.file_id:
            self._by_id.pop(existing_at_key, None)
        previous = self._by_id.get(chat_file.file_id)
        if previous is not None:
            prev_key = (previous.chat_id, previous.name.value)
            if self._by_chat_and_name.get(prev_key) == previous.file_id:
                del self._by_chat_and_name[prev_key]
        self._by_id[chat_file.file_id] = chat_file
        self._by_chat_and_name[key] = chat_file.file_id

    def delete(self, file_id: UUID) -> None:
        chat_file = self._by_id.pop(file_id, None)
        if chat_file is None:
            return
        key = (chat_file.chat_id, chat_file.name.value)
        if self._by_chat_and_name.get(key) == file_id:
            del self._by_chat_and_name[key]

    def delete_by_chat_id(self, chat_id: UUID) -> None:
        file_ids = [f.file_id for f in self._by_id.values() if f.chat_id == chat_id]
        for fid in file_ids:
            self.delete(fid)
```

New file (70 lines; filename not shown in this view):

```python
import threading
from dataclasses import dataclass
from types import TracebackType
from typing import Protocol
from uuid import UUID

from usecase.interface import LockContext, SandboxLifecycleLocker


class _SyncLock(Protocol):
    def acquire(self, blocking: bool = True, timeout: float = -1) -> bool: ...
    def release(self) -> None: ...


@dataclass(slots=True)
class _LockEntry:
    lock: _SyncLock
    users: int = 0


class _ChatLock(LockContext):
    def __init__(
        self,
        locker: 'ProcessLocalSandboxLifecycleLocker',
        chat_id: UUID,
        entry: _LockEntry,
    ) -> None:
        self._locker = locker
        self._chat_id = chat_id
        self._entry = entry

    def __enter__(self) -> None:
        self._entry.lock.acquire()

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        traceback: TracebackType | None,
    ) -> bool | None:
        self._entry.lock.release()
        self._locker._release(self._chat_id, self._entry)
        return None


class ProcessLocalSandboxLifecycleLocker(SandboxLifecycleLocker):
    def __init__(self) -> None:
        self._registry_lock = threading.Lock()
        self._locks_by_chat_id: dict[UUID, _LockEntry] = {}

    def lock(self, chat_id: UUID) -> LockContext:
        with self._registry_lock:
            entry = self._locks_by_chat_id.get(chat_id)
            if entry is None:
                entry = _LockEntry(lock=threading.Lock())
                self._locks_by_chat_id[chat_id] = entry
            entry.users += 1
        return _ChatLock(self, chat_id, entry)

    def _release(self, chat_id: UUID, entry: _LockEntry) -> None:
        with self._registry_lock:
            entry.users -= 1
            if entry.users != 0:
                return
            current_entry = self._locks_by_chat_id.get(chat_id)
            if current_entry is entry:
                del self._locks_by_chat_id[chat_id]
```
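The locker above hands out one lock per `chat_id` and drops the registry entry only when the last holder releases it. A standalone simplification of the same refcounting pattern (a stub class with no `usecase.interface` ports, for illustration only):

```python
import threading


class ChatLockRegistry:
    # Simplified sketch of ProcessLocalSandboxLifecycleLocker: one lock
    # per chat_id, reference-counted so an entry is removed from the
    # registry once the last user releases it.
    def __init__(self) -> None:
        self._registry_lock = threading.Lock()
        self._entries: dict[str, tuple[threading.Lock, list[int]]] = {}

    def acquire(self, chat_id: str) -> None:
        with self._registry_lock:
            entry = self._entries.get(chat_id)
            if entry is None:
                entry = (threading.Lock(), [0])
                self._entries[chat_id] = entry
            entry[1][0] += 1  # count holders before blocking on the lock
        entry[0].acquire()

    def release(self, chat_id: str) -> None:
        entry = self._entries[chat_id]
        entry[0].release()
        with self._registry_lock:
            entry[1][0] -= 1
            if entry[1][0] == 0 and self._entries.get(chat_id) is entry:
                del self._entries[chat_id]

    def size(self) -> int:
        with self._registry_lock:
            return len(self._entries)
```

The refcount prevents the unbounded-growth problem of a naive `dict[chat_id, Lock]` that never forgets idle chats, which matches the "[fix] lock del idle" commit in this compare.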

New file (45 lines; filename not shown in this view):

```python
import threading
from datetime import datetime
from uuid import UUID

from domain.sandbox import SandboxSession
from usecase.interface import SandboxSessionRepository


class InMemorySandboxSessionRepository(SandboxSessionRepository):
    def __init__(self) -> None:
        self._sessions_by_chat_id: dict[UUID, SandboxSession] = {}
        self._lock = threading.Lock()

    def replace_all(self, sessions: list[SandboxSession]) -> None:
        with self._lock:
            self._sessions_by_chat_id = {
                session.chat_id: session for session in sessions
            }

    def get_active_by_chat_id(self, chat_id: UUID) -> SandboxSession | None:
        with self._lock:
            return self._sessions_by_chat_id.get(chat_id)

    def list_expired(self, now: datetime) -> list[SandboxSession]:
        with self._lock:
            return [
                session
                for session in self._sessions_by_chat_id.values()
                if session.expires_at <= now
            ]

    def count_active(self) -> int:
        with self._lock:
            return len(self._sessions_by_chat_id)

    def save(self, session: SandboxSession) -> None:
        with self._lock:
            self._sessions_by_chat_id[session.chat_id] = session

    def delete(self, session_id: UUID) -> None:
        with self._lock:
            for chat_id, session in tuple(self._sessions_by_chat_id.items()):
                if session.session_id == session_id:
                    del self._sessions_by_chat_id[chat_id]
                    return
```

`repository/workspace.py` (new file, 26 lines):

```python
from uuid import UUID

from domain.workspace import Workspace
from usecase.interface import WorkspaceRepository


class InMemoryWorkspaceRepository(WorkspaceRepository):
    def __init__(self) -> None:
        self._by_id: dict[UUID, Workspace] = {}
        self._user_id_to_workspace_id: dict[UUID, UUID] = {}

    def get(self, workspace_id: UUID) -> Workspace | None:
        return self._by_id.get(workspace_id)

    def get_by_user_id(self, user_id: UUID) -> Workspace | None:
        wid = self._user_id_to_workspace_id.get(user_id)
        if wid is None:
            return None
        return self._by_id.get(wid)

    def save(self, workspace: Workspace) -> None:
        existing_wid = self._user_id_to_workspace_id.get(workspace.user_id)
        if existing_wid is not None and existing_wid != workspace.workspace_id:
            self._by_id.pop(existing_wid, None)
        self._by_id[workspace.workspace_id] = workspace
        self._user_id_to_workspace_id[workspace.user_id] = workspace.workspace_id
```

377
tasks.md
View file

@ -1,142 +1,361 @@
# План работ: web-python-skelet
# План работ: master-service MVP sandbox
## Контекст
- Источник требований: `AGENTS.md` и ADR `docs/001`-`docs/004`
- Текущее состояние: в `adapter/`, `domain/`, `usecase/`, `repository/`, `test/` пока только `__init__.py`
- Отсутствуют рабочие каталоги и файлы из целевой структуры: `adapter/config/`, `adapter/otel/`, `adapter/di/`, `adapter/http/fastapi/`, `config/`, `main.py`
- Ограничения: `docs/` и `tasks.md` не добавлять в git; коммиты не делать; работать по одной задаче
- ADR пока покрывают архитектуру, новые ADR нужны только если по ходу работ изменится решение
- Источники требований: `docs/master.md`, `meetings/meeting_1_document.md`, `README.md`, `docs/*`
- Базовый template уже готов: typed config, DI container, observability, FastAPI adapter и versioned API под `/api/v1`
- Текущая цель: минимальное управление Docker sandbox без auth
- MVP API: `POST /api/v1/create`
- Sandbox policy: TTL `300` секунд, одна активная sandbox на `chat_id`, повторный `create` переиспользует активную сессию
- Volume policy: chat volume `rw`, dependencies volume `ro`, lambda-tools volume `ro`
- Host paths вычисляются из typed config, а HTTP request передает только `chat_id`
- Cleanup выполняется периодическим in-process loop внутри master-service
## Вне текущего scope
- auth и access control
- p2p lease и WebSocket transport
- workspace/chat CRUD API
- chat files, artifacts, S3, quota и retention policy
- центральная БД и multi-node orchestration
## Правила выполнения
- Каждую задачу выполнять отдельным заходом, без параллельной реализации
- Каждый субагент отдает diff, список измененных файлов и проверку, но не делает commit
- Если в задаче всплывает архитектурное изменение, остановиться и вынести вопрос на согласование
- Выполняем по одной задаче
- Коммиты не делаем
- Если по ходу нужна смена архитектуры, останавливаемся и согласуем решение
- `domain/` и `usecase/` не импортируют Docker, FastAPI, OpenTelemetry, env или YAML
- Inner layers работают только через минимальные domain сущности и usecase порты
## Очередь задач
### T01. Базовый каркас домена и usecase
### M01. ADR и минимальный sandbox scaffolding
- Исполнитель: `primary-agent` (scaffolding)
- Исполнитель: `primary-agent`
- Статус: completed
- Зависимости: нет
- Commit required: no
- Scope: создать базовые файлы и контракты в `domain/`, `usecase/`, `repository/`
- Файлы: `domain/error.py`, `domain/user.py`, `usecase/interface.py`, `usecase/user.py`, `repository/user.py`
- Критерии приемки: зависимости направлены внутрь; в `domain/` и `usecase/` нет FastAPI/OTel; есть пример сущности, ошибок, портов и простого usecase
- Scope: зафиксировать MVP-решение в ADR и создать минимальные сущности, ошибки и usecase-контракты для sandbox orchestration
- Файлы: `docs/006-mvp-docker-sandbox-orchestration.md`, `domain/sandbox.py`, `domain/error.py`, `usecase/interface.py`, `usecase/sandbox.py`
- Критерии приемки: в `domain/` есть минимальная `SandboxSession` и sandbox-ошибки; в `usecase/` есть порты `SandboxSessionRepository`, `SandboxRuntime` и `Clock`; созданы скелеты `CreateSandbox` и `CleanupExpiredSandboxes`; ADR занимает 10-20 строк
### T02. Конфиг из YAML и env
### M02. Typed config для sandbox runtime
- Субагент: `feature-developer`
- Статус: completed
- Зависимости: `T01`
- Зависимости: `M01`
- Commit required: no
- Scope: собрать typed-config слой в `adapter/config/` и подготовить базовые yaml-файлы
- Файлы: `adapter/config/*`, `config/app.yaml`
- Критерии приемки: конфиг собирается в одну dataclass-структуру; секреты читаются из env; парсинг и валидация не протекают в inner layers
- Scope: расширить typed-config слоем `sandbox` с настройками image, TTL, cleanup interval, host paths и container mount paths
- Файлы: `adapter/config/model.py`, `adapter/config/loader.py`, `config/app.yaml`
- Решение: chat host path строится как путь под общим `sandbox.chats_root/<chat_id>`; request не передает host path напрямую
- Критерии приемки: конфиг собирается в typed dataclass-дерево; дефолтный TTL равен `300`; есть отдельные настройки для `chats_root`, `dependencies_host_path`, `lambda_tools_host_path`, `chat_mount_path`, `dependencies_mount_path`, `lambda_tools_mount_path`, `cleanup_interval_seconds`; inner layers не читают env
### T03. Observability порты и OTel adapter
### M03. Docker runtime adapter для sandbox lifecycle
- Субагент: `feature-developer`
- Статус: completed
- Зависимости: `T01`, `T02`
- Зависимости: `M01`, `M02`
- Commit required: no
- Scope: реализовать логгер, метрики, трейсинг и bootstrap OTel в `adapter/otel/` через интерфейсы из `usecase/interface.py`
- Файлы: `adapter/otel/*`, `config/otel-collector.yaml`
- Критерии приемки: inner layers знают только интерфейсы; OTLP exporter настраивается из конфига; нет кастомного trace middleware
- Scope: реализовать outer adapter над Docker для создания и остановки sandbox контейнера с нужными labels и mount policy
- Файлы: `adapter/docker/runtime.py`, `adapter/docker/__init__.py`
- Ограничения: все Docker-детали остаются в `adapter/`; runtime не должен протекать во внутренние слои
- Критерии приемки: runtime умеет создать sandbox container по входным параметрам usecase; chat volume монтируется как `rw`; dependency и lambda-tools volumes монтируются как `ro`; контейнер получает labels с `session_id`, `chat_id` и `expires_at`; runtime переводит ошибки Docker в понятные исключения адаптера
### T04. Composition root и lifetime singleton-объектов
### M04. In-memory session repository и usecase `CreateSandbox`
- Субагент: `feature-developer`
- Статус: completed
- Зависимости: `T01`, `T02`, `T03`
- Зависимости: `M01`, `M02`, `M03`
- Commit required: no
- Scope: собрать контейнер и startup wiring в `adapter/di/`
- Файлы: `adapter/di/container.py`, `adapter/di/__init__.py`
- Критерии приемки: repository/usecase создаются один раз на старте; контейнер хранит инстансы явно; нет пересоздания на HTTP-запрос
- Scope: реализовать in-memory registry активных sandbox-сессий и usecase создания sandbox с логикой reuse по `chat_id`
- Файлы: `repository/sandbox_session.py`, `usecase/sandbox.py`, `adapter/di/container.py`
- Решение: если по `chat_id` есть неистекшая сессия, usecase возвращает ее без нового container start; если сессия истекла, usecase инициирует stop старой sandbox и создает новую
- Критерии приемки: одна активная sandbox на `chat_id`; TTL-логика использует порт `Clock`; usecase не импортирует Docker; container wiring остается singleton-based
### M05. Cleanup of expired sandboxes and lifecycle wiring
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M04`
- Commit required: no
- Scope: implement a usecase that cleans up expired sandboxes and wire a periodic cleanup loop into the FastAPI lifecycle
- Files: `usecase/sandbox.py`, `adapter/di/container.py`, `adapter/http/fastapi/app.py`, and `adapter/http/fastapi/dependencies.py` if needed
- Constraints: do not break the ADR on early OTel instrumentation; the lifecycle loop must start and stop exactly once
- Acceptance criteria: cleanup finds expired sessions, stops the sandbox via the runtime, and removes them from the registry; the cleanup interval comes from config; shutdown terminates the background task cleanly
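The start-once/stop-once loop contract can be sketched with plain asyncio; the class name `CleanupLoop` and its wiring are illustrative assumptions, since the real code lives behind the FastAPI lifespan in `adapter/http/fastapi/app.py`.

```python
import asyncio


class CleanupLoop:
    """Periodic background loop: started once on startup, cancelled once on shutdown."""

    def __init__(self, cleanup_fn, interval: float) -> None:
        self._cleanup_fn = cleanup_fn  # async callable wrapping the cleanup usecase
        self._interval = interval      # taken from config in the real app
        self._task: asyncio.Task | None = None

    def start(self) -> None:
        # Called exactly once from app startup.
        self._task = asyncio.get_running_loop().create_task(self._run())

    async def _run(self) -> None:
        while True:
            await asyncio.sleep(self._interval)
            await self._cleanup_fn()

    async def stop(self) -> None:
        # Called exactly once from app shutdown; awaits the cancelled task.
        if self._task is not None:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
```

In a lifespan context manager, `start()` goes before the `yield` and `stop()` after it.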
### M06. HTTP endpoint `POST /api/v1/create`
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M04`
- Commit required: no
- Scope: add a minimal HTTP endpoint that creates or reuses a sandbox, without auth
- Files: `adapter/http/fastapi/schemas.py`, `adapter/http/fastapi/dependencies.py`, `adapter/http/fastapi/routers/v1/router.py`, and `adapter/di/container.py` if needed
- Request: `{ "chat_id": "..." }`
- Response: `session_id`, `chat_id`, `container_id`, `status`, `expires_at`
- Acceptance criteria: the router stays thin; the handler only translates HTTP input into a usecase command and maps errors to HTTP; the endpoint lives at `/api/v1/create`; no auth is added
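The thin-handler rule can be shown without FastAPI at all: parse input, call the usecase, map errors to status codes, and nothing else. The names `CreateRequest`, `SandboxResponse`, and `create_handler` are illustrative assumptions, not the real schema classes.

```python
import uuid
from dataclasses import dataclass


@dataclass
class CreateRequest:
    chat_id: uuid.UUID


@dataclass
class SandboxResponse:
    session_id: str
    chat_id: str
    container_id: str
    status: str
    expires_at: float


def create_handler(payload: dict, usecase) -> tuple[int, dict]:
    """Thin handler: HTTP input -> usecase command -> HTTP output. No business logic."""
    try:
        req = CreateRequest(chat_id=uuid.UUID(payload["chat_id"]))
    except (KeyError, ValueError, TypeError):
        return 422, {"detail": "chat_id must be a UUID"}
    session = usecase.execute(req.chat_id)
    body = SandboxResponse(
        session_id=str(session.session_id),
        chat_id=str(session.chat_id),
        container_id=session.container_id,
        status=session.status,
        expires_at=session.expires_at,
    )
    return 200, body.__dict__
```

In the real adapter, pydantic models in `schemas.py` would do the parsing/serialization and FastAPI would map the tuple to a response.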
### M07. Tests for create, reuse, TTL, and mount policy
- Subagent: `test-engineer`
- Status: completed
- Dependencies: `M03`, `M04`, `M05`, `M06`
- Commit required: no
- Scope: cover the key MVP behavior with tests without running a real production Docker stack
- Files: `test/*`
- Acceptance criteria: unit tests for `CreateSandbox` and `CleanupExpiredSandboxes` with a fake clock; an HTTP smoke test for `POST /api/v1/create`; an adapter-level test with a mock Docker client for the mount policy `chat=rw`, `deps=ro`, `tools=ro`; tests do not pull FastAPI or Docker into inner-layer tests
### M08. Architecture and boundary review of the sandbox MVP
- Subagent: `code-reviewer`
- Status: completed
- Dependencies: `M07`
- Commit required: no
- Scope: verify clean architecture compliance, dependency direction, and adherence to the MVP constraints
- Files: all changed code
- Acceptance criteria: Docker stays only in the outer adapter; FastAPI does not leak into `domain/` or `usecase/`; TTL and mount policy read as explicit, testable rules; findings are phrased as targeted fixes or a confirmation of readiness
## Follow-up after the M08 review
### M09. Serialize the sandbox lifecycle per `chat_id`
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M08`
- Commit required: no
- Scope: eliminate races between concurrent `create` and cleanup for the same `chat_id`
- Files: `usecase/interface.py`, `usecase/sandbox.py`, `repository/sandbox_lock.py` or another outer-layer lock implementation, `adapter/di/container.py`
- Decision: introduce an explicit usecase port for a process-local lock keyed by `chat_id`; the outer-layer implementation keeps a per-chat lock registry; `CreateSandbox` and `CleanupExpiredSandboxes` perform session mutations under this lock
- Acceptance criteria: concurrent creates for one `chat_id` never start two sandboxes; a create-vs-cleanup race leaves no orphan container; locking does not leak into the HTTP or Docker adapter as business logic
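A minimal sketch of the per-chat lock registry, assuming an asyncio-based implementation; the class and method names (`ChatLockRegistry`, `lock_for`) are assumptions, not the real `repository/sandbox_lock.py` API.

```python
import asyncio
import uuid


class ChatLockRegistry:
    """Process-local per-chat locks behind a usecase port."""

    def __init__(self) -> None:
        self._locks: dict[uuid.UUID, asyncio.Lock] = {}

    def lock_for(self, chat_id: uuid.UUID) -> asyncio.Lock:
        # Same chat_id -> same Lock object, so create and cleanup serialize.
        if chat_id not in self._locks:
            self._locks[chat_id] = asyncio.Lock()
        return self._locks[chat_id]


async def demo() -> list[tuple[str, str]]:
    registry = ChatLockRegistry()
    chat_id = uuid.uuid4()
    order: list[tuple[str, str]] = []

    async def lifecycle_mutation(tag: str) -> None:
        async with registry.lock_for(chat_id):   # critical section per chat
            order.append((tag, "enter"))
            await asyncio.sleep(0.01)            # simulate container work
            order.append((tag, "exit"))

    # Concurrent create-vs-cleanup for the same chat cannot interleave.
    await asyncio.gather(lifecycle_mutation("create"),
                         lifecycle_mutation("cleanup"))
    return order
```

The usecases see only the port (`lock_for` returning an async context), so the locking strategy stays replaceable in the outer layer.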
### M10. Resilient cleanup and moving blocking cleanup off the event loop
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M09`
- Commit required: no
- Scope: make cleanup resilient to partial failures and stop blocking the FastAPI event loop with synchronous Docker stop calls
- Files: `usecase/sandbox.py`, `adapter/http/fastapi/app.py`
- Decision: `CleanupExpiredSandboxes` handles stop/delete for each session independently and continues the batch; the HTTP cleanup loop offloads blocking cleanup work to a thread via adapter-layer orchestration
- Acceptance criteria: a failure on one expired session does not prevent cleaning the rest; the background cleanup loop does not die after an error; blocking cleanup no longer runs directly on the event loop
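The per-session error isolation can be sketched as follows; the function name and return shape are illustrative. In the real app, an adapter-level wrapper such as `asyncio.to_thread(...)` would keep this blocking work off the event loop.

```python
def cleanup_expired(sessions, stop_fn, log_fn=print):
    """Stop each expired session independently; one failure never aborts the batch.

    Returns (cleaned, failed) so callers and metrics can see both outcomes.
    """
    cleaned, failed = [], []
    for session in sessions:
        try:
            stop_fn(session)          # may raise on Docker errors
            cleaned.append(session)
        except Exception as exc:      # isolate each session's failure
            log_fn(f"cleanup failed for {session}: {exc}")
            failed.append(session)
    return cleaned, failed
```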
### M11. Remove the non-MVP user surface from the app
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M08`
- Commit required: no
- Scope: remove the user endpoint and seed-user wiring that are unrelated to the MVP from the runtime app
- Files: `adapter/http/fastapi/routers/v1/router.py`, `adapter/http/fastapi/dependencies.py`, `adapter/http/fastapi/schemas.py`, `adapter/di/container.py`
- Decision: keep only `health` and the sandbox API in the MVP; the example user code may stay in the repository as a template but must not be wired into the runtime app
- Acceptance criteria: `GET /api/v1/users/{user_id}` is no longer published; the container does not create a seeded user repository/usecase for the runtime app; the unrequested user surface area is gone
### M12. Regression tests for race conditions and cleanup resilience
- Subagent: `test-engineer`
- Status: completed
- Dependencies: `M09`, `M10`, `M11`
- Commit required: no
- Scope: add tests for the new guarantees introduced by the review fixes
- Files: `test/*`
- Acceptance criteria: a test for duplicate create on one `chat_id`; a test for the create-vs-cleanup race or an equivalent serialization check; a test that cleanup continues the batch after a stop failure; HTTP smoke/regression tests updated for the removal of the user endpoint
### M13. Repeat boundary review after the fix pass
- Subagent: `code-reviewer`
- Status: completed
- Dependencies: `M12`
- Commit required: no
- Scope: verify that the must-fix and should-fix findings from `M08` are closed without violating clean architecture
- Files: all code changed in `M09`-`M12`
- Acceptance criteria: no race on one-sandbox-per-chat; cleanup neither blocks the event loop nor dies on the first stop failure; the runtime app publishes no extra user API; remaining findings are minor or absent
### M14. Fix mypy typing of the tests after the sandbox MVP
- Subagent: `test-engineer`
- Status: completed
- Dependencies: `M07`
- Commit required: no
- Scope: resolve the current `make pre-commit` failures in the test suite without changing production behavior
- Files: `test/test_docker_runtime.py`, `test/test_create_http.py`, and shared test helpers in `test/*` if needed
- Errors: an incompatible fake Docker client for `DockerSandboxRuntime`; imprecise typing of `run_calls` and the ASGI message payload; use of `object` instead of typed test doubles for `AppRepositories`, `AppUsecases`, `AppContainer`
- Decision: type the test doubles via compatible fake classes or local protocols; remove `object` and non-indexable `dict[str, object]` where mypy cannot infer types
- Acceptance criteria: `uv run mypy .` passes; `make pre-commit` reaches at least the pytest stage; production code is unchanged, or changed only where strictly needed for testability
## Follow-up after the M13 review
### M15. Canonicalize and validate `chat_id`
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M13`
- Commit required: no
- Scope: make `chat_id` strictly a UUID and remove the path-alias / whole-root-mount risk posed by non-canonical values
- Files: `adapter/http/fastapi/schemas.py`, `adapter/docker/runtime.py`, plus `usecase/sandbox.py` and tests in `test/*` if needed
- Decision: accept `chat_id` as a UUID at the HTTP boundary, use its canonical string form downstream in usecase/repository/path construction, and never accept arbitrary path-like strings
- Acceptance criteria: non-UUID values are rejected at the HTTP boundary with `400/422`; the UUID is the single canonical value for the lock key, repository key, and filesystem path; regression tests for invalid `chat_id` exist
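A sketch of the boundary check, assuming a small helper function (`canonical_chat_id` is an illustrative name). Note that `uuid.UUID(...)` accepts several textual forms (braces, `urn:uuid:` prefix, no dashes); using `str(value)` downstream collapses them all to the single dashed lowercase canonical form, so path-like aliases can never reach lock keys or mount paths.

```python
import uuid


def canonical_chat_id(raw: str) -> uuid.UUID:
    """Reject anything that is not a UUID string; no paths, no aliases."""
    try:
        value = uuid.UUID(raw)
    except (ValueError, AttributeError, TypeError) as exc:
        # The HTTP adapter maps this to 400/422.
        raise ValueError(f"chat_id must be a UUID, got {raw!r}") from exc
    return value  # str(value) is the single canonical form used downstream
```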
### M16. Lifecycle reconciliation on startup/shutdown
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M13`
- Commit required: no
- Scope: close the restart gap between the in-memory registry and Docker containers that are already running
- Files: `adapter/docker/runtime.py`, `adapter/di/container.py`, `adapter/http/fastapi/app.py`, plus new outer-layer helper files and tests in `test/*` if needed
- Decision: the primary strategy is reconciliation by Docker labels on startup, so that after a restart the master-service keeps seeing already-running sandboxes and does not start duplicates; graceful-shutdown cleanup remains an optional addition
- Acceptance criteria: after a restart the master-service can restore/synchronize state from Docker labels without losing running agent containers; one-sandbox-per-chat is not violated by an empty in-memory registry; the lifecycle policy is explicitly documented and covered by tests
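Label-based reconciliation can be sketched against a faked runtime listing. The label keys mirror the earlier task description (`session_id`, `chat_id`, `expires_at`); the container-dict shape and function name are assumptions about what the runtime port would return.

```python
import uuid


def reconcile(registry: dict, running_containers: list[dict]) -> None:
    """Rebuild the in-memory registry from containers carrying sandbox labels.

    running_containers: [{"id": str, "labels": {...}}, ...] as a runtime
    port might return after a restart. Idempotent: existing registry
    entries are never clobbered by older container state.
    """
    for container in running_containers:
        labels = container["labels"]
        chat_id = uuid.UUID(labels["chat_id"])
        if chat_id not in registry:
            registry[chat_id] = {
                "session_id": uuid.UUID(labels["session_id"]),
                "container_id": container["id"],
                "expires_at": float(labels["expires_at"]),
            }
```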
### M17. Lifecycle management for per-chat locks
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M13`
- Commit required: no
- Scope: bound the lock registry, which otherwise grows with every `chat_id` ever seen
- Files: `repository/sandbox_lock.py`, plus tests in `test/*` if needed
- Decision: add an eviction/ref-count/weakref policy to the outer lock registry without breaking lifecycle serialization for an active `chat_id`
- Acceptance criteria: the lock registry does not grow without bound; serialization for active chats is preserved; the behavior is covered by tests
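One possible shape of the eviction policy is a ref-counted registry: an entry is dropped once no task holds or waits for it, bounding growth by currently active chats. This is a sketch of the ref-count variant only (weakref is an alternative); names are illustrative.

```python
import asyncio
import uuid
from contextlib import asynccontextmanager


class EvictingLockRegistry:
    """Per-chat locks with ref-counted eviction: size is bounded by active chats."""

    def __init__(self) -> None:
        self._entries: dict[uuid.UUID, tuple[asyncio.Lock, int]] = {}

    @asynccontextmanager
    async def locked(self, chat_id: uuid.UUID):
        lock, refs = self._entries.get(chat_id, (asyncio.Lock(), 0))
        self._entries[chat_id] = (lock, refs + 1)   # register interest first
        try:
            async with lock:
                yield
        finally:
            lock2, refs2 = self._entries[chat_id]
            if refs2 == 1:
                del self._entries[chat_id]          # last user: evict the entry
            else:
                self._entries[chat_id] = (lock2, refs2 - 1)

    def size(self) -> int:
        return len(self._entries)
```

Because the ref count is incremented before waiting, concurrent tasks for the same chat always share one `Lock` object, so serialization is preserved while idle entries disappear.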
### M18. Move sandbox ids to UUID types
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M15`
- Commit required: no
- Scope: make `chat_id` and `session_id` `UUID`-typed within the sandbox scope, keeping `container_id` a string as an external Docker identifier
- Files: `domain/sandbox.py`, `usecase/interface.py`, `usecase/sandbox.py`, `repository/sandbox_session.py`, `adapter/http/fastapi/*`, `adapter/docker/runtime.py`, `adapter/di/container.py`, `test/*`
- Decision: the HTTP boundary accepts/returns UUIDs, usecases and repositories work with UUID objects, and Docker labels keep serializing to strings via `str(uuid)`
- Acceptance criteria: within the sandbox flow `chat_id` and `session_id` are no longer strings; `container_id` stays `str`; pydantic serializes UUIDs correctly in responses; `make pre-commit` passes
## Follow-up after issue #11 observability
### M19. ADR and observability contracts for the sandbox lifecycle
- Executor: `primary-agent`
- Status: completed
- Dependencies: `M18`
- Commit required: yes
- Commit message: `add sandbox observability contracts`
- Scope: capture the sandbox lifecycle observability policy in an ADR-lite and prepare minimal contracts for traces and current-state metrics without violating clean architecture
- Files: `docs/008-sandbox-lifecycle-observability.md`, `usecase/interface.py`, `repository/sandbox_session.py`, `adapter/otel/metrics.py`, `adapter/observability/noop.py`
- Decision: add a `set(...)` operation to the `Metrics` port for gauge-like current-state signals; add `count_active()` to `SandboxSessionRepository` as the source of truth for `sandbox.active.count`
- Acceptance criteria: the ADR is 10-20 lines; inner layers still know only the `Logger`/`Metrics`/`Tracer` ports; the current-state metric for active sandboxes is expressible without OTel imports in the inner layer
### M20. Traces and metrics in the sandbox usecases
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M19`
- Commit required: yes
- Commit message: `instrument sandbox usecases`
- Scope: add spans and key lifecycle metrics to `CreateSandbox` and `CleanupExpiredSandboxes`
- Files: `usecase/sandbox.py`, `adapter/di/container.py`, plus tests in `test/*` if needed
- Decision: usecases receive `Metrics` and `Tracer` through the constructor; `CreateSandbox` and `CleanupExpiredSandboxes` publish `sandbox.create.total`, `sandbox.cleanup.total`, `sandbox.cleanup.error.total` and update `sandbox.active.count` after registry mutations
- Acceptance criteria: spans `usecase.create_sandbox` and `usecase.cleanup_expired_sandboxes` exist; span and metric attributes include the key lifecycle identifiers/result fields; the reuse/replace/cleanup paths are observable without OTel imports in the usecase
### M21. Traces and runtime metrics in the Docker adapter and reconciliation
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M19`
- Commit required: yes
- Commit message: `instrument sandbox docker runtime`
- Scope: add observability to `DockerSandboxRuntime` and the reconciliation path for Docker operations and current-state sync
- Files: `adapter/docker/runtime.py`, `adapter/sandbox/reconciliation.py`, `adapter/di/container.py`, plus tests in `test/*` if needed
- Decision: `DockerSandboxRuntime` receives `Metrics` and `Tracer`; create/stop/list publish the duration histograms `sandbox.runtime.create.duration_ms`, `sandbox.runtime.stop.duration_ms`, `sandbox.runtime.list_active.duration_ms`, the error counter `sandbox.runtime.error.total`, and span attributes for chat/session/container; reconciliation updates `sandbox.active.count` from the registry snapshot
- Acceptance criteria: the Docker adapter stays in the outer layer; Docker operation errors show up in spans and metrics; after startup reconciliation the current-state metric for active sandboxes matches the registry
### M22. Tests for sandbox observability
- Subagent: `test-engineer`
- Status: completed
- Dependencies: `M20`, `M21`
- Commit required: yes
- Commit message: `add sandbox observability tests`
- Scope: cover the new observability policy with regression tests without a real OTel backend
- Files: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, plus new focused tests in `test/*` if needed
- Decision: use typed fake metrics/tracer implementations and verify the names/attributes of the key spans and metrics on the create/reuse/replace/cleanup/runtime paths
- Acceptance criteria: tests confirm the spans and metrics on the usecase and adapter paths; constructor wiring is updated without mypy regressions; `make typecheck` and the relevant `pytest` pass
### M23. Boundary review for sandbox observability
- Subagent: `code-reviewer`
- Status: in_progress
- Dependencies: `M22`
- Commit required: no
- Scope: verify that the observability changes close issue #11 and FR-034 without violating clean architecture
- Files: all code changed in `M19`-`M22`
- Acceptance criteria: inner layers do not import OTel; Docker-specific tracing stays in `adapter/docker/`; current-state and duration metrics cover the sandbox lifecycle sufficiently; remaining findings are minor or absent
## Follow-up after the M23 boundary review
### M24. Fix replace trace identity in CreateSandbox
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M23`
- Commit required: yes
- Commit message: `fix sandbox replace trace identity`
- Scope: stop mixing old and new sandbox identifiers in the replace-path usecase tracing
- Files: `usecase/sandbox.py`, plus targeted tests in `test/*` if needed
- Decision: record old and new sandbox identifiers in separate span attributes or child spans so that replace success and replace failure remain unambiguously traceable
- Acceptance criteria: the replace path does not overwrite previous/new identifiers; on replace failure the span stays consistent and reflects both sides of the lifecycle
### M25. Add failure-path observability regression tests
- Subagent: `test-engineer`
- Status: completed
- Dependencies: `M24`
- Commit required: yes
- Commit message: `add sandbox observability failure tests`
- Scope: cover the replace-failure trace, cleanup error metrics/spans, and Docker stop observability with tests
- Files: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, plus other focused tests in `test/*` if needed
- Decision: use presence-based assertions and verify the key span/metric contracts without brittle exact-order checks
- Acceptance criteria: a test for replace-failure tracing; a test for `sandbox.cleanup.error.total`; tests for Docker stop observability covering success/error/not_found or an equivalent set of outcome paths
### M26. Repeat boundary review for sandbox observability
- Subagent: `code-reviewer`
- Status: in_progress
- Dependencies: `M25`
- Commit required: no
- Scope: confirm that the follow-up fixes closed the M23 findings without introducing new boundary violations
- Files: all code changed in `M24`-`M25`
- Acceptance criteria: no findings on replace tracing identity or missing failure-path observability coverage; clean architecture is still upheld
## Follow-up after the M26 boundary review
### M27. Compensate for save failures after runtime.create
- Subagent: `feature-developer`
- Status: completed
- Dependencies: `M26`
- Commit required: yes
- Commit message: `fix sandbox create rollback gap`
- Scope: never leave an untracked running container or an inconsistent `sandbox.active.count` when `repository.save()` fails after a successful `runtime.create()`
- Files: `usecase/sandbox.py`, plus targeted tests in `test/*` if needed
- Decision: make the create/replace path registry-safe via rollback or another explicit compensation path without violating clean architecture
- Acceptance criteria: a save failure does not leave the new container running without registry state; `sandbox.active.count` reflects the final committed state; the replace and fresh-create failure paths are consistent
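The compensation path can be sketched in a few lines; the function name and the `runtime`/`repository` call shapes mirror the task wording but are assumptions about the real signatures.

```python
def create_with_rollback(runtime, repository, chat_id):
    """Create a container, then persist it; on save failure, stop the container.

    Guarantees no untracked running container survives a failed save,
    so registry state and sandbox.active.count stay consistent.
    """
    container_id = runtime.create(chat_id)
    try:
        repository.save(chat_id, container_id)
    except Exception:
        runtime.stop(container_id)  # compensate: undo the uncommitted create
        raise                       # the caller still sees the original error
    return container_id
```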
### M28. Regressions for rollback and startup-failure observability
- Subagent: `test-engineer`
- Status: completed
- Dependencies: `M27`
- Commit required: yes
- Commit message: `add sandbox rollback regression tests`
- Scope: cover the save-failure rollback and startup observability failure paths with tests
- Files: `test/test_sandbox_usecase.py`, `test/test_docker_runtime.py`, `test/test_create_http.py`, plus other focused tests in `test/*` if needed
- Decision: add tests for fresh-create/replace save-failure compensation, `list_active` failure observability, and reconciliation failure span/metric expectations where applicable
- Acceptance criteria: the rollback path is covered; list/reconciliation failure observability does not regress; tests stay presence-based and stable
### M29. Final boundary review for sandbox observability
- Subagent: `code-reviewer`
- Status: completed
- Dependencies: `M28`
- Commit required: no
- Scope: confirm that M27-M28 closed the remaining M26 findings
- Files: all code changed in `M27`-`M28`
- Acceptance criteria: no findings on the rollback gap or startup-failure observability coverage; the sandbox observability slice is acceptable as-is

---
File: `tasks/roadmap.md` (new file, 279 lines)
# Roadmap: workspace/chat/files + artifacts
## Goal
The next product priority for `master-service`:
1. managing `workspace` / `chat` / `chat files`
2. storing and serving `artifacts`
The idea of this stage: turn the current sandbox MVP into a storage-centric control plane, where the sandbox works on top of explicit user data rather than only on top of a `chat_id`.
## In scope for this roadmap
- workspace metadata
- chat metadata and chat directories
- `history.md` next to the chat
- upload/list/download/delete/clear for chat files
- quota / usage accounting
- artifact metadata
- an object storage adapter for artifacts
- a delivery / mark-for-delete flow for artifacts
- integrating the storage model with the current sandbox lifecycle
## Out of scope for now
- full auth/access control
- access lease and WebSocket handoff
- multi-node orchestration as a separate epic
- a full retention engine for the workspace/account lifecycle
- external messenger integrations
## Settled product decisions
- the trusted caller passes `user_id: UUID`
- in v1 there is one `Workspace` per `User`
- metadata adapters are `in-memory` for now
- in v1 chat history is stored in `history.md`
- chat files are served through the master: a metadata API + a download endpoint
- a soft 10 GB quota blocks quota-relevant write-path operations but makes no hard reservation
- chat creation by itself is not blocked by the quota check until a meaningful file/artifact payload appears
- delete in v1 is a hard delete; chat delete is forbidden while a sandbox is active
- artifact delivery is confirmed by an explicit ack
- the artifact blob is served via a presigned URL, metadata through the master
- object storage layout: one bucket per environment + prefixes
- artifact states: `created`, `stored`, `delivered`, `delivery_failed`, `marked_for_delete`, `deleted`
- artifact retention: after ack -> `marked_for_delete` -> async cleanup; without an ack a separate TTL applies
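The fixed state list and retention decisions above imply a small state machine. The exact edge set below is an assumption to be settled in R01 (for example, whether `delivery_failed` may retry back to `stored`); only the state names come from the decisions.

```python
# Allowed artifact transitions (edges are a sketch pending the R01 ADR).
_TRANSITIONS: dict[str, set[str]] = {
    "created": {"stored"},
    "stored": {"delivered", "delivery_failed", "marked_for_delete"},  # TTL w/o ack
    "delivered": {"marked_for_delete"},          # explicit ack -> marked_for_delete
    "delivery_failed": {"stored", "marked_for_delete"},  # assumed retry edge
    "marked_for_delete": {"deleted"},            # async cleanup
    "deleted": set(),
}


def transition(state: str, new_state: str) -> str:
    """Validate a single lifecycle step; reject anything off the edge set."""
    if new_state not in _TRANSITIONS[state]:
        raise ValueError(f"illegal artifact transition {state} -> {new_state}")
    return new_state
```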
## Execution principles
- Clean Architecture and dependency direction are preserved
- filesystem and object storage live only in the outer adapter/repository layers
- `domain/` and `usecase/` know nothing about FastAPI, Docker, OpenTelemetry, or env
- one delivery slice = one atomic task = one commit
- architectural forks are first captured in a short ADR-lite in `docs/`
## Executor legend
- `primary-agent`: architecture, ADRs, domain/contracts, final assembly of decisions
- `junior`: simple in-memory adapters and basic unit-level tasks
- `junior+opus`: usecase/adapter/API slices of medium coupling
- `test-engineer`: regression and integration test packs
- `code-reviewer`: review-only stages
---
## Phase 0. Design checkpoints
### R00. ADR: storage source of truth
- **Recommended executor:** `primary-agent`
- **Why:** define the source of truth for `Workspace`, `Chat`, `ChatFile`, `Artifact`
- **To decide:** how the in-memory-first stage works, how metadata relates to filesystem paths, how the sandbox obtains the current chat's path
- **Output:** ADR-lite + a list of entities and ownership boundaries
### R01. ADR: artifact lifecycle
- **Recommended executor:** `primary-agent`
- **Why:** fix the states and delivery contract for the artifact flow
- **To decide:** `created -> stored -> delivered -> delivery_failed -> marked_for_delete -> deleted`
- **Output:** ADR-lite + a minimal state machine
### R02. ADR: chat history policy
- **Recommended executor:** `primary-agent`
- **Why:** fix how `history.md` relates to the chat's metadata
- **To decide:** who creates the file, who writes the initial header, how to read/update metadata without parsing the file
- **Output:** ADR-lite + sync rules between metadata and the history file
---
## Phase 1. Foundation for the workspace/chat/file domain
### R10. Domain model for user storage
- **Recommended executor:** `primary-agent`
- **Scope:** `Workspace`, `Chat`, `ChatFile`, domain errors
- **Layer:** `domain/`
- **Output:** minimal entities, value objects, and errors for conflict/not-found/quota cases
- **Depends on:** `R00`, `R02`
### R11. Usecase ports for storage and metadata
- **Recommended executor:** `primary-agent`
- **Scope:** repository/storage interfaces for workspace/chat/chat-file/history/quota
- **Layer:** `usecase/`
- **Output:** ports for the metadata repo, file storage, usage reader, id generator, clock, and trusted-caller identity input
- **Depends on:** `R10`
### R12. In-memory metadata adapter foundation
- **Recommended executor:** `junior`
- **Scope:** the first metadata implementation for workspace/chat/chat-file/artifact metadata
- **Layer:** `repository/`
- **Note:** this is an intermediate stage; a durable DB comes as a separate follow-up after the storage/product slice
- **Output:** in-memory CRUD for workspace/chat/chat-file/artifact metadata
- **Depends on:** `R11`
### R13. Filesystem storage adapter foundation
- **Recommended executor:** `junior+opus`
- **Scope:** a storage adapter for chat directories, `history.md`, uploads, and file metadata extraction
- **Layer:** `adapter/` or `repository/`, depending on the final boundary
- **Output:** create/list/delete/read-metadata operations for chat files, history path management, and download path resolution
- **Depends on:** `R11`
### R14. Sandbox integration with chat metadata
- **Recommended executor:** `junior+opus`
- **Scope:** stop treating `chat_id` as the only source of layout; use the metadata-backed chat path
- **Layer:** `usecase/` + outer adapters
- **Output:** the sandbox mounts the current chat directory created through the storage model
- **Depends on:** `R12`, `R13`
---
## Phase 2. Workspace and chat API
### R20. Create workspace on first touch
- **Recommended executor:** `junior+opus`
- **Scope:** auto-create a workspace on the first request touching user storage
- **API:** an internal usecase, plus an explicit HTTP endpoint if needed
- **Output:** a guaranteed `Workspace` for the user, keyed by `user_id`
- **Depends on:** `R12`
### R21. Chat CRUD
- **Recommended executor:** `junior+opus`
- **Scope:** create/get/list/delete chat
- **API:** versioned HTTP under `/api/v1`
- **Output:** first-class chat metadata instead of an implicit-only `chat_id`
- **Depends on:** `R20`, `R12`, `R13`
### R22. History file lifecycle
- **Recommended executor:** `junior+opus`
- **Scope:** create `history.md` together with the chat directory; read and update metadata
- **Output:** the first stable contract for chat history on the filesystem
- **Depends on:** `R21`, `R13`
### R23. Chat file upload and file metadata API
- **Recommended executor:** `junior+opus`
- **Scope:** upload / list / get metadata / delete a single file / clear chat files
- **API:** HTTP adapter + schemas
- **Output:** a basic chat-scoped file management API, plus the master download endpoint
- **Depends on:** `R21`, `R13`
### R24. Usage and quota accounting
- **Recommended executor:** `junior+opus`
- **Scope:** track storage usage per workspace/chat; a soft 10 GB quota
- **Output:** usecases and metrics for current usage; file/artifact write-path rejection when the soft quota is exceeded
- **Depends on:** `R23`
### R25. Tests and review for the storage slice
- **Recommended executor:** `test-engineer`
- **Scope:** unit + adapter + HTTP tests, boundary review
- **Output:** regression coverage for workspace/chat/files
- **Depends on:** `R20`-`R24`
---
## Phase 3. Artifact pipeline
### R30. Artifact domain and metadata model
- **Recommended executor:** `primary-agent`
- **Scope:** the `Artifact` entity, states, delivery metadata, linkage to `user/chat`
- **Layer:** `domain/`, `usecase/`
- **Output:** the base artifact model + errors + repository ports for the in-memory metadata stage
- **Depends on:** `R01`, `R12`
### R31. Object storage adapter
- **Recommended executor:** `junior+opus`
- **Scope:** an S3-compatible adapter for artifact blobs
- **Layer:** outer `adapter/` or `repository/`
- **Output:** upload/get/delete primitives behind an interface + presigned URL support
- **Depends on:** `R30`
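The agreed object-storage layout (one bucket per environment + prefixes) can be pinned down as pure key construction, independent of the S3 client. Both the bucket naming convention and the prefix scheme below are assumptions to be fixed in R31, not settled decisions.

```python
import uuid


def bucket_for(environment: str) -> str:
    # Hypothetical naming convention: one bucket per environment.
    return f"artifacts-{environment}"


def artifact_key(workspace_id: uuid.UUID, chat_id: uuid.UUID,
                 artifact_id: uuid.UUID) -> str:
    # Hypothetical prefix scheme: workspace -> chat -> artifact.
    return (f"workspaces/{workspace_id}/chats/{chat_id}"
            f"/artifacts/{artifact_id}")
```

Keeping key construction as a pure function makes the layout unit-testable and keeps the S3 client (and presigned-URL generation) behind the adapter interface.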
### R32. Artifact registration and upload flow
- **Рекомендуемый исполнитель:** `junior+opus`
- **Scope:** usecase, который связывает metadata и object storage
- **Выход:** artifact можно зарегистрировать, сохранить blob и получить external reference
- **Depends on:** `R30`, `R31`
### R33. Artifact list/get metadata API
- **Рекомендуемый исполнитель:** `junior+opus`
- **Scope:** list artifacts by chat/user, get artifact metadata/status
- **Выход:** внешний API для управления artifact metadata и выдачи presigned download references
- **Depends on:** `R32`
### R34. Delivery acknowledgement flow
- **Рекомендуемый исполнитель:** `junior+opus`
- **Scope:** mark artifact as delivered / delivery_failed
- **Выход:** подтвержденный delivery state через explicit ack и база для retention
- **Depends on:** `R32`
### R35. Artifact delete / mark-for-delete policy
- **Рекомендуемый исполнитель:** `junior+opus`
- **Scope:** policy-driven cleanup hooks после delivery confirmation или TTL
- **Выход:** artifact lifecycle завершен, metadata и blob cleanup согласованы, `marked_for_delete` используется как переходное состояние
- **Depends on:** `R34`
### R36. Tests and review for artifact slice
- **Recommended assignee:** `test-engineer`
- **Scope:** unit + adapter + HTTP tests, boundary review
- **Output:** regression coverage for artifact lifecycle and object storage integration
- **Depends on:** `R30`-`R35`
---
## Phase 4. Cross-slice hardening
### R40. Observability for storage usage and artifacts
- **Recommended assignee:** `junior+opus`
- **Scope:** metrics for usage/quota, artifact upload latency, artifact errors
- **Output:** dashboards and alerts for storage-centric flows
- **Depends on:** `R24`, `R32`, `R35`
### R41. Cleanup safety rules
- **Recommended assignee:** `junior+opus`
- **Scope:** do not delete chat files/artifacts while an active sandbox or an unfinished delivery flow exists
- **Output:** safe cleanup invariants
- **Depends on:** `R24`, `R35`
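The R41 invariant can be captured as a single predicate consulted before any destructive cleanup. The view object below is a hypothetical shape for whatever state the cleanup path ends up consulting:

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class CleanupCandidate:
    # Hypothetical snapshot of the state R41 must consult before deleting.
    has_active_sandbox: bool
    has_pending_delivery: bool


def is_cleanup_safe(candidate: CleanupCandidate) -> bool:
    """R41 invariant: never delete under an active sandbox or unfinished delivery."""
    return not (candidate.has_active_sandbox or candidate.has_pending_delivery)
```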
### R42. Docs refresh
- **Recommended assignee:** `primary-agent`
- **Scope:** README, ADRs, API docs, operator notes
- **Output:** up-to-date documentation for storage and artifact flows
- **Depends on:** all previous slices
---
## Recommended execution order
### Priority A — workspace/chat/files
1. `R00` `R02`
2. `R10` `R11`
3. `R12` `R13`
4. `R20` `R21` `R22`
5. `R23` `R24`
6. `R14` `R25`
### Priority B — artifacts
1. `R01`
2. `R30` `R31`
3. `R32` `R33`
4. `R34` `R35`
5. `R36`
### Future follow-up after Priority A+B
1. durable metadata repository instead of in-memory adapters
2. auth/access control and access lease
3. multi-node storage/session coordination
### Priority C — hardening
1. `R40`
2. `R41`
3. `R42`
---
## What this roadmap delivers
After Priority A + B are complete, the service will be able to:
- create and store workspace/chat metadata
- manage user files within chat scope
- store chat history in a predictable layout
- start a sandbox on top of first-class chat storage
- store artifact metadata and blobs separately
- expose artifact metadata externally and confirm delivery
In other words, `master-service` becomes not just a sandbox orchestrator but a full control plane for user storage and the artifact lifecycle.

# Sprint 01 — storage foundation
## Sprint goal
Ship the first executable storage slice for `workspace` / `chat` / `chat files` without a durable DB and without auth, but with correct clean-architecture boundaries.
## Fixed assumptions
- a trusted caller passes `user_id: UUID`
- one `Workspace` per `User`
- metadata adapters are `in-memory` for now
- chat history lives in `history.md`
- chat files are served through the master download endpoint
- the soft quota blocks quota-relevant write-path operations
- chat creation by itself is not blocked by the quota
- delete in v1 is a hard delete; chat delete is forbidden while a sandbox is active
## Scope in
- domain model for `Workspace`, `Chat`, `ChatFile`
- usecase ports for metadata, filesystem storage, and usage
- in-memory metadata adapters
- filesystem adapter for chat directories, `history.md`, and file operations
- usecases for auto-create workspace, chat CRUD, and file CRUD
- HTTP API for chat/file operations
- basic usage/quota check
## Scope out
- durable DB
- auth/access control
- artifacts
- retention engine
- p2p/access lease
- multi-node behavior
## Assignee legend
- `primary-agent` — architecture, ADRs, domain/contracts
- `junior` — simple repositories and basic unit tasks
- `junior+opus` — medium-complexity adapter/usecase/API tasks
- `test-engineer` — test packs
- `code-reviewer` — review-only stage
## Task order
### S01. ADR-lite and storage contracts
- **Recommended assignee:** `primary-agent`
- **Commit:** `add storage foundation contracts`
- **Scope:** pin down the storage source of truth and history policy; add domain entities and usecase ports
- **Files:** `docs/009-storage-foundation.md`, `docs/010-chat-history-policy.md`, `domain/*`, `usecase/interface.py` or new storage usecase files
- **Acceptance:** minimal entities/errors/ports exist; the ADR is short and consistent
### S02. In-memory metadata adapters
- **Recommended assignee:** `junior`
- **Commit:** `add in-memory storage repositories`
- **Scope:** `WorkspaceRepository`, `ChatRepository`, `ChatFileRepository` in-memory implementations
- **Files:** `repository/*`
- **Acceptance:** CRUD and basic query paths work without HTTP
### S03. Filesystem chat storage adapter
- **Recommended assignee:** `junior+opus`
- **Commit:** `add chat filesystem storage`
- **Scope:** create chat directory, create `history.md`, save/list/delete files, collect file metadata
- **Files:** outer adapter/repository storage files
- **Acceptance:** the chat directory layout is stable; the adapter does not leak inward
### S04. Workspace and chat usecases
- **Recommended assignee:** `junior+opus`
- **Commit:** `add workspace and chat usecases`
- **Scope:** auto-create workspace on first touch, create/get/list/delete chat
- **Files:** `usecase/*`, `adapter/di/container.py`
- **Acceptance:** chat delete conflicts while a sandbox is active; the history path is created through the storage adapter
### S05. Chat file usecases + quota
- **Recommended assignee:** `junior+opus`
- **Commit:** `add chat file usecases`
- **Scope:** upload/list/metadata/delete/clear files; current usage and soft quota reject on write
- **Files:** `usecase/*`, `adapter/di/container.py`
- **Acceptance:** the file write path rejects on quota overflow; usage includes `history.md` and chat files; chat metadata creation is not blocked by the quota check
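The soft-quota check from S05 can be sketched as two pure functions: usage sums `history.md` plus all chat files, and a write is rejected when it would push usage past the soft quota. The function names and byte-based units are assumptions:

```python
def current_usage_bytes(file_sizes: dict[str, int]) -> int:
    """Usage counts history.md plus all chat files (per the S05 acceptance)."""
    return sum(file_sizes.values())


def check_write_allowed(
    file_sizes: dict[str, int],
    incoming_bytes: int,
    soft_quota_bytes: int,
) -> bool:
    """Reject a quota-relevant write when it would exceed the soft quota."""
    return current_usage_bytes(file_sizes) + incoming_bytes <= soft_quota_bytes
```

Chat metadata creation never calls `check_write_allowed`, which keeps the "chat create is not blocked by quota" assumption mechanical rather than incidental.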
### S06. HTTP API for chat and files
- **Recommended assignee:** `junior+opus`
- **Commit:** `add chat and file http api`
- **Scope:** versioned routes, schemas, error mapping, file download endpoint
- **Files:** `adapter/http/fastapi/*`
- **Acceptance:** the API stays thin; FastAPI does not leak into inner layers
### S07. Tests for storage foundation
- **Recommended assignee:** `test-engineer`
- **Commit:** `add storage foundation tests`
- **Scope:** unit + adapter + HTTP tests for workspace/chat/files/quota
- **Files:** `test/*`
- **Acceptance:** regression coverage exists for CRUD, history creation, quota reject, delete conflict, and file download
### S08. Boundary review
- **Recommended assignee:** `code-reviewer`
- **Commit:** no
- **Scope:** final review of the storage foundation slice
- **Acceptance:** clean architecture is upheld; no must-fix findings
## Definition of done
The sprint is considered complete when:
- a chat can be created for a `user_id`
- a directory and `history.md` are created for the chat
- chat files can be uploaded and deleted
- file metadata can be fetched and files downloaded through the master
- the quota check blocks quota-relevant file write operations
- all tests pass
- the boundary review contains no must-fix findings
## Risks
- in-memory metadata does not survive a restart
- the download endpoint may require refining the streaming contract
- quota semantics must stay simple to avoid creating a hidden retention scope
## What comes next sprint
Right after this sprint, a bridge sprint is needed for sandbox/storage integration:
- metadata-backed chat path for the sandbox flow
- wiring the current `CreateSandbox` on top of first-class chat metadata
- a review that the sandbox lifecycle does not diverge from the new storage slice
Only then does it make sense to take on the artifact slice:
- artifact domain + metadata
- object storage adapter
- artifact upload/list/status
- delivery ack flow

`test/test_create_http.py` (new file, 909 lines)
import asyncio
import json
from datetime import UTC, datetime, timedelta
from uuid import UUID
import pytest
from docker import DockerClient
from fastapi import FastAPI
from starlette.types import Message, Scope
import adapter.di.container as container_module
from adapter.config.model import (
AppConfig,
AppSectionConfig,
DockerConfig,
HttpConfig,
LoggingConfig,
MetricsConfig,
OtelConfig,
SandboxConfig,
SecurityConfig,
TracingConfig,
)
from adapter.di.container import AppContainer, AppRepositories, AppUsecases
from adapter.docker.runtime import DockerSandboxRuntime
from adapter.http.fastapi import app as app_module
from adapter.observability.noop import NoopMetrics, NoopTracer
from adapter.observability.runtime import ObservabilityRuntime
from adapter.sandbox.reconciliation import SandboxSessionReconciler
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
from repository.sandbox_session import InMemorySandboxSessionRepository
from usecase.interface import Attrs
from usecase.sandbox import CleanupExpiredSandboxes, CreateSandbox, CreateSandboxCommand
CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
SESSION_ID = UUID('00000000-0000-0000-0000-000000000011')
class FakeLogger:
def __init__(self) -> None:
self.messages: list[tuple[str, str, Attrs | None]] = []
def debug(self, message: str, attrs: Attrs | None = None) -> None:
self.messages.append(('debug', message, attrs))
def info(self, message: str, attrs: Attrs | None = None) -> None:
self.messages.append(('info', message, attrs))
def warning(self, message: str, attrs: Attrs | None = None) -> None:
self.messages.append(('warning', message, attrs))
def error(self, message: str, attrs: Attrs | None = None) -> None:
self.messages.append(('error', message, attrs))
class FakeCreateSandboxUsecase(CreateSandbox):
def __init__(
self, session: SandboxSession | None = None, error: Exception | None = None
) -> None:
self._session = session
self._error = error
self.commands: list[CreateSandboxCommand] = []
def execute(self, command: CreateSandboxCommand) -> SandboxSession:
self.commands.append(command)
if self._error is not None:
raise self._error
if self._session is None:
raise AssertionError('missing session')
return self._session
class FakeCleanupExpiredSandboxes(CleanupExpiredSandboxes):
def __init__(self) -> None:
self.calls = 0
def execute(self) -> list[SandboxSession]:
self.calls += 1
return []
class FakeDockerClient(DockerClient):
def __init__(self, base_url: str | None = None) -> None:
self.base_url = base_url
self.close_calls = 0
def close(self) -> None:
self.close_calls += 1
class EmptySandboxState:
def __init__(self) -> None:
self.calls = 0
def list_active_sessions(self) -> list[SandboxSession]:
self.calls += 1
return []
class FakeClock:
def __init__(self, now: datetime) -> None:
self._now = now
def now(self) -> datetime:
return self._now
class RecordingMetrics:
def __init__(self) -> None:
self.increment_calls: list[tuple[str, int, Attrs | None]] = []
self.record_calls: list[tuple[str, float, Attrs | None]] = []
self.set_calls: list[tuple[str, int | float, Attrs | None]] = []
def increment(
self,
name: str,
value: int = 1,
attrs: Attrs | None = None,
) -> None:
self.increment_calls.append((name, value, attrs))
def record(
self,
name: str,
value: float,
attrs: Attrs | None = None,
) -> None:
self.record_calls.append((name, value, attrs))
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None:
self.set_calls.append((name, value, attrs))
class RecordingSpan:
def __init__(self) -> None:
self.attrs: dict[str, str | int | float | bool] = {}
self.errors: list[Exception] = []
def set_attribute(self, name: str, value: str | int | float | bool) -> None:
self.attrs[name] = value
def record_error(self, error: Exception) -> None:
self.errors.append(error)
class RecordingSpanContext:
def __init__(self, span: RecordingSpan) -> None:
self._span = span
def __enter__(self) -> RecordingSpan:
return self._span
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: object,
) -> bool | None:
return None
class RecordingTracer:
def __init__(self) -> None:
self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []
def start_span(
self,
name: str,
attrs: Attrs | None = None,
) -> RecordingSpanContext:
span = RecordingSpan()
self.spans.append((name, attrs, span))
return RecordingSpanContext(span)
class FakeLifecycleRuntime:
def __init__(self, sessions: list[SandboxSession]) -> None:
self._sessions = list(sessions)
self.list_calls = 0
self.create_calls: list[CreateSandboxCommand] = []
self.stop_calls: list[str] = []
def list_active_sessions(self) -> list[SandboxSession]:
self.list_calls += 1
return list(self._sessions)
def create(
self,
*,
session_id: UUID,
chat_id: UUID,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession:
self.create_calls.append(CreateSandboxCommand(chat_id=chat_id))
session = SandboxSession(
session_id=session_id,
chat_id=chat_id,
container_id=f'container-{session_id}',
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=expires_at,
)
self._sessions = [
existing for existing in self._sessions if existing.chat_id != chat_id
]
self._sessions.append(session)
return session
def stop(self, container_id: str) -> None:
self.stop_calls.append(container_id)
class FixedSandboxState:
def __init__(self, sessions: list[SandboxSession]) -> None:
self._sessions = list(sessions)
def list_active_sessions(self) -> list[SandboxSession]:
return list(self._sessions)
class FailingSandboxState:
def __init__(self, error: Exception) -> None:
self._error = error
self.calls = 0
def list_active_sessions(self) -> list[SandboxSession]:
self.calls += 1
raise self._error
class CountingRegistry:
def __init__(self, count_active_result: int) -> None:
self._count_active_result = count_active_result
self.replaced_sessions: list[SandboxSession] = []
def replace_all(self, sessions: list[SandboxSession]) -> None:
self.replaced_sessions = list(sessions)
def count_active(self) -> int:
return self._count_active_result
class FailingRegistry:
def __init__(self, error: Exception, *, fail_on: str = 'replace_all') -> None:
self._error = error
self._fail_on = fail_on
self.replaced_sessions: list[SandboxSession] = []
self.count_calls = 0
def replace_all(self, sessions: list[SandboxSession]) -> None:
self.replaced_sessions = list(sessions)
if self._fail_on == 'replace_all':
raise self._error
def count_active(self) -> int:
self.count_calls += 1
if self._fail_on == 'count_active':
raise self._error
return 0
def build_config() -> AppConfig:
return AppConfig(
app=AppSectionConfig(name='master', env='test'),
http=HttpConfig(host='127.0.0.1', port=8000),
logging=LoggingConfig(
level='INFO', output='stdout', format='json', file_path=None
),
metrics=MetricsConfig(enabled=False),
tracing=TracingConfig(enabled=False),
otel=OtelConfig(
service_name='master',
logs_endpoint='http://localhost:4318/v1/logs',
metrics_endpoint='http://localhost:4318/v1/metrics',
traces_endpoint='http://localhost:4318/v1/traces',
metric_export_interval=1000,
),
docker=DockerConfig(base_url='unix:///var/run/docker.sock'),
sandbox=SandboxConfig(
image='sandbox:latest',
ttl_seconds=300,
cleanup_interval_seconds=60,
chats_root='/tmp/chats',
dependencies_host_path='/tmp/dependencies',
lambda_tools_host_path='/tmp/lambda-tools',
chat_mount_path='/workspace/chat',
dependencies_mount_path='/workspace/dependencies',
lambda_tools_mount_path='/workspace/lambda-tools',
),
security=SecurityConfig(
token_header='Authorization',
api_token='token',
signing_key='signing-key',
),
)
def build_container(
config: AppConfig,
create_sandbox_usecase: CreateSandbox,
cleanup_usecase: CleanupExpiredSandboxes,
logger: FakeLogger,
docker_client: FakeDockerClient,
sandbox_reconciler: SandboxSessionReconciler | None = None,
) -> AppContainer:
observability = ObservabilityRuntime(
logger=logger,
metrics=NoopMetrics(),
tracer=NoopTracer(),
)
repositories = AppRepositories(sandbox_session=InMemorySandboxSessionRepository())
reconciler = sandbox_reconciler
if reconciler is None:
reconciler = SandboxSessionReconciler(
state_source=EmptySandboxState(),
registry=repositories.sandbox_session,
logger=logger,
metrics=observability.metrics,
tracer=observability.tracer,
)
usecases = AppUsecases(
create_sandbox=create_sandbox_usecase,
cleanup_expired_sandboxes=cleanup_usecase,
)
return AppContainer(
config=config,
observability=observability,
repositories=repositories,
usecases=usecases,
sandbox_reconciler=reconciler,
_docker_client=docker_client,
)
async def request_json(
app: FastAPI,
method: str,
path: str,
payload: dict[str, str] | None = None,
) -> tuple[int, dict[str, object]]:
body = b'' if payload is None else json.dumps(payload).encode()
messages: list[Message] = []
request_sent = False
async def receive() -> Message:
nonlocal request_sent
if request_sent:
await asyncio.sleep(0)
return {'type': 'http.disconnect'}
request_sent = True
return {
'type': 'http.request',
'body': body,
'more_body': False,
}
async def send(message: Message) -> None:
messages.append(message)
scope: Scope = {
'type': 'http',
'asgi': {'version': '3.0'},
'http_version': '1.1',
'method': method,
'scheme': 'http',
'path': path,
'raw_path': path.encode(),
'query_string': b'',
'root_path': '',
'headers': _build_headers(body, payload is not None),
'client': ('testclient', 50000),
'server': ('testserver', 80),
'state': {},
}
await app(scope, receive, send)
status = 500
response_body = b''
for message in messages:
if message['type'] == 'http.response.start':
status = int(message['status'])
if message['type'] == 'http.response.body':
response_body += bytes(message.get('body', b''))
if not response_body:
return status, {}
return status, json.loads(response_body.decode())
def _build_headers(body: bytes, has_json_body: bool) -> list[tuple[bytes, bytes]]:
headers = [
(b'host', b'testserver'),
(b'content-length', str(len(body)).encode()),
]
if has_json_body:
headers.append((b'content-type', b'application/json'))
return headers
async def post_json(
app: FastAPI, path: str, payload: dict[str, str]
) -> tuple[int, dict[str, object]]:
return await request_json(app, 'POST', path, payload)
async def get_json(app: FastAPI, path: str) -> tuple[int, dict[str, object]]:
return await request_json(app, 'GET', path)
async def exercise_create_request(
app: FastAPI,
payload: dict[str, str],
) -> tuple[int, dict[str, object]]:
await app.router.startup()
try:
status, response = await post_json(app, '/api/v1/create', payload)
await asyncio.sleep(0)
return status, response
finally:
await app.router.shutdown()
async def exercise_get_request(
app: FastAPI,
path: str,
) -> tuple[int, dict[str, object]]:
await app.router.startup()
try:
status, response = await get_json(app, path)
await asyncio.sleep(0)
return status, response
finally:
await app.router.shutdown()
def test_post_create_returns_session_with_canonical_chat_id(monkeypatch) -> None:
config = build_config()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=expires_at - timedelta(minutes=5),
expires_at=expires_at,
)
logger = FakeLogger()
create_usecase = FakeCreateSandboxUsecase(session=session)
cleanup_usecase = FakeCleanupExpiredSandboxes()
docker_client = FakeDockerClient()
container = build_container(
config,
create_usecase,
cleanup_usecase,
logger,
docker_client,
)
monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container)
monkeypatch.setattr(
app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None
)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': NON_CANONICAL_CHAT_ID})
)
assert status_code == 200
assert response == {
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'container_id': 'container-123',
'status': 'running',
'expires_at': '2026-04-02T12:05:00Z',
}
assert len(create_usecase.commands) == 1
assert create_usecase.commands[0].chat_id == CHAT_ID
assert cleanup_usecase.calls >= 1
assert any(
message == 'http_request'
and attrs is not None
and attrs['http.path'] == '/api/v1/create'
for _, message, attrs in logger.messages
)
assert docker_client.close_calls == 1
def test_post_create_rejects_non_uuid_chat_id(monkeypatch) -> None:
config = build_config()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=expires_at - timedelta(minutes=5),
expires_at=expires_at,
)
logger = FakeLogger()
create_usecase = FakeCreateSandboxUsecase(session=session)
cleanup_usecase = FakeCleanupExpiredSandboxes()
docker_client = FakeDockerClient()
container = build_container(
config,
create_usecase,
cleanup_usecase,
logger,
docker_client,
)
monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container)
monkeypatch.setattr(
app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None
)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': 'x/../y'})
)
assert status_code == 422
assert 'detail' in response
assert create_usecase.commands == []
assert docker_client.close_calls == 1
def test_post_create_maps_start_errors_to_service_unavailable(monkeypatch) -> None:
config = build_config()
logger = FakeLogger()
create_usecase = FakeCreateSandboxUsecase(error=SandboxStartError(str(CHAT_ID)))
cleanup_usecase = FakeCleanupExpiredSandboxes()
docker_client = FakeDockerClient()
container = build_container(
config,
create_usecase,
cleanup_usecase,
logger,
docker_client,
)
monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container)
monkeypatch.setattr(
app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None
)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': str(CHAT_ID)})
)
assert status_code == 503
assert response == {'detail': 'sandbox_start_failed'}
assert docker_client.close_calls == 1
def test_post_create_maps_generic_sandbox_errors_to_internal_error(monkeypatch) -> None:
config = build_config()
logger = FakeLogger()
create_usecase = FakeCreateSandboxUsecase(error=SandboxError('sandbox_broken'))
cleanup_usecase = FakeCleanupExpiredSandboxes()
docker_client = FakeDockerClient()
container = build_container(
config,
create_usecase,
cleanup_usecase,
logger,
docker_client,
)
monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container)
monkeypatch.setattr(
app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None
)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': str(CHAT_ID)})
)
assert status_code == 500
assert response == {'detail': 'sandbox_broken'}
assert docker_client.close_calls == 1
def test_startup_reconciliation_reuses_existing_container_after_restart(
monkeypatch,
) -> None:
config = build_config()
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
restored_session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=created_at + timedelta(minutes=5),
)
logger = FakeLogger()
docker_client = FakeDockerClient()
runtime = FakeLifecycleRuntime([restored_session])
repository = InMemorySandboxSessionRepository()
observability = ObservabilityRuntime(
logger=logger,
metrics=NoopMetrics(),
tracer=NoopTracer(),
)
repositories = AppRepositories(sandbox_session=repository)
reconciler = SandboxSessionReconciler(
state_source=runtime,
registry=repository,
logger=logger,
metrics=observability.metrics,
tracer=observability.tracer,
)
usecases = AppUsecases(
create_sandbox=CreateSandbox(
repository=repository,
locker=ProcessLocalSandboxLifecycleLocker(),
runtime=runtime,
clock=FakeClock(created_at),
logger=logger,
metrics=NoopMetrics(),
tracer=NoopTracer(),
ttl=timedelta(minutes=5),
),
cleanup_expired_sandboxes=CleanupExpiredSandboxes(
repository=repository,
locker=ProcessLocalSandboxLifecycleLocker(),
runtime=runtime,
clock=FakeClock(created_at),
logger=logger,
metrics=NoopMetrics(),
tracer=NoopTracer(),
),
)
container = AppContainer(
config=config,
observability=observability,
repositories=repositories,
usecases=usecases,
sandbox_reconciler=reconciler,
_docker_client=docker_client,
)
monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container)
monkeypatch.setattr(
app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None
)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_create_request(app, {'chat_id': str(CHAT_ID)})
)
assert status_code == 200
assert response == {
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'container_id': 'container-123',
'status': 'running',
'expires_at': '2026-04-02T12:05:00Z',
}
assert runtime.list_calls == 1
assert runtime.create_calls == []
assert runtime.stop_calls == []
assert repository.get_active_by_chat_id(CHAT_ID) == restored_session
assert docker_client.close_calls == 1
def test_removed_user_endpoint_returns_not_found(monkeypatch) -> None:
config = build_config()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=expires_at - timedelta(minutes=5),
expires_at=expires_at,
)
logger = FakeLogger()
create_usecase = FakeCreateSandboxUsecase(session=session)
cleanup_usecase = FakeCleanupExpiredSandboxes()
docker_client = FakeDockerClient()
container = build_container(
config,
create_usecase,
cleanup_usecase,
logger,
docker_client,
)
monkeypatch.setattr(app_module, 'build_container', lambda **kwargs: container)
monkeypatch.setattr(
app_module.FastAPIInstrumentor, 'instrument_app', lambda *args, **kwargs: None
)
app = app_module.create_app(config=config)
status_code, response = asyncio.run(
exercise_get_request(app, '/api/v1/users/user-123')
)
assert status_code == 404
assert response == {'detail': 'Not Found'}
assert docker_client.close_calls == 1
def test_reconciliation_uses_registry_backed_active_count_metric() -> None:
logger = FakeLogger()
metrics = RecordingMetrics()
tracer = RecordingTracer()
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=created_at + timedelta(minutes=5),
)
registry = CountingRegistry(count_active_result=7)
reconciler = SandboxSessionReconciler(
state_source=FixedSandboxState([session]),
registry=registry,
logger=logger,
metrics=metrics,
tracer=tracer,
)
sessions = reconciler.execute()
assert sessions == [session]
assert registry.replaced_sessions == [session]
assert metrics.set_calls == [('sandbox.active.count', 7, None)]
assert tracer.spans[0][0] == 'adapter.sandbox.reconcile_sessions'
assert tracer.spans[0][2].attrs['sandbox.active_count'] == 7
def test_reconciliation_records_error_when_state_source_fails() -> None:
logger = FakeLogger()
metrics = RecordingMetrics()
tracer = RecordingTracer()
state_error = RuntimeError('state_failed')
state_source = FailingSandboxState(state_error)
reconciler = SandboxSessionReconciler(
state_source=state_source,
registry=CountingRegistry(count_active_result=7),
logger=logger,
metrics=metrics,
tracer=tracer,
)
with pytest.raises(RuntimeError, match='state_failed') as excinfo:
reconciler.execute()
assert state_source.calls == 1
assert metrics.set_calls == []
spans = [
span
for name, _, span in tracer.spans
if name == 'adapter.sandbox.reconcile_sessions'
]
assert spans
span = spans[0]
assert span.attrs['sandbox.result'] == 'error'
assert 'sandbox.discovered_count' not in span.attrs
assert 'sandbox.active_count' not in span.attrs
assert excinfo.value in span.errors
def test_reconciliation_records_error_without_active_count_metric_on_registry_failure() -> (
None
):
logger = FakeLogger()
metrics = RecordingMetrics()
tracer = RecordingTracer()
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=created_at + timedelta(minutes=5),
)
registry_error = RuntimeError('replace_failed')
registry = FailingRegistry(registry_error)
reconciler = SandboxSessionReconciler(
state_source=FixedSandboxState([session]),
registry=registry,
logger=logger,
metrics=metrics,
tracer=tracer,
)
with pytest.raises(RuntimeError, match='replace_failed') as excinfo:
reconciler.execute()
assert registry.replaced_sessions == [session]
assert registry.count_calls == 0
assert metrics.set_calls == []
spans = [
span
for name, _, span in tracer.spans
if name == 'adapter.sandbox.reconcile_sessions'
]
assert spans
span = spans[0]
assert span.attrs['sandbox.discovered_count'] == 1
assert span.attrs['sandbox.result'] == 'error'
assert 'sandbox.active_count' not in span.attrs
assert excinfo.value in span.errors
def test_reconciliation_records_error_when_registry_count_active_fails() -> None:
logger = FakeLogger()
metrics = RecordingMetrics()
tracer = RecordingTracer()
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
session = SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=created_at,
expires_at=created_at + timedelta(minutes=5),
)
registry_error = RuntimeError('count_failed')
registry = FailingRegistry(registry_error, fail_on='count_active')
reconciler = SandboxSessionReconciler(
state_source=FixedSandboxState([session]),
registry=registry,
logger=logger,
metrics=metrics,
tracer=tracer,
)
with pytest.raises(RuntimeError, match='count_failed') as excinfo:
reconciler.execute()
assert registry.replaced_sessions == [session]
assert registry.count_calls == 1
assert metrics.set_calls == []
spans = [
span
for name, _, span in tracer.spans
if name == 'adapter.sandbox.reconcile_sessions'
]
assert spans
span = spans[0]
assert span.attrs['sandbox.discovered_count'] == 1
assert 'sandbox.active_count' not in span.attrs
assert span.attrs['sandbox.result'] == 'error'
assert excinfo.value in span.errors
def test_build_container_wires_observability_into_runtime_and_reconciler(
monkeypatch,
) -> None:
logger = FakeLogger()
metrics = RecordingMetrics()
tracer = RecordingTracer()
observability = ObservabilityRuntime(
logger=logger,
metrics=metrics,
tracer=tracer,
)
docker_client = FakeDockerClient()
monkeypatch.setattr(
container_module, 'build_observability', lambda config: observability
)
monkeypatch.setattr(
container_module.docker,
'DockerClient',
lambda base_url: docker_client,
)
container = container_module.build_container(config=build_config())
runtime = container.sandbox_reconciler.state_source
assert isinstance(runtime, DockerSandboxRuntime)
assert runtime._metrics is metrics
assert runtime._tracer is tracer
assert container.sandbox_reconciler.metrics is metrics
assert container.sandbox_reconciler.tracer is tracer
assert container.usecases.create_sandbox._runtime is runtime
assert container.usecases.create_sandbox._metrics is metrics
assert container.usecases.create_sandbox._tracer is tracer
assert container.usecases.cleanup_expired_sandboxes._runtime is runtime
assert container.usecases.cleanup_expired_sandboxes._metrics is metrics
assert container.usecases.cleanup_expired_sandboxes._tracer is tracer
assert container._docker_client is docker_client
container.shutdown()
assert docker_client.close_calls == 1

`test/test_docker_runtime.py` (new file, 732 lines)
from datetime import UTC, datetime, timedelta
from pathlib import Path
from types import TracebackType
from typing import Any, TypedDict
from uuid import UUID
import pytest
from docker import DockerClient
from docker.errors import DockerException, NotFound
from docker.types import Mount
from adapter.config.model import SandboxConfig
from adapter.docker.runtime import DockerSandboxRuntime
from adapter.observability.noop import NoopMetrics, NoopTracer
from domain.error import SandboxError, SandboxStartError
from domain.sandbox import SandboxSession, SandboxStatus
from usecase.interface import Attrs, AttrValue
CHAT_ID = UUID('123e4567-e89b-12d3-a456-426614174000')
NON_CANONICAL_CHAT_ID = '123E4567E89B12D3A456426614174000'
SESSION_ID = UUID('00000000-0000-0000-0000-000000000010')
class FakeContainer:
def __init__(self, container_id: str) -> None:
self.id = container_id
self.stop_calls = 0
def stop(self) -> None:
self.stop_calls += 1
class FakeListedContainer(FakeContainer):
def __init__(
self,
container_id: str,
*,
labels: dict[str, str],
created_at: str,
) -> None:
super().__init__(container_id)
self.labels = labels
self.attrs = {'Created': created_at}
class FailingStopContainer(FakeListedContainer):
def __init__(
self,
container_id: str,
*,
labels: dict[str, str],
created_at: str,
error: Exception,
) -> None:
super().__init__(
container_id,
labels=labels,
created_at=created_at,
)
self._error = error
def stop(self) -> None:
self.stop_calls += 1
raise self._error
class RunKwargs(TypedDict):
detach: bool
labels: dict[str, str]
mounts: list[Mount]
class RunCall(TypedDict):
args: tuple[str]
kwargs: RunKwargs
class FakeContainers:
def __init__(self, run_result: FakeContainer | None = None) -> None:
self.run_calls: list[RunCall] = []
self.get_calls: list[str] = []
self.list_calls: list[dict[str, object]] = []
self.run_result = run_result or FakeContainer('container-123')
self.get_result: FakeContainer | Exception | None = None
self.list_result: list[object] = []
self.list_error: Exception | None = None
def run(
self,
image: str,
*,
detach: bool,
labels: dict[str, str],
mounts: list[Mount],
) -> FakeContainer:
self.run_calls.append(
{
'args': (image,),
'kwargs': {
'detach': detach,
'labels': labels,
'mounts': mounts,
},
}
)
return self.run_result
def get(self, container_id: str) -> FakeContainer:
self.get_calls.append(container_id)
if isinstance(self.get_result, Exception):
raise self.get_result
if self.get_result is None:
raise AssertionError('missing get result')
return self.get_result
def list(self, *, filters: dict[str, list[str]]) -> list[object]:
self.list_calls.append({'filters': filters})
if self.list_error is not None:
raise self.list_error
return self.list_result
class FakeDockerClient(DockerClient):
def __init__(self, containers: FakeContainers) -> None:
self._containers = containers
@property
def containers(self) -> Any:
return self._containers
class RecordingMetrics:
def __init__(self) -> None:
self.increment_calls: list[tuple[str, int, Attrs | None]] = []
self.record_calls: list[tuple[str, float, Attrs | None]] = []
self.set_calls: list[tuple[str, int | float, Attrs | None]] = []
def increment(
self,
name: str,
value: int = 1,
attrs: Attrs | None = None,
) -> None:
self.increment_calls.append((name, value, attrs))
def record(
self,
name: str,
value: float,
attrs: Attrs | None = None,
) -> None:
self.record_calls.append((name, value, attrs))
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None:
self.set_calls.append((name, value, attrs))
class RecordingSpan:
def __init__(self) -> None:
self.attrs: dict[str, AttrValue] = {}
self.errors: list[Exception] = []
def set_attribute(self, name: str, value: AttrValue) -> None:
self.attrs[name] = value
def record_error(self, error: Exception) -> None:
self.errors.append(error)
class RecordingSpanContext:
def __init__(self, span: RecordingSpan) -> None:
self._span = span
def __enter__(self) -> RecordingSpan:
return self._span
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: TracebackType | None,
) -> bool | None:
return None
class RecordingTracer:
def __init__(self) -> None:
self.spans: list[tuple[str, Attrs | None, RecordingSpan]] = []
def start_span(
self,
name: str,
attrs: Attrs | None = None,
) -> RecordingSpanContext:
span = RecordingSpan()
self.spans.append((name, attrs, span))
return RecordingSpanContext(span)
def _attrs_include(
actual: Attrs | dict[str, AttrValue] | None,
expected: dict[str, AttrValue],
) -> bool:
if actual is None:
return False
return all(actual.get(name) == value for name, value in expected.items())
def _find_span(
tracer: RecordingTracer,
name: str,
attrs: dict[str, AttrValue] | None = None,
span_attrs: dict[str, AttrValue] | None = None,
) -> RecordingSpan:
for recorded_name, recorded_attrs, span in tracer.spans:
if recorded_name != name:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
if span_attrs is not None and not _attrs_include(span.attrs, span_attrs):
continue
return span
raise AssertionError(f'missing span {name}')
def _find_increment_call(
metrics: RecordingMetrics,
name: str,
*,
value: int = 1,
attrs: dict[str, AttrValue] | None = None,
) -> tuple[str, int, Attrs | None]:
for recorded_name, recorded_value, recorded_attrs in metrics.increment_calls:
if recorded_name != name or recorded_value != value:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
return recorded_name, recorded_value, recorded_attrs
raise AssertionError(f'missing increment metric {name}')
def _find_record_call(
metrics: RecordingMetrics,
name: str,
*,
attrs: dict[str, AttrValue] | None = None,
) -> tuple[str, float, Attrs | None]:
for recorded_name, recorded_value, recorded_attrs in metrics.record_calls:
if recorded_name != name:
continue
if attrs is not None and not _attrs_include(recorded_attrs, attrs):
continue
return recorded_name, recorded_value, recorded_attrs
raise AssertionError(f'missing record metric {name}')
def build_config(tmp_path: Path) -> SandboxConfig:
return SandboxConfig(
image='sandbox:latest',
ttl_seconds=300,
cleanup_interval_seconds=60,
chats_root=str(tmp_path / 'chats'),
dependencies_host_path=str(tmp_path / 'dependencies'),
lambda_tools_host_path=str(tmp_path / 'lambda-tools'),
chat_mount_path='/workspace/chat',
dependencies_mount_path='/workspace/dependencies',
lambda_tools_mount_path='/workspace/lambda-tools',
)
def build_runtime(
config: SandboxConfig,
containers: FakeContainers,
) -> DockerSandboxRuntime:
return DockerSandboxRuntime(
config,
FakeDockerClient(containers),
NoopMetrics(),
NoopTracer(),
)
def test_runtime_create_applies_mount_policy_and_labels_with_canonical_chat_id(
tmp_path: Path,
) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
(tmp_path / 'lambda-tools').mkdir()
containers = FakeContainers()
runtime = build_runtime(config, containers)
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expires_at = created_at + timedelta(minutes=5)
session = runtime.create(
session_id=SESSION_ID,
chat_id=UUID(NON_CANONICAL_CHAT_ID),
created_at=created_at,
expires_at=expires_at,
)
assert session.session_id == SESSION_ID
assert session.chat_id == CHAT_ID
assert session.container_id == 'container-123'
assert session.status is SandboxStatus.RUNNING
assert session.created_at == created_at
assert session.expires_at == expires_at
assert (tmp_path / 'chats' / str(CHAT_ID)).is_dir()
call = containers.run_calls[0]
assert call['args'] == ('sandbox:latest',)
assert call['kwargs']['detach'] is True
assert call['kwargs']['labels'] == {
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
}
mounts = call['kwargs']['mounts']
assert [dict(mount) for mount in mounts] == [
{
'Target': '/workspace/chat',
'Source': str((tmp_path / 'chats' / str(CHAT_ID)).resolve(strict=False)),
'Type': 'bind',
'ReadOnly': False,
},
{
'Target': '/workspace/dependencies',
'Source': str((tmp_path / 'dependencies').resolve(strict=False)),
'Type': 'bind',
'ReadOnly': True,
},
{
'Target': '/workspace/lambda-tools',
'Source': str((tmp_path / 'lambda-tools').resolve(strict=False)),
'Type': 'bind',
'ReadOnly': True,
},
]
def test_runtime_create_records_observability(tmp_path: Path) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
(tmp_path / 'lambda-tools').mkdir()
containers = FakeContainers()
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
created_at = datetime(2026, 4, 2, 12, 0, tzinfo=UTC)
expires_at = created_at + timedelta(minutes=5)
session = runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
created_at=created_at,
expires_at=expires_at,
)
assert session.container_id == 'container-123'
duration_call = _find_record_call(
metrics,
'sandbox.runtime.create.duration_ms',
attrs={'operation': 'create', 'result': 'created'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.create_sandbox',
{'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
{
'container.id': 'container-123',
'sandbox.result': 'created',
},
)
assert not span.errors
def test_runtime_create_raises_start_error_when_container_id_is_missing(
tmp_path: Path,
) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
(tmp_path / 'lambda-tools').mkdir()
containers = FakeContainers(run_result=FakeContainer(''))
runtime = build_runtime(config, containers)
with pytest.raises(SandboxStartError) as excinfo:
runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
)
assert str(excinfo.value) == 'sandbox_start_failed'
assert excinfo.value.chat_id == str(CHAT_ID)
def test_runtime_create_error_records_observability_when_container_id_missing(
tmp_path: Path,
) -> None:
config = build_config(tmp_path)
(tmp_path / 'dependencies').mkdir()
(tmp_path / 'lambda-tools').mkdir()
containers = FakeContainers(run_result=FakeContainer(''))
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
with pytest.raises(SandboxStartError) as excinfo:
runtime.create(
session_id=SESSION_ID,
chat_id=CHAT_ID,
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=datetime(2026, 4, 2, 12, 5, tzinfo=UTC),
)
assert str(excinfo.value) == 'sandbox_start_failed'
_find_increment_call(
metrics,
'sandbox.runtime.error.total',
attrs={'operation': 'create', 'error.type': 'SandboxStartError'},
)
duration_call = _find_record_call(
metrics,
'sandbox.runtime.create.duration_ms',
attrs={'operation': 'create', 'result': 'error'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.create_sandbox',
{'chat.id': str(CHAT_ID), 'session.id': str(SESSION_ID)},
{'sandbox.result': 'error'},
)
assert excinfo.value in span.errors
def test_runtime_stop_ignores_missing_container(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
containers.get_result = NotFound('missing')
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
runtime.stop('container-123')
assert containers.get_calls == ['container-123']
duration_call = _find_record_call(
metrics,
'sandbox.runtime.stop.duration_ms',
attrs={'operation': 'stop', 'result': 'not_found'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.stop_sandbox',
{'container.id': 'container-123'},
{'sandbox.result': 'not_found'},
)
assert not span.errors
stop_error_calls = [
call
for call in metrics.increment_calls
if call[0] == 'sandbox.runtime.error.total'
and call[2] is not None
and call[2].get('operation') == 'stop'
]
assert stop_error_calls == []
def test_runtime_stop_wraps_docker_errors(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
containers.get_result = FailingStopContainer(
'container-123',
labels={
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': '2026-04-02T12:05:00+00:00',
},
created_at='2026-04-02T12:00:00Z',
error=DockerException('boom'),
)
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
with pytest.raises(SandboxError) as excinfo:
runtime.stop('container-123')
assert str(excinfo.value) == 'sandbox_stop_failed'
_find_increment_call(
metrics,
'sandbox.runtime.error.total',
attrs={'operation': 'stop', 'error.type': 'DockerException'},
)
duration_call = _find_record_call(
metrics,
'sandbox.runtime.stop.duration_ms',
attrs={'operation': 'stop', 'result': 'error'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.stop_sandbox',
{'container.id': 'container-123'},
{
'session.id': str(SESSION_ID),
'chat.id': str(CHAT_ID),
'sandbox.result': 'error',
},
)
cause = excinfo.value.__cause__
assert isinstance(cause, DockerException)
assert cause in span.errors
def test_runtime_stop_records_observability_on_success(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
container = FakeListedContainer(
'container-123',
labels={
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': '2026-04-02T12:05:00+00:00',
},
created_at='2026-04-02T12:00:00Z',
)
containers.get_result = container
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
runtime.stop('container-123')
assert container.stop_calls == 1
duration_call = _find_record_call(
metrics,
'sandbox.runtime.stop.duration_ms',
attrs={'operation': 'stop', 'result': 'stopped'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.stop_sandbox',
{'container.id': 'container-123'},
{
'session.id': str(SESSION_ID),
'chat.id': str(CHAT_ID),
'sandbox.result': 'stopped',
},
)
assert not span.errors
stop_error_calls = [
call
for call in metrics.increment_calls
if call[0] == 'sandbox.runtime.error.total'
and call[2] is not None
and call[2].get('operation') == 'stop'
]
assert stop_error_calls == []
def test_runtime_list_active_sessions_reads_valid_labeled_containers(
tmp_path: Path,
) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
containers.list_result = [
FakeListedContainer(
'container-123',
labels={
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
},
created_at='2026-04-02T12:00:00Z',
),
FakeListedContainer(
'container-bad',
labels={
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
},
created_at='2026-04-02T12:01:00Z',
),
]
runtime = build_runtime(config, containers)
sessions = runtime.list_active_sessions()
assert sessions == [
SandboxSession(
session_id=SESSION_ID,
chat_id=CHAT_ID,
container_id='container-123',
status=SandboxStatus.RUNNING,
created_at=datetime(2026, 4, 2, 12, 0, tzinfo=UTC),
expires_at=expires_at,
)
]
assert containers.list_calls == [
{'filters': {'label': ['session_id', 'chat_id', 'expires_at']}}
]
def test_runtime_list_active_records_observability(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
expires_at = datetime(2026, 4, 2, 12, 5, tzinfo=UTC)
containers.list_result = [
FakeListedContainer(
'container-123',
labels={
'session_id': str(SESSION_ID),
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
},
created_at='2026-04-02T12:00:00Z',
),
FakeListedContainer(
'container-bad',
labels={
'chat_id': str(CHAT_ID),
'expires_at': expires_at.isoformat(),
},
created_at='2026-04-02T12:01:00Z',
),
]
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
sessions = runtime.list_active_sessions()
assert len(sessions) == 1
duration_call = _find_record_call(
metrics,
'sandbox.runtime.list_active.duration_ms',
attrs={'operation': 'list_active', 'result': 'listed'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.list_active_sandboxes',
span_attrs={
'sandbox.container_count': 2,
'sandbox.active_count': 1,
'sandbox.result': 'listed',
},
)
assert not span.errors
def test_runtime_list_active_error_records_observability(tmp_path: Path) -> None:
config = build_config(tmp_path)
containers = FakeContainers()
containers.list_error = DockerException('boom')
metrics = RecordingMetrics()
tracer = RecordingTracer()
runtime = DockerSandboxRuntime(
config,
FakeDockerClient(containers),
metrics,
tracer,
)
with pytest.raises(SandboxError) as excinfo:
runtime.list_active_sessions()
assert str(excinfo.value) == 'sandbox_list_failed'
_find_increment_call(
metrics,
'sandbox.runtime.error.total',
attrs={'operation': 'list_active', 'error.type': 'DockerException'},
)
duration_call = _find_record_call(
metrics,
'sandbox.runtime.list_active.duration_ms',
attrs={'operation': 'list_active', 'result': 'error'},
)
assert duration_call[1] >= 0
span = _find_span(
tracer,
'adapter.docker.list_active_sandboxes',
span_attrs={'sandbox.result': 'error'},
)
assert isinstance(excinfo.value.__cause__, DockerException)
assert excinfo.value in span.errors

93
test/test_sandbox_lock.py Normal file

@@ -0,0 +1,93 @@
import threading
from uuid import UUID
from repository.sandbox_lock import ProcessLocalSandboxLifecycleLocker
CHAT_ID = UUID('77777777-7777-7777-7777-777777777777')
class LockRace:
def __init__(self, locker: ProcessLocalSandboxLifecycleLocker) -> None:
self.locker = locker
self.entered_first = threading.Event()
self.second_requested = threading.Event()
self.second_entered = threading.Event()
self.release_first = threading.Event()
self.release_second = threading.Event()
self.errors: list[Exception] = []
self.order: list[str] = []
self.first_entry: object | None = None
def run_first(self) -> None:
try:
with self.locker.lock(CHAT_ID):
self.first_entry = self.locker._locks_by_chat_id[CHAT_ID]
self.order.append('first_entered')
self.entered_first.set()
assert self.release_first.wait(timeout=1)
self.order.append('first_releasing')
except Exception as exc:
self.errors.append(exc)
def run_second(self) -> None:
try:
assert self.entered_first.wait(timeout=1)
context = self.locker.lock(CHAT_ID)
self.second_requested.set()
with context:
self.order.append('second_entered')
self.second_entered.set()
assert self.release_second.wait(timeout=1)
self.order.append('second_releasing')
except Exception as exc:
self.errors.append(exc)
def test_process_local_sandbox_lifecycle_locker_evicts_idle_lock() -> None:
locker = ProcessLocalSandboxLifecycleLocker()
with locker.lock(CHAT_ID):
assert CHAT_ID in locker._locks_by_chat_id
assert len(locker._locks_by_chat_id) == 1
assert CHAT_ID not in locker._locks_by_chat_id
assert len(locker._locks_by_chat_id) == 0
def test_process_local_sandbox_lifecycle_locker_keeps_shared_lock_for_waiters() -> None:
locker = ProcessLocalSandboxLifecycleLocker()
race = LockRace(locker)
first_thread = threading.Thread(target=race.run_first)
second_thread = threading.Thread(target=race.run_second)
first_thread.start()
assert race.entered_first.wait(timeout=1)
second_thread.start()
assert race.second_requested.wait(timeout=1)
assert len(locker._locks_by_chat_id) == 1
assert locker._locks_by_chat_id[CHAT_ID] is race.first_entry
assert not race.second_entered.wait(timeout=0.1)
race.release_first.set()
assert race.second_entered.wait(timeout=1)
assert len(locker._locks_by_chat_id) == 1
assert locker._locks_by_chat_id[CHAT_ID] is race.first_entry
race.release_second.set()
first_thread.join(timeout=1)
second_thread.join(timeout=1)
assert not first_thread.is_alive()
assert not second_thread.is_alive()
assert race.errors == []
assert race.order == [
'first_entered',
'first_releasing',
'second_entered',
'second_releasing',
]
assert CHAT_ID not in locker._locks_by_chat_id
assert len(locker._locks_by_chat_id) == 0
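The two tests above pin down a refcounted lifecycle: the per-chat lock is shared while any thread holds or waits on it, and evicted from `_locks_by_chat_id` once the last user releases it (the "lock del idle" fix in the history). A sketch of a locker with that behaviour; the class name and the `_waiters_by_chat_id` bookkeeping are assumptions, and the real `ProcessLocalSandboxLifecycleLocker` may differ in detail:

```python
import threading
from collections.abc import Iterator
from contextlib import contextmanager
from uuid import UUID

class ProcessLocalLocker:
    """Per-chat locks, evicted once the last holder or waiter releases."""

    def __init__(self) -> None:
        self._registry_lock = threading.Lock()
        self._locks_by_chat_id: dict[UUID, threading.Lock] = {}
        self._waiters_by_chat_id: dict[UUID, int] = {}

    @contextmanager
    def lock(self, chat_id: UUID) -> Iterator[None]:
        with self._registry_lock:
            # Register interest before blocking, so a concurrent releaser
            # sees a nonzero count and keeps the shared lock alive.
            chat_lock = self._locks_by_chat_id.setdefault(chat_id, threading.Lock())
            self._waiters_by_chat_id[chat_id] = (
                self._waiters_by_chat_id.get(chat_id, 0) + 1
            )
        chat_lock.acquire()
        try:
            yield
        finally:
            chat_lock.release()
            with self._registry_lock:
                remaining = self._waiters_by_chat_id[chat_id] - 1
                if remaining:
                    self._waiters_by_chat_id[chat_id] = remaining
                else:
                    # No holder and no waiter left: evict the idle lock.
                    del self._waiters_by_chat_id[chat_id]
                    del self._locks_by_chat_id[chat_id]
```

Registering the waiter count under `_registry_lock` before blocking on the chat lock is what keeps the second thread in the race test attached to the same lock object the first thread holds.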

1179
test/test_sandbox_usecase.py Normal file

File diff suppressed because it is too large


@@ -0,0 +1,129 @@
from datetime import UTC, datetime
from uuid import UUID
from domain.chat import Chat, ChatAttachmentName, ChatFile
from domain.workspace import Workspace
from repository.chat import InMemoryChatRepository
from repository.chat_file import InMemoryChatFileRepository
from repository.workspace import InMemoryWorkspaceRepository
USER_A = UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
USER_B = UUID('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
WS_A = UUID('11111111-1111-1111-1111-111111111111')
WS_B = UUID('22222222-2222-2222-2222-222222222222')
CHAT_A = UUID('33333333-3333-3333-3333-333333333333')
CHAT_B = UUID('44444444-4444-4444-4444-444444444444')
FILE_A = UUID('55555555-5555-5555-5555-555555555555')
FILE_B = UUID('66666666-6666-6666-6666-666666666666')
TS = datetime(2026, 4, 1, 12, 0, 0, tzinfo=UTC)
TS_2 = datetime(2026, 4, 1, 13, 0, 0, tzinfo=UTC)
def test_workspace_get_by_user_id() -> None:
repo = InMemoryWorkspaceRepository()
ws = Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS)
repo.save(ws)
assert repo.get(WS_A) == ws
assert repo.get_by_user_id(USER_A) == ws
assert repo.get_by_user_id(USER_B) is None
def test_workspace_replace_for_user() -> None:
repo = InMemoryWorkspaceRepository()
ws = Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS)
repo.save(ws)
new_ws = Workspace(workspace_id=WS_B, user_id=USER_A, created_at=TS_2)
repo.save(new_ws)
assert repo.get_by_user_id(USER_A) == new_ws
assert repo.get(WS_A) is None
assert repo.get(WS_B) == new_ws
def test_chat_crud_workspace_scope() -> None:
chat_repo = InMemoryChatRepository()
chat_a = Chat(chat_id=CHAT_A, workspace_id=WS_A, created_at=TS)
chat_b = Chat(chat_id=CHAT_B, workspace_id=WS_A, created_at=TS_2)
chat_repo.save(chat_a)
chat_repo.save(chat_b)
listed = chat_repo.list_by_workspace_id(WS_A)
assert listed == [chat_a, chat_b]
assert chat_repo.get(CHAT_A) == chat_a
chat_repo.delete(CHAT_A)
assert chat_repo.get(CHAT_A) is None
assert chat_repo.list_by_workspace_id(WS_A) == [chat_b]
def test_chat_list_only_same_workspace() -> None:
chat_repo = InMemoryChatRepository()
chat_a = Chat(chat_id=CHAT_A, workspace_id=WS_A, created_at=TS)
chat_b = Chat(chat_id=CHAT_B, workspace_id=WS_B, created_at=TS_2)
chat_repo.save(chat_a)
chat_repo.save(chat_b)
assert chat_repo.list_by_workspace_id(WS_A) == [chat_a]
assert chat_repo.list_by_workspace_id(WS_B) == [chat_b]
def test_chat_file_metadata_save_get_list_delete_clear() -> None:
name_a = ChatAttachmentName('doc.pdf')
name_b = ChatAttachmentName('x.png')
repo = InMemoryChatFileRepository()
f_a = ChatFile(
file_id=FILE_A,
chat_id=CHAT_A,
name=name_a,
content_type='application/pdf',
size_bytes=100,
created_at=TS,
)
f_b = ChatFile(
file_id=FILE_B,
chat_id=CHAT_A,
name=name_b,
content_type='image/png',
size_bytes=200,
created_at=TS_2,
)
repo.save(f_a)
repo.save(f_b)
assert repo.get(FILE_A) == f_a
assert repo.get_by_chat_id_and_name(CHAT_A, name_a) == f_a
listed = repo.list_by_chat_id(CHAT_A)
assert listed == [f_a, f_b]
repo.delete(FILE_A)
assert repo.get(FILE_A) is None
assert repo.get_by_chat_id_and_name(CHAT_A, name_a) is None
repo.save(f_a)
repo.delete_by_chat_id(CHAT_A)
assert repo.list_by_chat_id(CHAT_A) == []
def test_chat_file_same_name_replaced_by_new_id() -> None:
name = ChatAttachmentName('a.txt')
repo = InMemoryChatFileRepository()
first = ChatFile(
file_id=FILE_A,
chat_id=CHAT_A,
name=name,
content_type='text/plain',
size_bytes=1,
created_at=TS,
)
second = ChatFile(
file_id=FILE_B,
chat_id=CHAT_A,
name=name,
content_type='text/plain',
size_bytes=2,
created_at=TS_2,
)
repo.save(first)
repo.save(second)
assert repo.get(FILE_A) is None
assert repo.get(FILE_B) == second
assert repo.get_by_chat_id_and_name(CHAT_A, name) == second


@@ -1,8 +1,13 @@
from collections.abc import Mapping
from datetime import datetime
from types import TracebackType
from typing import Protocol, TypeAlias
from uuid import UUID
from domain.chat import Chat, ChatAttachmentName, ChatFile
from domain.sandbox import SandboxSession
from domain.user import User
from domain.workspace import Workspace, WorkspaceUsage
AttrValue: TypeAlias = str | int | float | bool
Attrs: TypeAlias = Mapping[str, AttrValue]
@@ -16,6 +21,125 @@ class UserRepository(Protocol):
def save(self, user: User) -> None: ...
class WorkspaceRepository(Protocol):
def get(self, workspace_id: UUID) -> Workspace | None: ...
def get_by_user_id(self, user_id: UUID) -> Workspace | None: ...
def save(self, workspace: Workspace) -> None: ...
class ChatRepository(Protocol):
def get(self, chat_id: UUID) -> Chat | None: ...
def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]: ...
def save(self, chat: Chat) -> None: ...
def delete(self, chat_id: UUID) -> None: ...
class ChatFileRepository(Protocol):
def get(self, file_id: UUID) -> ChatFile | None: ...
def get_by_chat_id_and_name(
self,
chat_id: UUID,
name: ChatAttachmentName,
) -> ChatFile | None: ...
def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]: ...
def save(self, chat_file: ChatFile) -> None: ...
def delete(self, file_id: UUID) -> None: ...
def delete_by_chat_id(self, chat_id: UUID) -> None: ...
class ChatStorage(Protocol):
def ensure_chat(self, chat: Chat) -> None: ...
def read_history(self, chat: Chat) -> str: ...
def write_history(self, chat: Chat, content: str) -> None: ...
def delete_chat(self, chat: Chat) -> None: ...
def write_attachment(
self,
chat: Chat,
file_name: ChatAttachmentName,
content: bytes,
) -> int: ...
def read_attachment(self, chat: Chat, file_name: ChatAttachmentName) -> bytes: ...
def delete_attachment(
self,
chat: Chat,
file_name: ChatAttachmentName,
) -> None: ...
def clear_attachments(self, chat: Chat) -> None: ...
class StorageUsageReader(Protocol):
def get_workspace_usage(
self,
workspace: Workspace,
chats: list[Chat],
) -> WorkspaceUsage: ...
class IdGenerator(Protocol):
def new(self) -> UUID: ...
class SandboxSessionRepository(Protocol):
def get_active_by_chat_id(self, chat_id: UUID) -> SandboxSession | None: ...
def list_expired(self, now: datetime) -> list[SandboxSession]: ...
def count_active(self) -> int: ...
def save(self, session: SandboxSession) -> None: ...
def delete(self, session_id: UUID) -> None: ...
class LockContext(Protocol):
def __enter__(self) -> None: ...
def __exit__(
self,
exc_type: type[BaseException] | None,
exc: BaseException | None,
traceback: TracebackType | None,
) -> bool | None: ...
class SandboxLifecycleLocker(Protocol):
def lock(self, chat_id: UUID) -> LockContext: ...
class SandboxRuntime(Protocol):
def create(
self,
*,
session_id: UUID,
chat_id: UUID,
created_at: datetime,
expires_at: datetime,
) -> SandboxSession: ...
def stop(self, container_id: str) -> None: ...
class Clock(Protocol):
def now(self) -> datetime: ...
class Logger(Protocol):
def debug(self, message: str, attrs: Attrs | None = None) -> None: ...
@@ -41,6 +165,13 @@ class Metrics(Protocol):
attrs: Attrs | None = None,
) -> None: ...
def set(
self,
name: str,
value: int | float,
attrs: Attrs | None = None,
) -> None: ...
class Span(Protocol):
def set_attribute(self, name: str, value: AttrValue) -> None: ...

303
usecase/sandbox.py Normal file

@@ -0,0 +1,303 @@
from dataclasses import dataclass
from datetime import timedelta
from uuid import UUID, uuid4
from domain.sandbox import SandboxSession
from usecase.interface import (
Clock,
Logger,
Metrics,
SandboxLifecycleLocker,
SandboxRuntime,
SandboxSessionRepository,
Tracer,
)
@dataclass(frozen=True, slots=True)
class CreateSandboxCommand:
chat_id: UUID
class CreateSandbox:
def __init__(
self,
repository: SandboxSessionRepository,
locker: SandboxLifecycleLocker,
runtime: SandboxRuntime,
clock: Clock,
logger: Logger,
metrics: Metrics,
tracer: Tracer,
ttl: timedelta,
) -> None:
self._repository = repository
self._locker = locker
self._runtime = runtime
self._clock = clock
self._logger = logger
self._metrics = metrics
self._tracer = tracer
self._ttl = ttl
def execute(self, command: CreateSandboxCommand) -> SandboxSession:
chat_id = command.chat_id
with self._tracer.start_span(
'usecase.create_sandbox',
attrs={'chat.id': str(chat_id)},
) as span:
try:
with self._locker.lock(chat_id):
session = self._repository.get_active_by_chat_id(chat_id)
now = self._clock.now()
if session is not None and session.expires_at > now:
span.set_attribute('session.id', str(session.session_id))
span.set_attribute('container.id', session.container_id)
span.set_attribute('sandbox.result', 'reused')
self._metrics.increment(
'sandbox.create.total',
attrs=_result_metric_attrs('reused'),
)
self._logger.info(
'sandbox_reused',
attrs=_sandbox_attrs(session),
)
return session
result = 'created'
new_session_id: UUID | None = None
if session is not None:
result = 'replaced'
new_session_id = _new_session_id()
span.set_attribute(
'sandbox.previous_session.id',
str(session.session_id),
)
span.set_attribute(
'sandbox.previous_container.id',
session.container_id,
)
span.set_attribute(
'sandbox.new_session.id',
str(new_session_id),
)
self._logger.info(
'sandbox_replaced',
attrs=_sandbox_attrs(session),
)
self._runtime.stop(session.container_id)
self._repository.delete(session.session_id)
_set_active_count(self._metrics, self._repository)
created_at = self._clock.now()
expires_at = created_at + self._ttl
if new_session_id is None:
new_session_id = _new_session_id()
span.set_attribute('session.id', str(new_session_id))
new_session = self._runtime.create(
session_id=new_session_id,
chat_id=chat_id,
created_at=created_at,
expires_at=expires_at,
)
if result == 'replaced':
span.set_attribute(
'sandbox.new_container.id',
new_session.container_id,
)
self._save_created_session(new_session)
_set_active_count(self._metrics, self._repository)
if result == 'replaced':
span.set_attribute('session.id', str(new_session.session_id))
span.set_attribute('container.id', new_session.container_id)
span.set_attribute('sandbox.result', result)
self._metrics.increment(
'sandbox.create.total',
attrs=_result_metric_attrs(result),
)
self._logger.info(
'sandbox_created',
attrs=_sandbox_attrs(new_session),
)
return new_session
except Exception as exc:
span.set_attribute('sandbox.result', 'error')
self._metrics.increment(
'sandbox.create.total',
attrs=_result_metric_attrs('error'),
)
span.record_error(exc)
raise
def _save_created_session(self, session: SandboxSession) -> None:
try:
self._repository.save(session)
except Exception as exc:
self._compensate_save_failure(session, exc)
raise
def _compensate_save_failure(
self,
session: SandboxSession,
error: Exception,
) -> None:
try:
            self._runtime.stop(session.container_id)
        except Exception as stop_error:
            _set_active_count(self._metrics, self._repository)
            raise error from stop_error
        _set_active_count(self._metrics, self._repository)


class CleanupExpiredSandboxes:
    def __init__(
        self,
        repository: SandboxSessionRepository,
        locker: SandboxLifecycleLocker,
        runtime: SandboxRuntime,
        clock: Clock,
        logger: Logger,
        metrics: Metrics,
        tracer: Tracer,
    ) -> None:
        self._repository = repository
        self._locker = locker
        self._runtime = runtime
        self._clock = clock
        self._logger = logger
        self._metrics = metrics
        self._tracer = tracer

    def execute(self) -> list[SandboxSession]:
        cleaned_sessions: list[SandboxSession] = []
        error_count = 0
        with self._tracer.start_span(
            'usecase.cleanup_expired_sandboxes',
        ) as span:
            try:
                expired_sessions = self._repository.list_expired(self._clock.now())
            except Exception as exc:
                span.set_attribute('sandbox.result', 'error')
                self._metrics.increment(
                    'sandbox.cleanup.error.total',
                    attrs=_cleanup_error_metric_attrs(
                        type(exc).__name__,
                        'list_expired',
                    ),
                )
                span.record_error(exc)
                raise
            span.set_attribute('sandbox.expired_count', len(expired_sessions))
            for session in expired_sessions:
                with self._tracer.start_span(
                    'usecase.cleanup_expired_sandbox',
                    attrs=_sandbox_span_attrs(session),
                ) as cleanup_span:
                    try:
                        cleaned_session = self._cleanup_session(session)
                    except Exception as exc:
                        error_count += 1
                        cleanup_span.set_attribute('sandbox.result', 'error')
                        cleanup_span.record_error(exc)
                        self._metrics.increment(
                            'sandbox.cleanup.error.total',
                            attrs=_error_metric_attrs(type(exc).__name__),
                        )
                        attrs = _sandbox_attrs(session)
                        attrs['error'] = type(exc).__name__
                        self._logger.error(
                            'sandbox_clean_failed',
                            attrs=attrs,
                        )
                        continue
                    if cleaned_session is None:
                        cleanup_span.set_attribute('sandbox.result', 'skipped')
                        continue
                    cleanup_span.set_attribute('sandbox.result', 'cleaned')
                    cleaned_sessions.append(cleaned_session)
                    self._metrics.increment(
                        'sandbox.cleanup.total',
                        attrs=_result_metric_attrs('cleaned'),
                    )
                    self._logger.info(
                        'sandbox_cleaned',
                        attrs=_sandbox_attrs(cleaned_session),
                    )
            span.set_attribute('sandbox.cleaned_count', len(cleaned_sessions))
            span.set_attribute('sandbox.error_count', error_count)
            span.set_attribute(
                'sandbox.result',
                'completed' if error_count == 0 else 'completed_with_errors',
            )
        return cleaned_sessions

    def _cleanup_session(self, session: SandboxSession) -> SandboxSession | None:
        with self._locker.lock(session.chat_id):
            current_session = self._repository.get_active_by_chat_id(session.chat_id)
            now = self._clock.now()
            if current_session is None:
                return None
            if current_session.session_id != session.session_id:
                return None
            if current_session.expires_at > now:
                return None
            self._runtime.stop(current_session.container_id)
            self._repository.delete(current_session.session_id)
            _set_active_count(self._metrics, self._repository)
            return current_session
def _new_session_id() -> UUID:
    return uuid4()


def _sandbox_attrs(session: SandboxSession) -> dict[str, str]:
    return {
        'chat_id': str(session.chat_id),
        'session_id': str(session.session_id),
        'container_id': session.container_id,
    }


def _sandbox_span_attrs(session: SandboxSession) -> dict[str, str]:
    return {
        'chat.id': str(session.chat_id),
        'session.id': str(session.session_id),
        'container.id': session.container_id,
    }


def _result_metric_attrs(result: str) -> dict[str, str]:
    return {'result': result}


def _error_metric_attrs(error_type: str) -> dict[str, str]:
    return {'error.type': error_type}


def _cleanup_error_metric_attrs(
    error_type: str,
    reason: str,
) -> dict[str, str]:
    return {
        'error.type': error_type,
        'reason': reason,
    }


def _set_active_count(
    metrics: Metrics,
    repository: SandboxSessionRepository,
) -> None:
    metrics.set('sandbox.active.count', repository.count_active())
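The `execute` sweep treats per-session failures as data rather than control flow: an exception while cleaning one session increments the error counter and is logged, then the loop continues, so one broken container cannot stall the whole cleanup pass. A standalone sketch of that error-isolation pattern (the `sweep` helper and its arguments are illustrative, not part of the codebase):

```python
def sweep(sessions, cleanup):
    """Apply `cleanup` to each session, isolating per-item failures."""
    cleaned, errors = [], 0
    for session in sessions:
        try:
            result = cleanup(session)
        except Exception:
            errors += 1  # counted (and, in the real use case, logged)
            continue     # the sweep moves on to the next session
        if result is not None:  # None means "skipped": already gone/refreshed
            cleaned.append(result)
    return cleaned, errors
```

The caller can then report `completed` versus `completed_with_errors` from the error count, exactly as the span attributes above do.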

uv.lock (generated, 176 lines changed)

@@ -50,6 +50,51 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" },
]
[[package]]
name = "cffi"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pycparser", marker = "implementation_name != 'PyPy'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" },
{ url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" },
{ url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" },
{ url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" },
{ url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" },
{ url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" },
{ url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" },
{ url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" },
{ url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" },
{ url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" },
{ url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" },
{ url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" },
{ url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" },
{ url = "https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" },
{ url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" },
{ url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" },
{ url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" },
{ url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" },
{ url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" },
{ url = "https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" },
{ url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" },
{ url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" },
{ url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" },
{ url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" },
{ url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" },
{ url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" },
{ url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" },
{ url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" },
{ url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" },
{ url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" },
{ url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" },
{ url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 180487, upload-time = "2025-09-08T23:23:40.423Z" },
{ url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" },
{ url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.6"
@@ -128,6 +173,73 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
]
[[package]]
name = "cryptography"
version = "46.0.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cffi", marker = "platform_python_implementation != 'PyPy'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a4/ba/04b1bd4218cbc58dc90ce967106d51582371b898690f3ae0402876cc4f34/cryptography-46.0.6.tar.gz", hash = "sha256:27550628a518c5c6c903d84f637fbecf287f6cb9ced3804838a1295dc1fd0759", size = 750542, upload-time = "2026-03-25T23:34:53.396Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/47/23/9285e15e3bc57325b0a72e592921983a701efc1ee8f91c06c5f0235d86d9/cryptography-46.0.6-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:64235194bad039a10bb6d2d930ab3323baaec67e2ce36215fd0952fad0930ca8", size = 7176401, upload-time = "2026-03-25T23:33:22.096Z" },
{ url = "https://files.pythonhosted.org/packages/60/f8/e61f8f13950ab6195b31913b42d39f0f9afc7d93f76710f299b5ec286ae6/cryptography-46.0.6-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:26031f1e5ca62fcb9d1fcb34b2b60b390d1aacaa15dc8b895a9ed00968b97b30", size = 4275275, upload-time = "2026-03-25T23:33:23.844Z" },
{ url = "https://files.pythonhosted.org/packages/19/69/732a736d12c2631e140be2348b4ad3d226302df63ef64d30dfdb8db7ad1c/cryptography-46.0.6-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9a693028b9cbe51b5a1136232ee8f2bc242e4e19d456ded3fa7c86e43c713b4a", size = 4425320, upload-time = "2026-03-25T23:33:25.703Z" },
{ url = "https://files.pythonhosted.org/packages/d4/12/123be7292674abf76b21ac1fc0e1af50661f0e5b8f0ec8285faac18eb99e/cryptography-46.0.6-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:67177e8a9f421aa2d3a170c3e56eca4e0128883cf52a071a7cbf53297f18b175", size = 4278082, upload-time = "2026-03-25T23:33:27.423Z" },
{ url = "https://files.pythonhosted.org/packages/5b/ba/d5e27f8d68c24951b0a484924a84c7cdaed7502bac9f18601cd357f8b1d2/cryptography-46.0.6-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:d9528b535a6c4f8ff37847144b8986a9a143585f0540fbcb1a98115b543aa463", size = 4926514, upload-time = "2026-03-25T23:33:29.206Z" },
{ url = "https://files.pythonhosted.org/packages/34/71/1ea5a7352ae516d5512d17babe7e1b87d9db5150b21f794b1377eac1edc0/cryptography-46.0.6-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:22259338084d6ae497a19bae5d4c66b7ca1387d3264d1c2c0e72d9e9b6a77b97", size = 4457766, upload-time = "2026-03-25T23:33:30.834Z" },
{ url = "https://files.pythonhosted.org/packages/01/59/562be1e653accee4fdad92c7a2e88fced26b3fdfce144047519bbebc299e/cryptography-46.0.6-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:760997a4b950ff00d418398ad73fbc91aa2894b5c1db7ccb45b4f68b42a63b3c", size = 3986535, upload-time = "2026-03-25T23:33:33.02Z" },
{ url = "https://files.pythonhosted.org/packages/d6/8b/b1ebfeb788bf4624d36e45ed2662b8bd43a05ff62157093c1539c1288a18/cryptography-46.0.6-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:3dfa6567f2e9e4c5dceb8ccb5a708158a2a871052fa75c8b78cb0977063f1507", size = 4277618, upload-time = "2026-03-25T23:33:34.567Z" },
{ url = "https://files.pythonhosted.org/packages/dd/52/a005f8eabdb28df57c20f84c44d397a755782d6ff6d455f05baa2785bd91/cryptography-46.0.6-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:cdcd3edcbc5d55757e5f5f3d330dd00007ae463a7e7aa5bf132d1f22a4b62b19", size = 4890802, upload-time = "2026-03-25T23:33:37.034Z" },
{ url = "https://files.pythonhosted.org/packages/ec/4d/8e7d7245c79c617d08724e2efa397737715ca0ec830ecb3c91e547302555/cryptography-46.0.6-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:d4e4aadb7fc1f88687f47ca20bb7227981b03afaae69287029da08096853b738", size = 4457425, upload-time = "2026-03-25T23:33:38.904Z" },
{ url = "https://files.pythonhosted.org/packages/1d/5c/f6c3596a1430cec6f949085f0e1a970638d76f81c3ea56d93d564d04c340/cryptography-46.0.6-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2b417edbe8877cda9022dde3a008e2deb50be9c407eef034aeeb3a8b11d9db3c", size = 4405530, upload-time = "2026-03-25T23:33:40.842Z" },
{ url = "https://files.pythonhosted.org/packages/7e/c9/9f9cea13ee2dbde070424e0c4f621c091a91ffcc504ffea5e74f0e1daeff/cryptography-46.0.6-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:380343e0653b1c9d7e1f55b52aaa2dbb2fdf2730088d48c43ca1c7c0abb7cc2f", size = 4667896, upload-time = "2026-03-25T23:33:42.781Z" },
{ url = "https://files.pythonhosted.org/packages/ad/b5/1895bc0821226f129bc74d00eccfc6a5969e2028f8617c09790bf89c185e/cryptography-46.0.6-cp311-abi3-win32.whl", hash = "sha256:bcb87663e1f7b075e48c3be3ecb5f0b46c8fc50b50a97cf264e7f60242dca3f2", size = 3026348, upload-time = "2026-03-25T23:33:45.021Z" },
{ url = "https://files.pythonhosted.org/packages/c3/f8/c9bcbf0d3e6ad288b9d9aa0b1dee04b063d19e8c4f871855a03ab3a297ab/cryptography-46.0.6-cp311-abi3-win_amd64.whl", hash = "sha256:6739d56300662c468fddb0e5e291f9b4d084bead381667b9e654c7dd81705124", size = 3483896, upload-time = "2026-03-25T23:33:46.649Z" },
{ url = "https://files.pythonhosted.org/packages/01/41/3a578f7fd5c70611c0aacba52cd13cb364a5dee895a5c1d467208a9380b0/cryptography-46.0.6-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:2ef9e69886cbb137c2aef9772c2e7138dc581fad4fcbcf13cc181eb5a3ab6275", size = 7117147, upload-time = "2026-03-25T23:33:48.249Z" },
{ url = "https://files.pythonhosted.org/packages/fa/87/887f35a6fca9dde90cad08e0de0c89263a8e59b2d2ff904fd9fcd8025b6f/cryptography-46.0.6-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7f417f034f91dcec1cb6c5c35b07cdbb2ef262557f701b4ecd803ee8cefed4f4", size = 4266221, upload-time = "2026-03-25T23:33:49.874Z" },
{ url = "https://files.pythonhosted.org/packages/aa/a8/0a90c4f0b0871e0e3d1ed126aed101328a8a57fd9fd17f00fb67e82a51ca/cryptography-46.0.6-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d24c13369e856b94892a89ddf70b332e0b70ad4a5c43cf3e9cb71d6d7ffa1f7b", size = 4408952, upload-time = "2026-03-25T23:33:52.128Z" },
{ url = "https://files.pythonhosted.org/packages/16/0b/b239701eb946523e4e9f329336e4ff32b1247e109cbab32d1a7b61da8ed7/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:aad75154a7ac9039936d50cf431719a2f8d4ed3d3c277ac03f3339ded1a5e707", size = 4270141, upload-time = "2026-03-25T23:33:54.11Z" },
{ url = "https://files.pythonhosted.org/packages/0f/a8/976acdd4f0f30df7b25605f4b9d3d89295351665c2091d18224f7ad5cdbf/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:3c21d92ed15e9cfc6eb64c1f5a0326db22ca9c2566ca46d845119b45b4400361", size = 4904178, upload-time = "2026-03-25T23:33:55.725Z" },
{ url = "https://files.pythonhosted.org/packages/b1/1b/bf0e01a88efd0e59679b69f42d4afd5bced8700bb5e80617b2d63a3741af/cryptography-46.0.6-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:4668298aef7cddeaf5c6ecc244c2302a2b8e40f384255505c22875eebb47888b", size = 4441812, upload-time = "2026-03-25T23:33:57.364Z" },
{ url = "https://files.pythonhosted.org/packages/bb/8b/11df86de2ea389c65aa1806f331cae145f2ed18011f30234cc10ca253de8/cryptography-46.0.6-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:8ce35b77aaf02f3b59c90b2c8a05c73bac12cea5b4e8f3fbece1f5fddea5f0ca", size = 3963923, upload-time = "2026-03-25T23:33:59.361Z" },
{ url = "https://files.pythonhosted.org/packages/91/e0/207fb177c3a9ef6a8108f234208c3e9e76a6aa8cf20d51932916bd43bda0/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:c89eb37fae9216985d8734c1afd172ba4927f5a05cfd9bf0e4863c6d5465b013", size = 4269695, upload-time = "2026-03-25T23:34:00.909Z" },
{ url = "https://files.pythonhosted.org/packages/21/5e/19f3260ed1e95bced52ace7501fabcd266df67077eeb382b79c81729d2d3/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:ed418c37d095aeddf5336898a132fba01091f0ac5844e3e8018506f014b6d2c4", size = 4869785, upload-time = "2026-03-25T23:34:02.796Z" },
{ url = "https://files.pythonhosted.org/packages/10/38/cd7864d79aa1d92ef6f1a584281433419b955ad5a5ba8d1eb6c872165bcb/cryptography-46.0.6-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:69cf0056d6947edc6e6760e5f17afe4bea06b56a9ac8a06de9d2bd6b532d4f3a", size = 4441404, upload-time = "2026-03-25T23:34:04.35Z" },
{ url = "https://files.pythonhosted.org/packages/09/0a/4fe7a8d25fed74419f91835cf5829ade6408fd1963c9eae9c4bce390ecbb/cryptography-46.0.6-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8e7304c4f4e9490e11efe56af6713983460ee0780f16c63f219984dab3af9d2d", size = 4397549, upload-time = "2026-03-25T23:34:06.342Z" },
{ url = "https://files.pythonhosted.org/packages/5f/a0/7d738944eac6513cd60a8da98b65951f4a3b279b93479a7e8926d9cd730b/cryptography-46.0.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b928a3ca837c77a10e81a814a693f2295200adb3352395fad024559b7be7a736", size = 4651874, upload-time = "2026-03-25T23:34:07.916Z" },
{ url = "https://files.pythonhosted.org/packages/cb/f1/c2326781ca05208845efca38bf714f76939ae446cd492d7613808badedf1/cryptography-46.0.6-cp314-cp314t-win32.whl", hash = "sha256:97c8115b27e19e592a05c45d0dd89c57f81f841cc9880e353e0d3bf25b2139ed", size = 3001511, upload-time = "2026-03-25T23:34:09.892Z" },
{ url = "https://files.pythonhosted.org/packages/c9/57/fe4a23eb549ac9d903bd4698ffda13383808ef0876cc912bcb2838799ece/cryptography-46.0.6-cp314-cp314t-win_amd64.whl", hash = "sha256:c797e2517cb7880f8297e2c0f43bb910e91381339336f75d2c1c2cbf811b70b4", size = 3471692, upload-time = "2026-03-25T23:34:11.613Z" },
{ url = "https://files.pythonhosted.org/packages/c4/cc/f330e982852403da79008552de9906804568ae9230da8432f7496ce02b71/cryptography-46.0.6-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:12cae594e9473bca1a7aceb90536060643128bb274fcea0fc459ab90f7d1ae7a", size = 7162776, upload-time = "2026-03-25T23:34:13.308Z" },
{ url = "https://files.pythonhosted.org/packages/49/b3/dc27efd8dcc4bff583b3f01d4a3943cd8b5821777a58b3a6a5f054d61b79/cryptography-46.0.6-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:639301950939d844a9e1c4464d7e07f902fe9a7f6b215bb0d4f28584729935d8", size = 4270529, upload-time = "2026-03-25T23:34:15.019Z" },
{ url = "https://files.pythonhosted.org/packages/e6/05/e8d0e6eb4f0d83365b3cb0e00eb3c484f7348db0266652ccd84632a3d58d/cryptography-46.0.6-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ed3775295fb91f70b4027aeba878d79b3e55c0b3e97eaa4de71f8f23a9f2eb77", size = 4414827, upload-time = "2026-03-25T23:34:16.604Z" },
{ url = "https://files.pythonhosted.org/packages/2f/97/daba0f5d2dc6d855e2dcb70733c812558a7977a55dd4a6722756628c44d1/cryptography-46.0.6-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8927ccfbe967c7df312ade694f987e7e9e22b2425976ddbf28271d7e58845290", size = 4271265, upload-time = "2026-03-25T23:34:18.586Z" },
{ url = "https://files.pythonhosted.org/packages/89/06/fe1fce39a37ac452e58d04b43b0855261dac320a2ebf8f5260dd55b201a9/cryptography-46.0.6-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:b12c6b1e1651e42ab5de8b1e00dc3b6354fdfd778e7fa60541ddacc27cd21410", size = 4916800, upload-time = "2026-03-25T23:34:20.561Z" },
{ url = "https://files.pythonhosted.org/packages/ff/8a/b14f3101fe9c3592603339eb5d94046c3ce5f7fc76d6512a2d40efd9724e/cryptography-46.0.6-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:063b67749f338ca9c5a0b7fe438a52c25f9526b851e24e6c9310e7195aad3b4d", size = 4448771, upload-time = "2026-03-25T23:34:22.406Z" },
{ url = "https://files.pythonhosted.org/packages/01/b3/0796998056a66d1973fd52ee89dc1bb3b6581960a91ad4ac705f182d398f/cryptography-46.0.6-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:02fad249cb0e090b574e30b276a3da6a149e04ee2f049725b1f69e7b8351ec70", size = 3978333, upload-time = "2026-03-25T23:34:24.281Z" },
{ url = "https://files.pythonhosted.org/packages/c5/3d/db200af5a4ffd08918cd55c08399dc6c9c50b0bc72c00a3246e099d3a849/cryptography-46.0.6-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:7e6142674f2a9291463e5e150090b95a8519b2fb6e6aaec8917dd8d094ce750d", size = 4271069, upload-time = "2026-03-25T23:34:25.895Z" },
{ url = "https://files.pythonhosted.org/packages/d7/18/61acfd5b414309d74ee838be321c636fe71815436f53c9f0334bf19064fa/cryptography-46.0.6-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:456b3215172aeefb9284550b162801d62f5f264a081049a3e94307fe20792cfa", size = 4878358, upload-time = "2026-03-25T23:34:27.67Z" },
{ url = "https://files.pythonhosted.org/packages/8b/65/5bf43286d566f8171917cae23ac6add941654ccf085d739195a4eacf1674/cryptography-46.0.6-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:341359d6c9e68834e204ceaf25936dffeafea3829ab80e9503860dcc4f4dac58", size = 4448061, upload-time = "2026-03-25T23:34:29.375Z" },
{ url = "https://files.pythonhosted.org/packages/e0/25/7e49c0fa7205cf3597e525d156a6bce5b5c9de1fd7e8cb01120e459f205a/cryptography-46.0.6-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9a9c42a2723999a710445bc0d974e345c32adfd8d2fac6d8a251fa829ad31cfb", size = 4399103, upload-time = "2026-03-25T23:34:32.036Z" },
{ url = "https://files.pythonhosted.org/packages/44/46/466269e833f1c4718d6cd496ffe20c56c9c8d013486ff66b4f69c302a68d/cryptography-46.0.6-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6617f67b1606dfd9fe4dbfa354a9508d4a6d37afe30306fe6c101b7ce3274b72", size = 4659255, upload-time = "2026-03-25T23:34:33.679Z" },
{ url = "https://files.pythonhosted.org/packages/0a/09/ddc5f630cc32287d2c953fc5d32705e63ec73e37308e5120955316f53827/cryptography-46.0.6-cp38-abi3-win32.whl", hash = "sha256:7f6690b6c55e9c5332c0b59b9c8a3fb232ebf059094c17f9019a51e9827df91c", size = 3010660, upload-time = "2026-03-25T23:34:35.418Z" },
{ url = "https://files.pythonhosted.org/packages/1b/82/ca4893968aeb2709aacfb57a30dec6fa2ab25b10fa9f064b8882ce33f599/cryptography-46.0.6-cp38-abi3-win_amd64.whl", hash = "sha256:79e865c642cfc5c0b3eb12af83c35c5aeff4fa5c672dc28c43721c2c9fdd2f0f", size = 3471160, upload-time = "2026-03-25T23:34:37.191Z" },
]
[[package]]
name = "docker"
version = "7.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pywin32", marker = "sys_platform == 'win32'" },
{ name = "requests" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" },
]
[[package]]
name = "fastapi"
version = "0.135.1"
@@ -247,6 +359,7 @@ name = "master"
version = "0.0.1"
source = { virtual = "." }
dependencies = [
{ name = "docker" },
{ name = "fastapi" },
{ name = "opentelemetry-api" },
{ name = "opentelemetry-exporter-otlp-proto-http" },
@@ -262,11 +375,13 @@ dev = [
{ name = "mypy" },
{ name = "pytest" },
{ name = "ruff" },
{ name = "types-docker" },
{ name = "types-pyyaml" },
]
[package.metadata]
requires-dist = [
{ name = "docker", specifier = ">=7.1.0" },
{ name = "fastapi", specifier = ">=0.116.1" },
{ name = "opentelemetry-api", specifier = ">=1.31.1" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.31.1" },
@@ -282,6 +397,7 @@ dev = [
{ name = "mypy", specifier = ">=1.18.2" },
{ name = "pytest", specifier = ">=8.4.2" },
{ name = "ruff", specifier = ">=0.13.1" },
{ name = "types-docker", specifier = ">=7.1.0.20260402" },
{ name = "types-pyyaml", specifier = ">=6.0.12.20250915" },
]
@@ -501,6 +617,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c4/72/02445137af02769918a93807b2b7890047c32bfb9f90371cbc12688819eb/protobuf-6.33.6-py3-none-any.whl", hash = "sha256:77179e006c476e69bf8e8ce866640091ec42e1beb80b213c3900006ecfba6901", size = 170656, upload-time = "2026-03-18T19:04:59.826Z" },
]
[[package]]
name = "pycparser"
version = "3.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" },
]
[[package]]
name = "pydantic"
version = "2.12.5"
@@ -603,6 +728,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" },
]
[[package]]
name = "pywin32"
version = "311"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" },
{ url = "https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" },
{ url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" },
{ url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" },
{ url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" },
{ url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" },
]

[[package]]
name = "pyyaml"
version = "6.0.3"
@@ -691,6 +829,32 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/81/0d/13d1d239a25cbfb19e740db83143e95c772a1fe10202dda4b76792b114dd/starlette-0.52.1-py3-none-any.whl", hash = "sha256:0029d43eb3d273bc4f83a08720b4912ea4b071087a3b48db01b7c839f7954d74", size = 74272, upload-time = "2026-01-18T13:34:09.188Z" },
]

[[package]]
name = "types-docker"
version = "7.1.0.20260402"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "types-paramiko" },
{ name = "types-requests" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/1f/b6/e9dd7b51f5db0df219aed496e5f1fe4e4e17828cd9c354fc6a98a4454ea1/types_docker-7.1.0.20260402.tar.gz", hash = "sha256:2df72c6268a815f4ba28fe2556072a99b8acffe46370fa4d27c485355af2e37e", size = 32974, upload-time = "2026-04-02T04:22:54.318Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/90/8c/5056b3f6ba818dc9f677f9e7aa9044113657a8b67871ed30921d3712b2f2/types_docker-7.1.0.20260402-py3-none-any.whl", hash = "sha256:88055c13ad43a3d13415db42d180193e812e6a25d0b50710d6559e86cb9345e5", size = 47455, upload-time = "2026-04-02T04:22:53.426Z" },
]

[[package]]
name = "types-paramiko"
version = "4.0.0.20260402"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cryptography" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c6/91/63f4fa68a7d563ca54f62c013aeefb480a2e18a0ee0623a56119f1662bf2/types_paramiko-4.0.0.20260402.tar.gz", hash = "sha256:a9287bdb78cb67c8e79897dc98b761900968cd2de288f72cc298c1a25cde6a38", size = 29105, upload-time = "2026-04-02T04:20:26.809Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8d/5e/d650556433e9b745fb85676f4382b7ec0f35156696d946b9bf5ac470358b/types_paramiko-4.0.0.20260402-py3-none-any.whl", hash = "sha256:38ef646f54d5410012d8607b9f023a355ecaec12b749d0325bce5f16706f6183", size = 38816, upload-time = "2026-04-02T04:20:25.788Z" },
]

[[package]]
name = "types-pyyaml"
version = "6.0.12.20250915"
@@ -700,6 +864,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/bd/e0/1eed384f02555dde685fff1a1ac805c1c7dcb6dd019c916fe659b1c1f9ec/types_pyyaml-6.0.12.20250915-py3-none-any.whl", hash = "sha256:e7d4d9e064e89a3b3cae120b4990cd370874d2bf12fa5f46c97018dd5d3c9ab6", size = 20338, upload-time = "2025-09-15T03:00:59.218Z" },
]

[[package]]
name = "types-requests"
version = "2.33.0.20260402"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c1/7b/a06527d20af1441d813360b8e0ce152a75b7d8e4aab7c7d0a156f405d7ec/types_requests-2.33.0.20260402.tar.gz", hash = "sha256:1bdd3ada9b869741c5c4b887d2c8b4e38284a1449751823b5ebbccba3eefd9da", size = 23851, upload-time = "2026-04-02T04:19:55.942Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/51/65/3853bb6bac5ae789dc7e28781154705c27859eccc8e46282c3f36780f5f5/types_requests-2.33.0.20260402-py3-none-any.whl", hash = "sha256:c98372d7124dd5d10af815ee25c013897592ff92af27b27e22c98984102c3254", size = 20739, upload-time = "2026-04-02T04:19:54.955Z" },
]

[[package]]
name = "typing-extensions"
version = "4.15.0"