diff --git a/docs/009-storage-foundation.md b/docs/009-storage-foundation.md
new file mode 100644
index 0000000..4aadf58
--- /dev/null
+++ b/docs/009-storage-foundation.md
@@ -0,0 +1,17 @@
+# 009 Storage foundation
+
+## Context
+- v1 storage slice needs workspace, chat and file flows before durable DB
+- trusted caller passes `user_id`, and one workspace belongs to one user
+- chat content must live outside sandbox lifecycle and survive sandbox restart
+
+## Decision
+- metadata repositories are in-memory for the first storage slice
+- `Workspace`, `Chat` and `ChatFile` are first-class domain entities
+- filesystem access stays behind storage ports in outer layers
+- sandbox later integrates through chat metadata and storage ports, not raw path math in usecases
+
+## Consequences
+- metadata is lost on restart in this phase
+- storage usecases and HTTP API can be built before durable persistence
+- later durable metadata can replace in-memory adapters behind the same ports
diff --git a/docs/010-chat-history-policy.md b/docs/010-chat-history-policy.md
new file mode 100644
index 0000000..39c2125
--- /dev/null
+++ b/docs/010-chat-history-policy.md
@@ -0,0 +1,17 @@
+# 010 Chat history policy
+
+## Context
+- v1 keeps chat history in filesystem, not in central DB
+- chat metadata must not depend on parsing history content
+- each chat already maps to an isolated working directory
+
+## Decision
+- each chat owns one `history.md` inside its chat directory
+- `history.md` is created with chat layout initialization
+- chat metadata stores identity and lifecycle fields separately from history content
+- history read and write stay behind storage ports in outer layers
+
+## Consequences
+- history survives sandbox restart with chat storage
+- metadata and content evolve independently
+- later migration to another history backend can keep the same chat identity model
diff --git a/domain/chat.py b/domain/chat.py
new file mode 100644
index 0000000..3dbd7cb
--- /dev/null
+++ b/domain/chat.py
@@ -0,0 +1,49 @@
+from dataclasses import dataclass
+from datetime import datetime
+from uuid import UUID
+
+# Name of the per-chat history file; attachments may not reuse it.
+HISTORY_FILE_NAME = 'history.md'
+
+
+@dataclass(frozen=True, slots=True)
+class ChatAttachmentName:
+    """Validated, single-component file name of a chat attachment.
+
+    Raises:
+        ValueError: if the name is empty, is `.` or `..`, contains `/`, `\\`
+            or a NUL character, or equals `HISTORY_FILE_NAME`.
+    """
+
+    value: str
+
+    def __post_init__(self) -> None:
+        if not self.value or self.value in {'.', '..'}:
+            raise ValueError('invalid attachment name')
+        # Separators would address another directory; NUL cannot appear in a
+        # file name and would otherwise only fail later, at the filesystem call.
+        if '/' in self.value or '\\' in self.value or '\x00' in self.value:
+            raise ValueError('invalid attachment name')
+        if self.value == HISTORY_FILE_NAME:
+            raise ValueError('reserved attachment name')
+
+
+@dataclass(frozen=True, slots=True)
+class Chat:
+    """Chat metadata: identity, owning workspace and creation time."""
+
+    chat_id: UUID
+    workspace_id: UUID
+    created_at: datetime
+
+
+@dataclass(frozen=True, slots=True)
+class ChatFile:
+    """Metadata of one attachment that belongs to a chat."""
+
+    file_id: UUID
+    chat_id: UUID
+    name: ChatAttachmentName
+    content_type: str | None
+    size_bytes: int
+    created_at: datetime
diff --git a/domain/error.py b/domain/error.py
index f691113..ff3486e 100644
--- a/domain/error.py
+++ b/domain/error.py
@@ -1,3 +1,6 @@
+from uuid import UUID
+
+
 class DomainError(Exception):
     pass
 
@@ -18,6 +21,48 @@ class UserConflictError(UserError):
         self.email = email
 
 
+class WorkspaceError(DomainError):
+    pass
+
+
+class WorkspaceNotFoundError(WorkspaceError):
+    def __init__(self, workspace_id: UUID) -> None:
+        super().__init__('workspace_not_found')
+        self.workspace_id = workspace_id
+
+
+class WorkspaceQuotaExceededError(WorkspaceError):
+    def __init__(self, workspace_id: UUID) -> None:
+        super().__init__('workspace_quota_exceeded')
+        self.workspace_id = workspace_id
+
+
+class ChatError(DomainError):
+    pass
+
+
+class ChatNotFoundError(ChatError):
+    def __init__(self, chat_id: UUID) -> None:
+        super().__init__('chat_not_found')
+        self.chat_id = chat_id
+
+
+class ChatHasActiveSandboxError(ChatError):
+    def __init__(self, chat_id: UUID) -> None:
+        super().__init__('chat_has_active_sandbox')
+        self.chat_id = chat_id
+
+
+class ChatFileError(DomainError):
+    pass
+
+
+class ChatFileNotFoundError(ChatFileError):
+    def __init__(self, file_id: UUID) -> None:
+        super().__init__('chat_file_not_found')
+        self.file_id = file_id
+
+
 class SandboxError(DomainError):
     pass
 
diff --git a/domain/workspace.py b/domain/workspace.py
new file mode 100644
index 0000000..3526203
--- /dev/null
+++ b/domain/workspace.py
@@ -0,0 +1,21 @@
+from dataclasses import dataclass
+from datetime import datetime
+from uuid import UUID
+
+
+@dataclass(frozen=True, slots=True)
+class Workspace:
+    """Workspace metadata: identity, owning user and creation time."""
+
+    workspace_id: UUID
+    user_id: UUID
+    created_at: datetime
+
+
+@dataclass(frozen=True, slots=True)
+class WorkspaceUsage:
+    """Used and quota byte counts reported for one workspace."""
+
+    workspace_id: UUID
+    used_bytes: int
+    quota_bytes: int
diff --git a/repository/chat.py b/repository/chat.py
new file mode 100644
index 0000000..1d85cc7
--- /dev/null
+++ b/repository/chat.py
@@ -0,0 +1,27 @@
+from uuid import UUID
+
+from domain.chat import Chat
+from usecase.interface import ChatRepository
+
+
+class InMemoryChatRepository(ChatRepository):
+    """Chat metadata kept in a process-local dict keyed by chat id."""
+
+    def __init__(self) -> None:
+        self._by_id: dict[UUID, Chat] = {}
+
+    def get(self, chat_id: UUID) -> Chat | None:
+        return self._by_id.get(chat_id)
+
+    def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]:
+        """Return the workspace's chats ordered by creation time, then id."""
+        return sorted(
+            (c for c in self._by_id.values() if c.workspace_id == workspace_id),
+            key=lambda c: (c.created_at, c.chat_id),
+        )
+
+    def save(self, chat: Chat) -> None:
+        self._by_id[chat.chat_id] = chat
+
+    def delete(self, chat_id: UUID) -> None:
+        self._by_id.pop(chat_id, None)
diff --git a/repository/chat_file.py b/repository/chat_file.py
new file mode 100644
index 0000000..ca18ce9
--- /dev/null
+++ b/repository/chat_file.py
@@ -0,0 +1,63 @@
+from uuid import UUID
+
+from domain.chat import ChatAttachmentName, ChatFile
+from usecase.interface import ChatFileRepository
+
+
+class InMemoryChatFileRepository(ChatFileRepository):
+    """Attachment metadata kept in memory, unique per (chat id, name)."""
+
+    def __init__(self) -> None:
+        self._by_id: dict[UUID, ChatFile] = {}
+        self._by_chat_and_name: dict[tuple[UUID, str], UUID] = {}
+
+    def get(self, file_id: UUID) -> ChatFile | None:
+        return self._by_id.get(file_id)
+
+    def get_by_chat_id_and_name(
+        self,
+        chat_id: UUID,
+        name: ChatAttachmentName,
+    ) -> ChatFile | None:
+        fid = self._by_chat_and_name.get((chat_id, name.value))
+        if fid is None:
+            return None
+        return self._by_id.get(fid)
+
+    def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]:
+        """Return the chat's files ordered by creation time, then id."""
+        return sorted(
+            (f for f in self._by_id.values() if f.chat_id == chat_id),
+            key=lambda f: (f.created_at, f.file_id),
+        )
+
+    def save(self, chat_file: ChatFile) -> None:
+        """Store `chat_file`, evicting any other file with its chat and name."""
+        key = (chat_file.chat_id, chat_file.name.value)
+        existing_at_key = self._by_chat_and_name.get(key)
+        if existing_at_key is not None and existing_at_key != chat_file.file_id:
+            self._by_id.pop(existing_at_key, None)
+
+        # Same id saved under a new chat or name: drop its old name index entry.
+        previous = self._by_id.get(chat_file.file_id)
+        if previous is not None:
+            prev_key = (previous.chat_id, previous.name.value)
+            if self._by_chat_and_name.get(prev_key) == previous.file_id:
+                del self._by_chat_and_name[prev_key]
+
+        self._by_id[chat_file.file_id] = chat_file
+        self._by_chat_and_name[key] = chat_file.file_id
+
+    def delete(self, file_id: UUID) -> None:
+        chat_file = self._by_id.pop(file_id, None)
+        if chat_file is None:
+            return
+        key = (chat_file.chat_id, chat_file.name.value)
+        if self._by_chat_and_name.get(key) == file_id:
+            del self._by_chat_and_name[key]
+
+    def delete_by_chat_id(self, chat_id: UUID) -> None:
+        # Snapshot the ids first: delete() mutates the dict being scanned.
+        file_ids = [f.file_id for f in self._by_id.values() if f.chat_id == chat_id]
+        for fid in file_ids:
+            self.delete(fid)
diff --git a/repository/workspace.py b/repository/workspace.py
new file mode 100644
index 0000000..4aa5546
--- /dev/null
+++ b/repository/workspace.py
@@ -0,0 +1,38 @@
+from uuid import UUID
+
+from domain.workspace import Workspace
+from usecase.interface import WorkspaceRepository
+
+
+class InMemoryWorkspaceRepository(WorkspaceRepository):
+    """Workspace metadata kept in memory, at most one workspace per user."""
+
+    def __init__(self) -> None:
+        self._by_id: dict[UUID, Workspace] = {}
+        self._user_id_to_workspace_id: dict[UUID, UUID] = {}
+
+    def get(self, workspace_id: UUID) -> Workspace | None:
+        return self._by_id.get(workspace_id)
+
+    def get_by_user_id(self, user_id: UUID) -> Workspace | None:
+        wid = self._user_id_to_workspace_id.get(user_id)
+        if wid is None:
+            return None
+        return self._by_id.get(wid)
+
+    def save(self, workspace: Workspace) -> None:
+        """Store `workspace`, replacing any other workspace of the same user."""
+        existing_wid = self._user_id_to_workspace_id.get(workspace.user_id)
+        if existing_wid is not None and existing_wid != workspace.workspace_id:
+            self._by_id.pop(existing_wid, None)
+
+        # Same id saved under a new user: drop the old owner's index entry so
+        # `get_by_user_id` cannot return a workspace that user no longer owns.
+        previous = self._by_id.get(workspace.workspace_id)
+        if previous is not None and previous.user_id != workspace.user_id:
+            stale_wid = self._user_id_to_workspace_id.get(previous.user_id)
+            if stale_wid == previous.workspace_id:
+                del self._user_id_to_workspace_id[previous.user_id]
+
+        self._by_id[workspace.workspace_id] = workspace
+        self._user_id_to_workspace_id[workspace.user_id] = workspace.workspace_id
diff --git a/test/test_storage_metadata_repositories.py b/test/test_storage_metadata_repositories.py
new file mode 100644
index 0000000..b03d56e
--- /dev/null
+++ b/test/test_storage_metadata_repositories.py
@@ -0,0 +1,139 @@
+from datetime import UTC, datetime
+from uuid import UUID
+
+from domain.chat import Chat, ChatAttachmentName, ChatFile
+from domain.workspace import Workspace
+from repository.chat import InMemoryChatRepository
+from repository.chat_file import InMemoryChatFileRepository
+from repository.workspace import InMemoryWorkspaceRepository
+
+USER_A = UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
+USER_B = UUID('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
+WS_A = UUID('11111111-1111-1111-1111-111111111111')
+WS_B = UUID('22222222-2222-2222-2222-222222222222')
+CHAT_A = UUID('33333333-3333-3333-3333-333333333333')
+CHAT_B = UUID('44444444-4444-4444-4444-444444444444')
+FILE_A = UUID('55555555-5555-5555-5555-555555555555')
+FILE_B = UUID('66666666-6666-6666-6666-666666666666')
+TS = datetime(2026, 4, 1, 12, 0, 0, tzinfo=UTC)
+TS_2 = datetime(2026, 4, 1, 13, 0, 0, tzinfo=UTC)
+
+
+def test_workspace_get_by_user_id() -> None:
+    repo = InMemoryWorkspaceRepository()
+    ws = Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS)
+    repo.save(ws)
+    assert repo.get(WS_A) == ws
+    assert repo.get_by_user_id(USER_A) == ws
+    assert repo.get_by_user_id(USER_B) is None
+
+
+def test_workspace_replace_for_user() -> None:
+    repo = InMemoryWorkspaceRepository()
+    ws = Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS)
+    repo.save(ws)
+    new_ws = Workspace(workspace_id=WS_B, user_id=USER_A, created_at=TS_2)
+    repo.save(new_ws)
+    assert repo.get_by_user_id(USER_A) == new_ws
+    assert repo.get(WS_A) is None
+    assert repo.get(WS_B) == new_ws
+
+
+def test_chat_crud_workspace_scope() -> None:
+    chat_repo = InMemoryChatRepository()
+    chat_a = Chat(chat_id=CHAT_A, workspace_id=WS_A, created_at=TS)
+    chat_b = Chat(chat_id=CHAT_B, workspace_id=WS_A, created_at=TS_2)
+    chat_repo.save(chat_a)
+    chat_repo.save(chat_b)
+
+    listed = chat_repo.list_by_workspace_id(WS_A)
+    assert listed == [chat_a, chat_b]
+
+    assert chat_repo.get(CHAT_A) == chat_a
+    chat_repo.delete(CHAT_A)
+    assert chat_repo.get(CHAT_A) is None
+    assert chat_repo.list_by_workspace_id(WS_A) == [chat_b]
+
+
+def test_chat_list_only_same_workspace() -> None:
+    chat_repo = InMemoryChatRepository()
+    chat_a = Chat(chat_id=CHAT_A, workspace_id=WS_A, created_at=TS)
+    chat_b = Chat(chat_id=CHAT_B, workspace_id=WS_B, created_at=TS_2)
+    chat_repo.save(chat_a)
+    chat_repo.save(chat_b)
+    assert chat_repo.list_by_workspace_id(WS_A) == [chat_a]
+    assert chat_repo.list_by_workspace_id(WS_B) == [chat_b]
+
+
+def test_chat_file_metadata_save_get_list_delete_clear() -> None:
+    name_a = ChatAttachmentName('doc.pdf')
+    name_b = ChatAttachmentName('x.png')
+
+    repo = InMemoryChatFileRepository()
+    f_a = ChatFile(
+        file_id=FILE_A,
+        chat_id=CHAT_A,
+        name=name_a,
+        content_type='application/pdf',
+        size_bytes=100,
+        created_at=TS,
+    )
+    f_b = ChatFile(
+        file_id=FILE_B,
+        chat_id=CHAT_A,
+        name=name_b,
+        content_type='image/png',
+        size_bytes=200,
+        created_at=TS_2,
+    )
+    repo.save(f_a)
+    repo.save(f_b)
+
+    assert repo.get(FILE_A) == f_a
+    assert repo.get_by_chat_id_and_name(CHAT_A, name_a) == f_a
+    listed = repo.list_by_chat_id(CHAT_A)
+    assert listed == [f_a, f_b]
+
+    repo.delete(FILE_A)
+    assert repo.get(FILE_A) is None
+    assert repo.get_by_chat_id_and_name(CHAT_A, name_a) is None
+
+    repo.save(f_a)
+    repo.delete_by_chat_id(CHAT_A)
+    assert repo.list_by_chat_id(CHAT_A) == []
+
+
+def test_chat_file_same_name_replaced_by_new_id() -> None:
+    name = ChatAttachmentName('a.txt')
+    repo = InMemoryChatFileRepository()
+    first = ChatFile(
+        file_id=FILE_A,
+        chat_id=CHAT_A,
+        name=name,
+        content_type='text/plain',
+        size_bytes=1,
+        created_at=TS,
+    )
+    second = ChatFile(
+        file_id=FILE_B,
+        chat_id=CHAT_A,
+        name=name,
+        content_type='text/plain',
+        size_bytes=2,
+        created_at=TS_2,
+    )
+    repo.save(first)
+    repo.save(second)
+    assert repo.get(FILE_A) is None
+    assert repo.get(FILE_B) == second
+    assert repo.get_by_chat_id_and_name(CHAT_A, name) == second
+
+
+def test_workspace_reassigned_to_other_user_drops_old_index() -> None:
+    repo = InMemoryWorkspaceRepository()
+    repo.save(Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS))
+    moved = Workspace(workspace_id=WS_A, user_id=USER_B, created_at=TS)
+    repo.save(moved)
+    assert repo.get_by_user_id(USER_A) is None
+    assert repo.get_by_user_id(USER_B) == moved
+    assert repo.get(WS_A) == moved
diff --git a/usecase/interface.py b/usecase/interface.py
index 69876e6..de681d6 100644
--- a/usecase/interface.py
+++ b/usecase/interface.py
@@ -4,8 +4,10 @@ from types import TracebackType
 from typing import Protocol, TypeAlias
 from uuid import UUID
 
+from domain.chat import Chat, ChatAttachmentName, ChatFile
 from domain.sandbox import SandboxSession
 from domain.user import User
+from domain.workspace import Workspace, WorkspaceUsage
 
 AttrValue: TypeAlias = str | int | float | bool
 Attrs: TypeAlias = Mapping[str, AttrValue]
@@ -19,6 +21,81 @@ class UserRepository(Protocol):
     def save(self, user: User) -> None: ...
 
 
+class WorkspaceRepository(Protocol):
+    def get(self, workspace_id: UUID) -> Workspace | None: ...
+
+    def get_by_user_id(self, user_id: UUID) -> Workspace | None: ...
+
+    def save(self, workspace: Workspace) -> None: ...
+
+
+class ChatRepository(Protocol):
+    def get(self, chat_id: UUID) -> Chat | None: ...
+
+    def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]: ...
+
+    def save(self, chat: Chat) -> None: ...
+
+    def delete(self, chat_id: UUID) -> None: ...
+
+
+class ChatFileRepository(Protocol):
+    def get(self, file_id: UUID) -> ChatFile | None: ...
+
+    def get_by_chat_id_and_name(
+        self,
+        chat_id: UUID,
+        name: ChatAttachmentName,
+    ) -> ChatFile | None: ...
+
+    def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]: ...
+
+    def save(self, chat_file: ChatFile) -> None: ...
+
+    def delete(self, file_id: UUID) -> None: ...
+
+    def delete_by_chat_id(self, chat_id: UUID) -> None: ...
+
+
+class ChatStorage(Protocol):
+    def ensure_chat(self, chat: Chat) -> None: ...
+
+    def read_history(self, chat: Chat) -> str: ...
+
+    def write_history(self, chat: Chat, content: str) -> None: ...
+
+    def delete_chat(self, chat: Chat) -> None: ...
+
+    def write_attachment(
+        self,
+        chat: Chat,
+        file_name: ChatAttachmentName,
+        content: bytes,
+    ) -> int: ...
+
+    def read_attachment(self, chat: Chat, file_name: ChatAttachmentName) -> bytes: ...
+
+    def delete_attachment(
+        self,
+        chat: Chat,
+        file_name: ChatAttachmentName,
+    ) -> None: ...
+
+    def clear_attachments(self, chat: Chat) -> None: ...
+
+
+class StorageUsageReader(Protocol):
+    def get_workspace_usage(
+        self,
+        workspace: Workspace,
+        chats: list[Chat],
+    ) -> WorkspaceUsage: ...
+
+
+class IdGenerator(Protocol):
+    def new(self) -> UUID: ...
+
+
 class SandboxSessionRepository(Protocol):
     def get_active_by_chat_id(self, chat_id: UUID) -> SandboxSession | None: ...
 