Compare commits

...
Sign in to create a new pull request.

2 commits

Author SHA1 Message Date
David Shvarts
6fe484c44c ref #13: in-memory metadata repositories (S02) 2026-04-07 20:58:30 +03:00
Azamat
5381c997e2 add storage foundation contracts 2026-04-07 19:31:50 +03:00
10 changed files with 444 additions and 0 deletions

View file

@ -0,0 +1,17 @@
# 009 Storage foundation
## Context
- the v1 storage slice needs workspace, chat and file flows before a durable DB is available
- trusted caller passes `user_id`, and one workspace belongs to one user
- chat content must live outside sandbox lifecycle and survive sandbox restart
## Decision
- metadata repositories are in-memory for the first storage slice
- `Workspace`, `Chat` and `ChatFile` are first-class domain entities
- filesystem access stays behind storage ports in outer layers
- the sandbox will later integrate through chat metadata and storage ports, not through raw path arithmetic in use cases
## Consequences
- metadata is lost on restart in this phase
- storage usecases and HTTP API can be built before durable persistence
- later durable metadata can replace in-memory adapters behind the same ports

View file

@ -0,0 +1,17 @@
# 010 Chat history policy
## Context
- v1 keeps chat history in the filesystem, not in a central DB
- chat metadata must not depend on parsing history content
- each chat already maps to an isolated working directory
## Decision
- each chat owns one `history.md` inside its chat directory
- `history.md` is created with chat layout initialization
- chat metadata stores identity and lifecycle fields separately from history content
- history read and write stay behind storage ports in outer layers
## Consequences
- history survives sandbox restart with chat storage
- metadata and content evolve independently
- later migration to another history backend can keep the same chat identity model

35
domain/chat.py Normal file
View file

@ -0,0 +1,35 @@
from dataclasses import dataclass
from datetime import datetime
from uuid import UUID
HISTORY_FILE_NAME = 'history.md'
@dataclass(frozen=True, slots=True)
class ChatAttachmentName:
value: str
def __post_init__(self) -> None:
if not self.value or self.value in {'.', '..'}:
raise ValueError('invalid attachment name')
if '/' in self.value or '\\' in self.value:
raise ValueError('invalid attachment name')
if self.value == HISTORY_FILE_NAME:
raise ValueError('reserved attachment name')
@dataclass(frozen=True, slots=True)
class Chat:
chat_id: UUID
workspace_id: UUID
created_at: datetime
@dataclass(frozen=True, slots=True)
class ChatFile:
    """Immutable metadata record describing one attachment of a chat."""

    # unique id of the file record
    file_id: UUID
    # id of the chat the file is attached to
    chat_id: UUID
    # validated attachment name
    name: ChatAttachmentName
    # content type string (e.g. 'application/pdf'), or None when not supplied
    content_type: str | None
    # size of the attachment in bytes
    size_bytes: int
    # creation timestamp
    created_at: datetime

View file

@ -1,3 +1,6 @@
from uuid import UUID
class DomainError(Exception):
    """Root of the domain exception hierarchy."""
@ -18,6 +21,48 @@ class UserConflictError(UserError):
self.email = email
class WorkspaceError(DomainError):
    """Common base for workspace errors."""
class WorkspaceNotFoundError(WorkspaceError):
    """Workspace error with the fixed message code ``workspace_not_found``.

    The id of the workspace concerned is kept on ``workspace_id``.
    """

    def __init__(self, workspace_id: UUID) -> None:
        self.workspace_id = workspace_id
        super().__init__('workspace_not_found')
class WorkspaceQuotaExceededError(WorkspaceError):
    """Workspace error with the fixed message code ``workspace_quota_exceeded``.

    The id of the workspace concerned is kept on ``workspace_id``.
    """

    def __init__(self, workspace_id: UUID) -> None:
        self.workspace_id = workspace_id
        super().__init__('workspace_quota_exceeded')
class ChatError(DomainError):
    """Common base for chat errors."""
class ChatNotFoundError(ChatError):
    """Chat error with the fixed message code ``chat_not_found``.

    The id of the chat concerned is kept on ``chat_id``.
    """

    def __init__(self, chat_id: UUID) -> None:
        self.chat_id = chat_id
        super().__init__('chat_not_found')
class ChatHasActiveSandboxError(ChatError):
    """Chat error with the fixed message code ``chat_has_active_sandbox``.

    The id of the chat concerned is kept on ``chat_id``.
    """

    def __init__(self, chat_id: UUID) -> None:
        self.chat_id = chat_id
        super().__init__('chat_has_active_sandbox')
class ChatFileError(DomainError):
    """Common base for chat file errors."""
class ChatFileNotFoundError(ChatFileError):
    """Chat file error with the fixed message code ``chat_file_not_found``.

    The id of the file concerned is kept on ``file_id``.
    """

    def __init__(self, file_id: UUID) -> None:
        self.file_id = file_id
        super().__init__('chat_file_not_found')
class SandboxError(DomainError):
    """Common base for sandbox errors."""

17
domain/workspace.py Normal file
View file

@ -0,0 +1,17 @@
from dataclasses import dataclass
from datetime import datetime
from uuid import UUID
@dataclass(frozen=True, slots=True)
class Workspace:
workspace_id: UUID
user_id: UUID
created_at: datetime
@dataclass(frozen=True, slots=True)
class WorkspaceUsage:
workspace_id: UUID
used_bytes: int
quota_bytes: int

24
repository/chat.py Normal file
View file

@ -0,0 +1,24 @@
from uuid import UUID
from domain.chat import Chat
from usecase.interface import ChatRepository
class InMemoryChatRepository(ChatRepository):
    """Dict-backed ``ChatRepository``; records live only as long as the instance."""

    def __init__(self) -> None:
        # chat records keyed by chat id
        self._by_id: dict[UUID, Chat] = {}

    def get(self, chat_id: UUID) -> Chat | None:
        """Return the chat stored under ``chat_id``, or ``None`` when absent."""
        return self._by_id.get(chat_id)

    def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]:
        """Return the workspace's chats ordered by ``(created_at, chat_id)``."""
        matching = [
            chat
            for chat in self._by_id.values()
            if chat.workspace_id == workspace_id
        ]
        # chat_id breaks ties so the order is deterministic for equal timestamps
        matching.sort(key=lambda chat: (chat.created_at, chat.chat_id))
        return matching

    def save(self, chat: Chat) -> None:
        """Insert ``chat``, replacing any record with the same id."""
        self._by_id[chat.chat_id] = chat

    def delete(self, chat_id: UUID) -> None:
        """Remove the chat with ``chat_id``; an unknown id is ignored."""
        self._by_id.pop(chat_id, None)

57
repository/chat_file.py Normal file
View file

@ -0,0 +1,57 @@
from uuid import UUID
from domain.chat import ChatAttachmentName, ChatFile
from usecase.interface import ChatFileRepository
class InMemoryChatFileRepository(ChatFileRepository):
    """Dict-backed ``ChatFileRepository`` with a secondary (chat, name) index.

    ``_by_id`` holds the records; ``_by_chat_and_name`` maps
    ``(chat_id, name.value)`` to the id of the record holding that name.
    """

    def __init__(self) -> None:
        self._by_id: dict[UUID, ChatFile] = {}
        self._by_chat_and_name: dict[tuple[UUID, str], UUID] = {}

    def get(self, file_id: UUID) -> ChatFile | None:
        """Return the record stored under ``file_id``, or ``None`` when absent."""
        return self._by_id.get(file_id)

    def get_by_chat_id_and_name(
        self,
        chat_id: UUID,
        name: ChatAttachmentName,
    ) -> ChatFile | None:
        """Return the record holding ``name`` within ``chat_id``, or ``None``."""
        file_id = self._by_chat_and_name.get((chat_id, name.value))
        return None if file_id is None else self._by_id.get(file_id)

    def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]:
        """Return the chat's records ordered by ``(created_at, file_id)``."""
        records = [
            record for record in self._by_id.values() if record.chat_id == chat_id
        ]
        records.sort(key=lambda record: (record.created_at, record.file_id))
        return records

    def save(self, chat_file: ChatFile) -> None:
        """Store ``chat_file`` and keep the (chat, name) index in step.

        A different record already holding the same name in the same chat is
        dropped; a previous version of the same id loses its old index entry.
        """
        new_key = (chat_file.chat_id, chat_file.name.value)

        # another record owns this name in the chat: the new record replaces it
        holder_id = self._by_chat_and_name.get(new_key)
        if holder_id is not None and holder_id != chat_file.file_id:
            self._by_id.pop(holder_id, None)

        # re-saving a known id: drop the index entry of its previous version
        stale = self._by_id.get(chat_file.file_id)
        if stale is not None:
            stale_key = (stale.chat_id, stale.name.value)
            if self._by_chat_and_name.get(stale_key) == stale.file_id:
                del self._by_chat_and_name[stale_key]

        self._by_id[chat_file.file_id] = chat_file
        self._by_chat_and_name[new_key] = chat_file.file_id

    def delete(self, file_id: UUID) -> None:
        """Remove the record with ``file_id``; an unknown id is ignored."""
        removed = self._by_id.pop(file_id, None)
        if removed is not None:
            index_key = (removed.chat_id, removed.name.value)
            # only drop the index entry when it still points at this record
            if self._by_chat_and_name.get(index_key) == file_id:
                del self._by_chat_and_name[index_key]

    def delete_by_chat_id(self, chat_id: UUID) -> None:
        """Remove every record that belongs to ``chat_id``."""
        # collect ids first: ``delete`` mutates ``_by_id`` while we would iterate it
        doomed = [
            record.file_id
            for record in self._by_id.values()
            if record.chat_id == chat_id
        ]
        for doomed_id in doomed:
            self.delete(doomed_id)

26
repository/workspace.py Normal file
View file

@ -0,0 +1,26 @@
from uuid import UUID
from domain.workspace import Workspace
from usecase.interface import WorkspaceRepository
class InMemoryWorkspaceRepository(WorkspaceRepository):
    """Dict-backed ``WorkspaceRepository`` holding at most one workspace per user.

    ``_by_id`` holds the records; ``_user_id_to_workspace_id`` maps a user id
    to the id of that user's workspace.
    """

    def __init__(self) -> None:
        self._by_id: dict[UUID, Workspace] = {}
        self._user_id_to_workspace_id: dict[UUID, UUID] = {}

    def get(self, workspace_id: UUID) -> Workspace | None:
        """Return the workspace stored under ``workspace_id``, or ``None``."""
        return self._by_id.get(workspace_id)

    def get_by_user_id(self, user_id: UUID) -> Workspace | None:
        """Return the workspace indexed for ``user_id``, or ``None``."""
        wid = self._user_id_to_workspace_id.get(user_id)
        if wid is None:
            return None
        return self._by_id.get(wid)

    def save(self, workspace: Workspace) -> None:
        """Store ``workspace`` and keep the user index in step.

        A different workspace already indexed for the same user is dropped.
        When a known ``workspace_id`` is re-saved under another ``user_id``,
        the previous user's index entry is removed so that
        ``get_by_user_id`` cannot return a workspace owned by someone else.
        """
        # the user already owns a different workspace: the new one replaces it
        existing_wid = self._user_id_to_workspace_id.get(workspace.user_id)
        if existing_wid is not None and existing_wid != workspace.workspace_id:
            self._by_id.pop(existing_wid, None)

        # re-saving a known workspace under another user: drop the stale entry
        previous = self._by_id.get(workspace.workspace_id)
        if previous is not None and previous.user_id != workspace.user_id:
            indexed = self._user_id_to_workspace_id.get(previous.user_id)
            if indexed == previous.workspace_id:
                del self._user_id_to_workspace_id[previous.user_id]

        self._by_id[workspace.workspace_id] = workspace
        self._user_id_to_workspace_id[workspace.user_id] = workspace.workspace_id

View file

@ -0,0 +1,129 @@
from datetime import UTC, datetime
from uuid import UUID
from domain.chat import Chat, ChatAttachmentName, ChatFile
from domain.workspace import Workspace
from repository.chat import InMemoryChatRepository
from repository.chat_file import InMemoryChatFileRepository
from repository.workspace import InMemoryWorkspaceRepository
# Fixed user ids
USER_A = UUID('aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa')
USER_B = UUID('bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb')
# Fixed workspace ids
WS_A = UUID('11111111-1111-1111-1111-111111111111')
WS_B = UUID('22222222-2222-2222-2222-222222222222')
# Fixed chat ids
CHAT_A = UUID('33333333-3333-3333-3333-333333333333')
CHAT_B = UUID('44444444-4444-4444-4444-444444444444')
# Fixed file ids
FILE_A = UUID('55555555-5555-5555-5555-555555555555')
FILE_B = UUID('66666666-6666-6666-6666-666666666666')
# Two UTC timestamps one hour apart; TS precedes TS_2, which the ordering
# assertions in the listing tests rely on
TS = datetime(2026, 4, 1, 12, 0, 0, tzinfo=UTC)
TS_2 = datetime(2026, 4, 1, 13, 0, 0, tzinfo=UTC)
def test_workspace_get_by_user_id() -> None:
    """A saved workspace is found by its id and by its owner's id only."""
    workspace = Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS)
    repository = InMemoryWorkspaceRepository()
    repository.save(workspace)

    assert repository.get(WS_A) == workspace
    assert repository.get_by_user_id(USER_A) == workspace
    assert repository.get_by_user_id(USER_B) is None
def test_workspace_replace_for_user() -> None:
    """Saving a second workspace for the same user evicts the first one."""
    original = Workspace(workspace_id=WS_A, user_id=USER_A, created_at=TS)
    replacement = Workspace(workspace_id=WS_B, user_id=USER_A, created_at=TS_2)
    repository = InMemoryWorkspaceRepository()

    repository.save(original)
    repository.save(replacement)

    assert repository.get_by_user_id(USER_A) == replacement
    assert repository.get(WS_A) is None
    assert repository.get(WS_B) == replacement
def test_chat_crud_workspace_scope() -> None:
    """Chats of one workspace are listed in creation order and can be deleted."""
    older = Chat(chat_id=CHAT_A, workspace_id=WS_A, created_at=TS)
    newer = Chat(chat_id=CHAT_B, workspace_id=WS_A, created_at=TS_2)
    chats = InMemoryChatRepository()
    for chat in (older, newer):
        chats.save(chat)

    in_workspace = chats.list_by_workspace_id(WS_A)
    assert in_workspace == [older, newer]
    assert chats.get(CHAT_A) == older

    chats.delete(CHAT_A)
    assert chats.get(CHAT_A) is None
    assert chats.list_by_workspace_id(WS_A) == [newer]
def test_chat_list_only_same_workspace() -> None:
    """Listing by workspace never returns chats of another workspace."""
    in_ws_a = Chat(chat_id=CHAT_A, workspace_id=WS_A, created_at=TS)
    in_ws_b = Chat(chat_id=CHAT_B, workspace_id=WS_B, created_at=TS_2)
    chats = InMemoryChatRepository()
    for chat in (in_ws_a, in_ws_b):
        chats.save(chat)

    assert chats.list_by_workspace_id(WS_A) == [in_ws_a]
    assert chats.list_by_workspace_id(WS_B) == [in_ws_b]
def test_chat_file_metadata_save_get_list_delete_clear() -> None:
    """Cover save, both lookups, listing, single delete and per-chat clear."""
    pdf_name = ChatAttachmentName('doc.pdf')
    png_name = ChatAttachmentName('x.png')
    pdf_file = ChatFile(
        file_id=FILE_A,
        chat_id=CHAT_A,
        name=pdf_name,
        content_type='application/pdf',
        size_bytes=100,
        created_at=TS,
    )
    png_file = ChatFile(
        file_id=FILE_B,
        chat_id=CHAT_A,
        name=png_name,
        content_type='image/png',
        size_bytes=200,
        created_at=TS_2,
    )
    files = InMemoryChatFileRepository()
    for record in (pdf_file, png_file):
        files.save(record)

    # lookups by id and by (chat, name), then listing in creation order
    assert files.get(FILE_A) == pdf_file
    assert files.get_by_chat_id_and_name(CHAT_A, pdf_name) == pdf_file
    in_chat = files.list_by_chat_id(CHAT_A)
    assert in_chat == [pdf_file, png_file]

    # deleting one record clears both the record and its name lookup
    files.delete(FILE_A)
    assert files.get(FILE_A) is None
    assert files.get_by_chat_id_and_name(CHAT_A, pdf_name) is None

    # clearing the chat removes everything, including a re-added record
    files.save(pdf_file)
    files.delete_by_chat_id(CHAT_A)
    assert files.list_by_chat_id(CHAT_A) == []
def test_chat_file_same_name_replaced_by_new_id() -> None:
    """A new record with an already used name evicts the earlier record."""
    shared_name = ChatAttachmentName('a.txt')
    earlier = ChatFile(
        file_id=FILE_A,
        chat_id=CHAT_A,
        name=shared_name,
        content_type='text/plain',
        size_bytes=1,
        created_at=TS,
    )
    later = ChatFile(
        file_id=FILE_B,
        chat_id=CHAT_A,
        name=shared_name,
        content_type='text/plain',
        size_bytes=2,
        created_at=TS_2,
    )
    files = InMemoryChatFileRepository()

    files.save(earlier)
    files.save(later)

    assert files.get(FILE_A) is None
    assert files.get(FILE_B) == later
    assert files.get_by_chat_id_and_name(CHAT_A, shared_name) == later

View file

@ -4,8 +4,10 @@ from types import TracebackType
from typing import Protocol, TypeAlias
from uuid import UUID
from domain.chat import Chat, ChatAttachmentName, ChatFile
from domain.sandbox import SandboxSession
from domain.user import User
from domain.workspace import Workspace, WorkspaceUsage
# Scalar types accepted as an attribute value
AttrValue: TypeAlias = str | int | float | bool
# Read-only mapping from string keys to AttrValue
Attrs: TypeAlias = Mapping[str, AttrValue]
@ -19,6 +21,81 @@ class UserRepository(Protocol):
def save(self, user: User) -> None: ...
class WorkspaceRepository(Protocol):
    """Port for workspace metadata storage."""

    def get(self, workspace_id: UUID) -> Workspace | None:
        """Look a workspace up by its id; ``None`` when there is none."""
        ...

    def get_by_user_id(self, user_id: UUID) -> Workspace | None:
        """Look a workspace up by its user's id; ``None`` when there is none."""
        ...

    def save(self, workspace: Workspace) -> None:
        """Store ``workspace``."""
        ...
class ChatRepository(Protocol):
    """Port for chat metadata storage."""

    def get(self, chat_id: UUID) -> Chat | None:
        """Look a chat up by its id; ``None`` when there is none."""
        ...

    def list_by_workspace_id(self, workspace_id: UUID) -> list[Chat]:
        """Return the chats that belong to ``workspace_id``."""
        ...

    def save(self, chat: Chat) -> None:
        """Store ``chat``."""
        ...

    def delete(self, chat_id: UUID) -> None:
        """Remove the chat with ``chat_id``."""
        ...
class ChatFileRepository(Protocol):
    """Port for chat attachment metadata storage."""

    def get(self, file_id: UUID) -> ChatFile | None:
        """Look a file record up by its id; ``None`` when there is none."""
        ...

    def get_by_chat_id_and_name(
        self,
        chat_id: UUID,
        name: ChatAttachmentName,
    ) -> ChatFile | None:
        """Look a file record up by chat and attachment name; ``None`` when there is none."""
        ...

    def list_by_chat_id(self, chat_id: UUID) -> list[ChatFile]:
        """Return the file records that belong to ``chat_id``."""
        ...

    def save(self, chat_file: ChatFile) -> None:
        """Store ``chat_file``."""
        ...

    def delete(self, file_id: UUID) -> None:
        """Remove the file record with ``file_id``."""
        ...

    def delete_by_chat_id(self, chat_id: UUID) -> None:
        """Remove every file record that belongs to ``chat_id``."""
        ...
class ChatStorage(Protocol):
    """Port for chat content storage: history text and attachment bytes.

    NOTE(review): the exact semantics of each operation are defined by the
    adapter, which is not visible here; the notes below restate the signatures.
    """

    def ensure_chat(self, chat: Chat) -> None:
        """Prepare storage for ``chat`` (presumably its layout; confirm in the adapter)."""
        ...

    def read_history(self, chat: Chat) -> str:
        """Return the history content of ``chat`` as text."""
        ...

    def write_history(self, chat: Chat, content: str) -> None:
        """Write ``content`` as the history of ``chat``."""
        ...

    def delete_chat(self, chat: Chat) -> None:
        """Remove the stored content of ``chat``."""
        ...

    def write_attachment(
        self,
        chat: Chat,
        file_name: ChatAttachmentName,
        content: bytes,
    ) -> int:
        """Write ``content`` under ``file_name``; returns an int (presumably the byte size; confirm)."""
        ...

    def read_attachment(self, chat: Chat, file_name: ChatAttachmentName) -> bytes:
        """Return the bytes stored under ``file_name`` for ``chat``."""
        ...

    def delete_attachment(
        self,
        chat: Chat,
        file_name: ChatAttachmentName,
    ) -> None:
        """Remove the attachment ``file_name`` of ``chat``."""
        ...

    def clear_attachments(self, chat: Chat) -> None:
        """Remove all attachments of ``chat``."""
        ...
class StorageUsageReader(Protocol):
    """Port that reports storage usage for a workspace."""

    def get_workspace_usage(
        self,
        workspace: Workspace,
        chats: list[Chat],
    ) -> WorkspaceUsage:
        """Return the ``WorkspaceUsage`` for ``workspace`` given its ``chats``."""
        ...
class IdGenerator(Protocol):
    """Port that supplies UUIDs."""

    def new(self) -> UUID:
        """Return a UUID."""
        ...
class SandboxSessionRepository(Protocol):
def get_active_by_chat_id(self, chat_id: UUID) -> SandboxSession | None: ...