refactor: improve memory entry handling and file operations

- Replaced file locking with atomic file operations using temporary files to prevent race conditions during read/write.
- Added deduplication of memory and user entries to avoid exact duplicates in the memory store.
- Enhanced error handling for duplicate entries and improved logic for managing multiple matches in memory operations.
- Updated docstrings to clarify the behavior of file reading and writing methods, ensuring better understanding of the implementation.
This commit is contained in:
teknium1 2026-02-20 02:32:15 -08:00
parent 3b90fa5c9b
commit ba8b80a163

View file

@ -25,7 +25,7 @@ Design:
import json import json
import os import os
import fcntl import tempfile
from pathlib import Path from pathlib import Path
from typing import Dict, Any, List, Optional from typing import Dict, Any, List, Optional
@ -61,6 +61,10 @@ class MemoryStore:
self.memory_entries = self._read_file(MEMORY_DIR / "MEMORY.md") self.memory_entries = self._read_file(MEMORY_DIR / "MEMORY.md")
self.user_entries = self._read_file(MEMORY_DIR / "USER.md") self.user_entries = self._read_file(MEMORY_DIR / "USER.md")
# Deduplicate entries (preserves order, keeps first occurrence)
self.memory_entries = list(dict.fromkeys(self.memory_entries))
self.user_entries = list(dict.fromkeys(self.user_entries))
# Capture frozen snapshot for system prompt injection # Capture frozen snapshot for system prompt injection
self._system_prompt_snapshot = { self._system_prompt_snapshot = {
"memory": self._render_block("memory", self.memory_entries), "memory": self._render_block("memory", self.memory_entries),
@ -107,6 +111,10 @@ class MemoryStore:
entries = self._entries_for(target) entries = self._entries_for(target)
limit = self._char_limit(target) limit = self._char_limit(target)
# Reject exact duplicates
if content in entries:
return self._success_response(target, "Entry already exists (no duplicate added).")
# Calculate what the new total would be # Calculate what the new total would be
new_entries = entries + [content] new_entries = entries + [content]
new_total = len(ENTRY_DELIMITER.join(new_entries)) new_total = len(ENTRY_DELIMITER.join(new_entries))
@ -146,12 +154,16 @@ class MemoryStore:
return {"success": False, "error": f"No entry matched '{old_text}'."} return {"success": False, "error": f"No entry matched '{old_text}'."}
if len(matches) > 1: if len(matches) > 1:
previews = [e[:80] + ("..." if len(e) > 80 else "") for _, e in matches] # If all matches are identical (exact duplicates), operate on the first one
return { unique_texts = set(e for _, e in matches)
"success": False, if len(unique_texts) > 1:
"error": f"Multiple entries matched '{old_text}'. Be more specific.", previews = [e[:80] + ("..." if len(e) > 80 else "") for _, e in matches]
"matches": previews, return {
} "success": False,
"error": f"Multiple entries matched '{old_text}'. Be more specific.",
"matches": previews,
}
# All identical -- safe to replace just the first
idx = matches[0][0] idx = matches[0][0]
limit = self._char_limit(target) limit = self._char_limit(target)
@ -189,12 +201,16 @@ class MemoryStore:
return {"success": False, "error": f"No entry matched '{old_text}'."} return {"success": False, "error": f"No entry matched '{old_text}'."}
if len(matches) > 1: if len(matches) > 1:
previews = [e[:80] + ("..." if len(e) > 80 else "") for _, e in matches] # If all matches are identical (exact duplicates), remove the first one
return { unique_texts = set(e for _, e in matches)
"success": False, if len(unique_texts) > 1:
"error": f"Multiple entries matched '{old_text}'. Be more specific.", previews = [e[:80] + ("..." if len(e) > 80 else "") for _, e in matches]
"matches": previews, return {
} "success": False,
"error": f"Multiple entries matched '{old_text}'. Be more specific.",
"matches": previews,
}
# All identical -- safe to remove just the first
idx = matches[0][0] idx = matches[0][0]
entries.pop(idx) entries.pop(idx)
@ -255,16 +271,15 @@ class MemoryStore:
@staticmethod @staticmethod
def _read_file(path: Path) -> List[str]: def _read_file(path: Path) -> List[str]:
"""Read a memory file and split into entries.""" """Read a memory file and split into entries.
No file locking needed: _write_file uses atomic rename, so readers
always see either the previous complete file or the new complete file.
"""
if not path.exists(): if not path.exists():
return [] return []
try: try:
with open(path, "r", encoding="utf-8") as f: raw = path.read_text(encoding="utf-8")
fcntl.flock(f, fcntl.LOCK_SH)
try:
raw = f.read()
finally:
fcntl.flock(f, fcntl.LOCK_UN)
except (OSError, IOError): except (OSError, IOError):
return [] return []
@ -276,15 +291,32 @@ class MemoryStore:
@staticmethod @staticmethod
def _write_file(path: Path, entries: List[str]): def _write_file(path: Path, entries: List[str]):
"""Write entries to a memory file with file locking.""" """Write entries to a memory file using atomic temp-file + rename.
Previous implementation used open("w") + flock, but "w" truncates the
file *before* the lock is acquired, creating a race window where
concurrent readers see an empty file. Atomic rename avoids this:
readers always see either the old complete file or the new one.
"""
content = ENTRY_DELIMITER.join(entries) if entries else "" content = ENTRY_DELIMITER.join(entries) if entries else ""
try: try:
with open(path, "w", encoding="utf-8") as f: # Write to temp file in same directory (same filesystem for atomic rename)
fcntl.flock(f, fcntl.LOCK_EX) fd, tmp_path = tempfile.mkstemp(
try: dir=str(path.parent), suffix=".tmp", prefix=".mem_"
)
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(content) f.write(content)
finally: f.flush()
fcntl.flock(f, fcntl.LOCK_UN) os.fsync(f.fileno())
os.replace(tmp_path, str(path)) # Atomic on same filesystem
except BaseException:
# Clean up temp file on any failure
try:
os.unlink(tmp_path)
except OSError:
pass
raise
except (OSError, IOError) as e: except (OSError, IOError) as e:
raise RuntimeError(f"Failed to write memory file {path}: {e}") raise RuntimeError(f"Failed to write memory file {path}: {e}")
@ -376,3 +408,11 @@ MEMORY_SCHEMA = {
"required": ["action", "target"], "required": ["action", "target"],
}, },
} }