fix: track origin hashes in skills manifest to preserve user modifications

Upgrade skills_sync manifest to v2 format (name:origin_hash). The origin
hash records the MD5 of the bundled skill at the time it was last synced.

On update, the user's copy is compared against the origin hash:
- User copy == origin hash → unmodified → safe to update from bundled
- User copy != origin hash → user customized → skip (preserve changes)

v1 manifests (plain names) are auto-migrated: the user's current hash
becomes the baseline, so future syncs can detect modifications.

Output now shows user-modified skills:
  ~ whisper (user-modified, skipping)

27 tests covering all scenarios including v1→v2 migration, user
modification detection, update after migration, and origin hash tracking.
2009 tests pass.
This commit is contained in:
teknium1 2026-03-06 16:13:47 -08:00
parent 6d3804770c
commit 4f56e31dc7
2 changed files with 280 additions and 102 deletions

View file

@ -3,16 +3,22 @@
Skills Sync -- Manifest-based seeding and updating of bundled skills.
Copies bundled skills from the repo's skills/ directory into ~/.hermes/skills/
and uses a manifest to track which skills have been offered.
and uses a manifest to track which skills have been synced and their origin hash.
Behavior:
- NEW skills (not in manifest): copied to user dir, added to manifest.
- EXISTING skills (in manifest, present in user dir): UPDATED from bundled.
- DELETED by user (in manifest, absent from user dir): respected -- not re-added.
Manifest format (v2): each line is "skill_name:origin_hash" where origin_hash
is the MD5 of the bundled skill at the time it was last synced to the user dir.
Old v1 manifests (plain names without hashes) are auto-migrated.
Update logic:
- NEW skills (not in manifest): copied to user dir, origin hash recorded.
- EXISTING skills (in manifest, present in user dir):
* If user copy matches origin hash: user hasn't modified it → safe to
update from bundled if bundled changed. New origin hash recorded.
* If user copy differs from origin hash: user customized it SKIP.
- DELETED by user (in manifest, absent from user dir): respected, not re-added.
- REMOVED from bundled (in manifest, gone from repo): cleaned from manifest.
The manifest lives at ~/.hermes/skills/.bundled_manifest and is a simple
newline-delimited list of skill names that have been offered to the user.
The manifest lives at ~/.hermes/skills/.bundled_manifest.
"""
import hashlib
@ -20,7 +26,7 @@ import logging
import os
import shutil
from pathlib import Path
from typing import List, Tuple
from typing import Dict, List, Tuple
logger = logging.getLogger(__name__)
@ -35,27 +41,38 @@ def _get_bundled_dir() -> Path:
return Path(__file__).parent.parent / "skills"
def _read_manifest() -> set:
"""Read the set of skill names already offered to the user."""
def _read_manifest() -> Dict[str, str]:
"""
Read the manifest as a dict of {skill_name: origin_hash}.
Handles both v1 (plain names) and v2 (name:hash) formats.
v1 entries get an empty hash string which triggers migration on next sync.
"""
if not MANIFEST_FILE.exists():
return set()
return {}
try:
return set(
line.strip()
for line in MANIFEST_FILE.read_text(encoding="utf-8").splitlines()
if line.strip()
)
result = {}
for line in MANIFEST_FILE.read_text(encoding="utf-8").splitlines():
line = line.strip()
if not line:
continue
if ":" in line:
# v2 format: name:hash
name, _, hash_val = line.partition(":")
result[name.strip()] = hash_val.strip()
else:
# v1 format: plain name — empty hash triggers migration
result[line] = ""
return result
except (OSError, IOError):
return set()
return {}
def _write_manifest(names: set):
"""Write the manifest file."""
def _write_manifest(entries: Dict[str, str]):
"""Write the manifest file in v2 format (name:hash)."""
MANIFEST_FILE.parent.mkdir(parents=True, exist_ok=True)
MANIFEST_FILE.write_text(
"\n".join(sorted(names)) + "\n",
encoding="utf-8",
)
lines = [f"{name}:{hash_val}" for name, hash_val in sorted(entries.items())]
MANIFEST_FILE.write_text("\n".join(lines) + "\n", encoding="utf-8")
def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]:
@ -105,18 +122,16 @@ def sync_skills(quiet: bool = False) -> dict:
"""
Sync bundled skills into ~/.hermes/skills/ using the manifest.
- NEW skills (not in manifest): copied to user dir, added to manifest.
- EXISTING skills (in manifest, present in user dir): updated from bundled.
- DELETED by user (in manifest, absent from user dir): respected, not re-added.
- REMOVED from bundled (in manifest, gone from repo): cleaned from manifest.
Returns:
dict with keys: copied (list), updated (list), skipped (int),
cleaned (list), total_bundled (int)
user_modified (list), cleaned (list), total_bundled (int)
"""
bundled_dir = _get_bundled_dir()
if not bundled_dir.exists():
return {"copied": [], "updated": [], "skipped": 0, "cleaned": [], "total_bundled": 0}
return {
"copied": [], "updated": [], "skipped": 0,
"user_modified": [], "cleaned": [], "total_bundled": 0,
}
SKILLS_DIR.mkdir(parents=True, exist_ok=True)
manifest = _read_manifest()
@ -125,16 +140,18 @@ def sync_skills(quiet: bool = False) -> dict:
copied = []
updated = []
user_modified = []
skipped = 0
for skill_name, skill_src in bundled_skills:
dest = _compute_relative_dest(skill_src, bundled_dir)
bundled_hash = _dir_hash(skill_src)
if skill_name not in manifest:
# New skill -- never offered before
# ── New skill — never offered before ──
try:
if dest.exists():
# User already has a skill with the same name (unlikely but possible)
# User already has a skill with the same name — don't overwrite
skipped += 1
else:
dest.parent.mkdir(parents=True, exist_ok=True)
@ -145,16 +162,37 @@ def sync_skills(quiet: bool = False) -> dict:
except (OSError, IOError) as e:
if not quiet:
print(f" ! Failed to copy {skill_name}: {e}")
manifest.add(skill_name)
manifest[skill_name] = bundled_hash
elif dest.exists():
# Existing skill in manifest AND on disk -- check for updates
src_hash = _dir_hash(skill_src)
dst_hash = _dir_hash(dest)
if src_hash != dst_hash:
# ── Existing skill — in manifest AND on disk ──
origin_hash = manifest.get(skill_name, "")
user_hash = _dir_hash(dest)
if not origin_hash:
# v1 migration: no origin hash recorded. Set baseline from
# user's current copy so future syncs can detect modifications.
manifest[skill_name] = user_hash
if user_hash == bundled_hash:
skipped += 1 # already in sync
else:
# Can't tell if user modified or bundled changed — be safe
skipped += 1
continue
if user_hash != origin_hash:
# User modified this skill — don't overwrite their changes
user_modified.append(skill_name)
if not quiet:
print(f" ~ {skill_name} (user-modified, skipping)")
continue
# User copy matches origin — check if bundled has a newer version
if bundled_hash != origin_hash:
try:
shutil.rmtree(dest)
shutil.copytree(skill_src, dest)
manifest[skill_name] = bundled_hash
updated.append(skill_name)
if not quiet:
print(f"{skill_name} (updated)")
@ -162,15 +200,16 @@ def sync_skills(quiet: bool = False) -> dict:
if not quiet:
print(f" ! Failed to update {skill_name}: {e}")
else:
skipped += 1
skipped += 1 # bundled unchanged, user unchanged
else:
# In manifest but not on disk -- user deleted it, respect that
# ── In manifest but not on disk — user deleted it ──
skipped += 1
# Clean stale manifest entries (skills removed from bundled dir)
cleaned = sorted(manifest - bundled_names)
manifest -= set(cleaned)
cleaned = sorted(set(manifest.keys()) - bundled_names)
for name in cleaned:
del manifest[name]
# Also copy DESCRIPTION.md files for categories (if not already present)
for desc_md in bundled_dir.rglob("DESCRIPTION.md"):
@ -189,6 +228,7 @@ def sync_skills(quiet: bool = False) -> dict:
"copied": copied,
"updated": updated,
"skipped": skipped,
"user_modified": user_modified,
"cleaned": cleaned,
"total_bundled": len(bundled_skills),
}
@ -197,6 +237,13 @@ def sync_skills(quiet: bool = False) -> dict:
if __name__ == "__main__":
print("Syncing bundled skills into ~/.hermes/skills/ ...")
result = sync_skills(quiet=False)
print(f"\nDone: {len(result['copied'])} new, {len(result['updated'])} updated, "
f"{result['skipped']} unchanged, {len(result['cleaned'])} cleaned from manifest, "
f"{result['total_bundled']} total bundled.")
parts = [
f"{len(result['copied'])} new",
f"{len(result['updated'])} updated",
f"{result['skipped']} unchanged",
]
if result["user_modified"]:
parts.append(f"{len(result['user_modified'])} user-modified (kept)")
if result["cleaned"]:
parts.append(f"{len(result['cleaned'])} cleaned from manifest")
print(f"\nDone: {', '.join(parts)}. {result['total_bundled']} total bundled.")