repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources15d ago| #1 | #!/usr/bin/env python3 |
| #2 | """ |
| #3 | Mnemosyne Legacy Migration Script |
| #4 | ================================= |
| #5 | |
| #6 | Migrates memories from ephemeral/legacy databases to the PERSISTED canonical path. |
| #7 | |
| #8 | CRITICAL for Fly.io / ephemeral VMs: Only ~/.hermes is persisted across restarts! |
| #9 | - Source: ~/.mnemosyne/data/mnemosyne.db (ephemeral — lost on restart) |
| #10 | - Target: ~/.hermes/mnemosyne/data/mnemosyne.db (persisted) |
| #11 | |
| #12 | Also migrates legacy mnemosyne_native.db files from earlier versions. |
| #13 | |
| #14 | Usage: |
| #15 | python scripts/migrate_from_legacy.py [--dry-run] [--purge-tools] |
| #16 | |
| #17 | What it does: |
| #18 | 1. Scans ephemeral and legacy database paths |
| #19 | 2. Copies missing memories into the persisted canonical DB |
| #20 | 3. Migrates meaningful non-tool memories into BEAM episodic_memory |
| #21 | 4. Promotes high-importance memories into working_memory |
| #22 | 5. Preserves all existing data (idempotent — safe to run multiple times) |
| #23 | """ |
| #24 | |
| #25 | import argparse |
| #26 | import os |
| #27 | import sqlite3 |
| #28 | import sys |
| #29 | from pathlib import Path |
| #30 | |
# Directory that survives restarts on Fly.io and other ephemeral VMs; it is
# the default canonical location (matches mnemosyne.core.beam DEFAULT_DB_PATH).
_PERSISTED_DATA_DIR = Path.home() / ".hermes" / "mnemosyne" / "data"

# Directory used by earlier versions; on ephemeral VMs it is lost on restart.
_EPHEMERAL_DATA_DIR = Path.home() / ".mnemosyne" / "data"

# MNEMOSYNE_DATA_DIR, when set to a non-empty value, overrides the default.
CANONICAL_DATA_DIR = Path(os.environ.get("MNEMOSYNE_DATA_DIR") or _PERSISTED_DATA_DIR)
CANONICAL_DB = CANONICAL_DATA_DIR / "mnemosyne.db"

# Legacy / ephemeral databases to scan and migrate from, in scan order.
LEGACY_CANDIDATES = [
    _EPHEMERAL_DATA_DIR / "mnemosyne.db",  # ephemeral BEAM data
    _EPHEMERAL_DATA_DIR / "mnemosyne_native.db",
    _PERSISTED_DATA_DIR / "mnemosyne_native.db",
]
| #46 | |
| #47 | |
def ensure_schema(conn: sqlite3.Connection):
    """Create every table, index and trigger the canonical DB needs.

    Covers the legacy ``memories`` / ``memory_embeddings`` tables, the BEAM
    ``working_memory`` / ``episodic_memory`` / ``scratchpad`` tables and the
    FTS5 index over ``episodic_memory``. Every statement uses
    ``IF NOT EXISTS``, so calling this on an already-initialised database is
    harmless. The work is committed before returning.
    """
    db = conn.cursor()

    # The legacy memories table comes first because the column back-fill
    # below needs it to exist.
    db.execute("""
        CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
            content TEXT NOT NULL,
            source TEXT,
            timestamp TEXT,
            session_id TEXT DEFAULT 'default',
            importance REAL DEFAULT 0.5,
            metadata_json TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)

    # Some old canonical DBs were created without created_at: add the column
    # and stamp the rows that predate it.
    known_columns = {
        column_info[1]
        for column_info in db.execute("PRAGMA table_info(memories)").fetchall()
    }
    if "created_at" not in known_columns:
        db.execute("ALTER TABLE memories ADD COLUMN created_at TIMESTAMP")
        db.execute("UPDATE memories SET created_at = CURRENT_TIMESTAMP WHERE created_at IS NULL")

    remaining_statements = (
        # Indexes on the legacy memories table
        "CREATE INDEX IF NOT EXISTS idx_session ON memories(session_id)",
        "CREATE INDEX IF NOT EXISTS idx_timestamp ON memories(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_source ON memories(source)",
        # Legacy embeddings table
        """
        CREATE TABLE IF NOT EXISTS memory_embeddings (
            memory_id TEXT PRIMARY KEY,
            embedding_json TEXT NOT NULL,
            model TEXT DEFAULT 'bge-small-en-v1.5',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (memory_id) REFERENCES memories(id) ON DELETE CASCADE
        )
        """,
        # BEAM working_memory
        """
        CREATE TABLE IF NOT EXISTS working_memory (
            id TEXT PRIMARY KEY,
            content TEXT NOT NULL,
            source TEXT,
            timestamp TEXT,
            session_id TEXT DEFAULT 'default',
            importance REAL DEFAULT 0.5,
            metadata_json TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_wm_session ON working_memory(session_id)",
        "CREATE INDEX IF NOT EXISTS idx_wm_timestamp ON working_memory(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_wm_source ON working_memory(source)",
        # BEAM episodic_memory
        """
        CREATE TABLE IF NOT EXISTS episodic_memory (
            rowid INTEGER PRIMARY KEY AUTOINCREMENT,
            id TEXT UNIQUE NOT NULL,
            content TEXT NOT NULL,
            source TEXT,
            timestamp TEXT,
            session_id TEXT DEFAULT 'default',
            importance REAL DEFAULT 0.5,
            metadata_json TEXT,
            summary_of TEXT DEFAULT '',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_em_session ON episodic_memory(session_id)",
        "CREATE INDEX IF NOT EXISTS idx_em_timestamp ON episodic_memory(timestamp)",
        "CREATE INDEX IF NOT EXISTS idx_em_source ON episodic_memory(source)",
        # BEAM scratchpad
        """
        CREATE TABLE IF NOT EXISTS scratchpad (
            id TEXT PRIMARY KEY,
            content TEXT NOT NULL,
            session_id TEXT DEFAULT 'default',
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        """,
        "CREATE INDEX IF NOT EXISTS idx_sp_session ON scratchpad(session_id)",
        # FTS5 index over episodic memory, kept in sync by the two triggers
        """
        CREATE VIRTUAL TABLE IF NOT EXISTS fts_episodes USING fts5(
            content,
            content='episodic_memory',
            content_rowid='rowid'
        )
        """,
        """
        CREATE TRIGGER IF NOT EXISTS em_ai AFTER INSERT ON episodic_memory BEGIN
            INSERT INTO fts_episodes(rowid, content) VALUES (new.rowid, new.content);
        END
        """,
        """
        CREATE TRIGGER IF NOT EXISTS em_ad AFTER DELETE ON episodic_memory BEGIN
            INSERT INTO fts_episodes(fts_episodes, rowid, content) VALUES ('delete', old.rowid, old.content);
        END
        """,
    )
    for statement in remaining_statements:
        db.execute(statement)

    conn.commit()
| #154 | |
| #155 | |
def get_existing_ids(conn: sqlite3.Connection, table: str) -> set:
    """Return the set of every ``id`` value currently stored in *table*.

    The table name is interpolated straight into the SQL text, so callers
    must pass a trusted identifier.
    """
    id_rows = conn.execute(f"SELECT id FROM {table}")
    return {record_id for (record_id,) in id_rows}
| #160 | |
| #161 | |
# Columns read from a legacy ``memories`` table, each paired with the SQL
# expression substituted when an older schema lacks that column. ``None``
# marks a column the migration cannot work without. The fallbacks mirror the
# column defaults of the canonical schema.
_LEGACY_MEMORY_COLUMNS = (
    ("id", None),
    ("content", None),
    ("source", "NULL"),
    ("timestamp", "NULL"),
    ("session_id", "'default'"),
    ("importance", "0.5"),
    ("metadata_json", "NULL"),
    ("created_at", "CURRENT_TIMESTAMP"),
)

# Same idea for a legacy ``memory_embeddings`` table.
_LEGACY_EMBEDDING_COLUMNS = (
    ("memory_id", None),
    ("embedding_json", None),
    ("model", "'bge-small-en-v1.5'"),
    ("created_at", "CURRENT_TIMESTAMP"),
)

# Maximum number of memories promoted into working_memory per legacy database.
_WORKING_MEMORY_LIMIT = 30


def _column_values(conn, table, column):
    """Return the set of values stored in *column* of *table* (trusted names only)."""
    return {row[0] for row in conn.execute(f"SELECT {column} FROM {table}")}


def _select_with_fallbacks(cursor, table, columns):
    """Fetch all rows of *table*, substituting fallbacks for columns it lacks.

    *columns* is a sequence of ``(name, fallback_sql)`` pairs. Returns the list
    of rows (one value per pair, in order), or ``None`` when a column whose
    fallback is ``None`` is missing from the table.
    """
    cursor.execute(f"PRAGMA table_info({table})")
    present = {info[1] for info in cursor.fetchall()}
    select_exprs = []
    for name, fallback in columns:
        if name in present:
            select_exprs.append(name)
        elif fallback is None:
            return None
        else:
            select_exprs.append(f"{fallback} AS {name}")
    cursor.execute(f"SELECT {', '.join(select_exprs)} FROM {table}")
    return cursor.fetchall()


def migrate_legacy_db(legacy_path: Path, canonical_conn: sqlite3.Connection, dry_run: bool = False) -> dict:
    """Migrate a single legacy database into the canonical one.

    Steps, each skipping rows whose id already exists in the target table so
    the function is safe to run repeatedly:

    1. Copy rows of the legacy ``memories`` table into canonical ``memories``.
    2. Copy ``memory_embeddings`` rows, when the legacy DB has that table.
    3. Insert every memory whose source is not ``'tool_execution'`` into
       ``episodic_memory`` under session ``'hermes_default'``.
    4. Promote the highest-importance of those (at most 30) into
       ``working_memory``.

    Legacy tables created by older versions may lack optional columns (for
    example ``created_at``); the canonical defaults are used for them.

    Args:
        legacy_path: Path of the legacy SQLite database to read.
        canonical_conn: Open connection to the canonical database; its schema
            must already exist.
        dry_run: When true, only print what would be done and write nothing.

    Returns:
        Dict with the keys ``memories_copied``, ``embeddings_copied``,
        ``episodic_migrated`` and ``working_migrated``. All counts stay 0 on
        a dry run or when the legacy database is skipped.
    """
    stats = {"memories_copied": 0, "embeddings_copied": 0, "episodic_migrated": 0, "working_migrated": 0}

    legacy_conn = sqlite3.connect(str(legacy_path))
    try:
        legacy_cursor = legacy_conn.cursor()

        # Check what tables exist
        legacy_cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = {row[0] for row in legacy_cursor.fetchall()}

        if "memories" not in tables:
            print(f" ⚠️ No memories table in {legacy_path} — skipping")
            return stats

        rows = _select_with_fallbacks(legacy_cursor, "memories", _LEGACY_MEMORY_COLUMNS)
        if rows is None:
            print(f" ⚠️ memories table in {legacy_path} lacks id/content columns — skipping")
            return stats

        canonical_cursor = canonical_conn.cursor()

        # 1. Copy memories
        existing_memory_ids = _column_values(canonical_conn, "memories", "id")
        to_insert = [row for row in rows if row[0] not in existing_memory_ids]

        if dry_run:
            print(f" [DRY-RUN] Would copy {len(to_insert)} memories from {legacy_path}")
        else:
            canonical_cursor.executemany("""
                INSERT INTO memories (id, content, source, timestamp, session_id, importance, metadata_json, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
            """, to_insert)
            canonical_conn.commit()
            stats["memories_copied"] = len(to_insert)

        # 2. Copy embeddings if present
        if "memory_embeddings" in tables:
            embeddings = _select_with_fallbacks(legacy_cursor, "memory_embeddings", _LEGACY_EMBEDDING_COLUMNS)
            if embeddings is None:
                print(f" ⚠️ memory_embeddings table in {legacy_path} lacks required columns — skipping embeddings")
            else:
                existing_emb_ids = _column_values(canonical_conn, "memory_embeddings", "memory_id")
                emb_to_insert = [row for row in embeddings if row[0] not in existing_emb_ids]

                if dry_run:
                    print(f" [DRY-RUN] Would copy {len(emb_to_insert)} embeddings from {legacy_path}")
                else:
                    canonical_cursor.executemany("""
                        INSERT INTO memory_embeddings (memory_id, embedding_json, model, created_at)
                        VALUES (?, ?, ?, ?)
                    """, emb_to_insert)
                    canonical_conn.commit()
                    stats["embeddings_copied"] = len(emb_to_insert)

        # 3. Migrate meaningful non-tool memories into episodic_memory.
        # The existing-id set is fetched once instead of once per row.
        existing_episodic_ids = _column_values(canonical_conn, "episodic_memory", "id")
        meaningful = [
            row for row in rows
            if row[2] != 'tool_execution' and row[0] not in existing_episodic_ids
        ]

        if dry_run:
            print(f" [DRY-RUN] Would migrate {len(meaningful)} memories to episodic + up to {_WORKING_MEMORY_LIMIT} to working")
            return stats

        canonical_cursor.executemany("""
            INSERT INTO episodic_memory (id, content, source, timestamp, session_id, importance, metadata_json, summary_of)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, [
            (mid, content, source, timestamp, 'hermes_default', importance, metadata_json or '{}', '')
            for mid, content, source, timestamp, _session_id, importance, metadata_json, _created_at in meaningful
        ])
        canonical_conn.commit()
        stats["episodic_migrated"] = len(meaningful)

        # 4. Promote top high-importance ones into working_memory.
        # Only a missing importance falls back to 0.5; an explicit 0.0 ranks last.
        existing_working_ids = _column_values(canonical_conn, "working_memory", "id")
        hot = sorted(
            (row for row in meaningful if row[0] not in existing_working_ids),
            key=lambda r: 0.5 if r[5] is None else r[5],
            reverse=True,
        )[:_WORKING_MEMORY_LIMIT]
        canonical_cursor.executemany("""
            INSERT INTO working_memory (id, content, source, timestamp, session_id, importance, metadata_json)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, [
            (mid, content, source, timestamp, 'hermes_default', importance, metadata_json or '{}')
            for mid, content, source, timestamp, _session_id, importance, metadata_json, _created_at in hot
        ])
        canonical_conn.commit()
        stats["working_migrated"] = len(hot)
    finally:
        # Release the legacy DB handle even when a query or insert fails.
        legacy_conn.close()

    return stats
| #249 | |
| #250 | |
def main(argv=None):
    """Run the legacy-to-canonical migration from the command line.

    Opens (creating if necessary) the canonical database, ensures its schema,
    migrates every existing path in ``LEGACY_CANDIDATES`` other than the
    canonical DB itself, optionally purges ``tool_execution`` rows, and prints
    a summary.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        The process exit code, which is always 0 when no exception is raised.
    """
    parser = argparse.ArgumentParser(description="Migrate legacy Mnemosyne databases to the current canonical path")
    parser.add_argument("--dry-run", action="store_true", help="Preview changes without writing")
    parser.add_argument("--purge-tools", action="store_true", help="Remove legacy auto-logged tool_execution memories after migration")
    args = parser.parse_args(argv)

    print("=" * 60)
    print("Mnemosyne Legacy Database Migration")
    print("=" * 60)
    print(f"Canonical DB: {CANONICAL_DB}")
    print()

    CANONICAL_DB.parent.mkdir(parents=True, exist_ok=True)
    canonical_conn = sqlite3.connect(str(CANONICAL_DB))
    try:
        ensure_schema(canonical_conn)

        # Pre-check stats
        cursor = canonical_conn.cursor()
        cursor.execute("SELECT COUNT(*) FROM memories")
        pre_total = cursor.fetchone()[0]
        cursor.execute("SELECT COUNT(*) FROM memories WHERE source = 'tool_execution'")
        pre_tools = cursor.fetchone()[0]
        print(f"Current canonical DB has {pre_total} memories ({pre_tools} tool_execution)")

        total_stats = {"memories_copied": 0, "embeddings_copied": 0, "episodic_migrated": 0, "working_migrated": 0}
        any_found = False

        for legacy_path in LEGACY_CANDIDATES:
            # Never treat the canonical DB itself as a legacy source.
            if legacy_path.exists() and legacy_path.resolve() != CANONICAL_DB.resolve():
                any_found = True
                print(f"\n📁 Found legacy DB: {legacy_path}")
                stats = migrate_legacy_db(legacy_path, canonical_conn, dry_run=args.dry_run)
                for k in total_stats:
                    total_stats[k] += stats[k]

        if not any_found and pre_total == 0:
            print("\n✅ No legacy databases found and canonical DB is empty. Nothing to migrate.")
            return 0

        # Purge tool_execution noise if requested
        purged_tools = 0
        if args.purge_tools and not args.dry_run:
            # rowcount only reflects the most recent statement, so it is read
            # after each DELETE and summed.
            cursor.execute("DELETE FROM memories WHERE source = 'tool_execution'")
            purged_tools = cursor.rowcount
            cursor.execute("DELETE FROM working_memory WHERE source = 'tool_execution'")
            purged_tools += cursor.rowcount
            canonical_conn.commit()
            print(f"\n🧹 Purged {purged_tools} tool_execution memories from canonical DB")
        elif args.purge_tools and args.dry_run:
            # Count both tables so the preview matches what a real purge reports.
            cursor.execute("SELECT COUNT(*) FROM memories WHERE source = 'tool_execution'")
            would_purge = cursor.fetchone()[0]
            cursor.execute("SELECT COUNT(*) FROM working_memory WHERE source = 'tool_execution'")
            would_purge += cursor.fetchone()[0]
            print(f"\n[DRY-RUN] Would purge {would_purge} tool_execution memories")

        if args.dry_run:
            print("\n🏁 Dry-run complete. No changes were made.")
        else:
            cursor.execute("SELECT COUNT(*) FROM memories")
            post_total = cursor.fetchone()[0]
            print("\n🏁 Migration complete!")
            print(f" Memories copied: {total_stats['memories_copied']}")
            print(f" Embeddings copied: {total_stats['embeddings_copied']}")
            print(f" Episodic migrated: {total_stats['episodic_migrated']}")
            print(f" Working promoted: {total_stats['working_migrated']}")
            if purged_tools:
                print(f" Tool memories purged: {purged_tools}")
            print(f" Total in canonical: {post_total} (was {pre_total})")
    finally:
        # Close the canonical connection on every exit path, including errors.
        canonical_conn.close()

    return 0
| #320 | |
| #321 | |
# Run the migration only when executed as a script, and use main()'s return
# value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
| #324 |