repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources15d ago| #1 | """ |
| #2 | Tests for Mnemosyne BEAM architecture |
| #3 | """ |
| #4 | |
| #5 | import pytest |
| #6 | import tempfile |
| #7 | import sqlite3 |
| #8 | from pathlib import Path |
| #9 | from datetime import datetime, timedelta |
| #10 | |
| #11 | from mnemosyne.core.beam import BeamMemory, init_beam |
| #12 | from mnemosyne.core.memory import Mnemosyne |
| #13 | |
| #14 | |
@pytest.fixture
def temp_db():
    """Provide the path of a SQLite file inside a throwaway directory.

    Only the path is produced here; this fixture does not create the file.
    The directory is removed when the fixture is torn down.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield Path(scratch_dir, "test.db")
| #20 | |
| #21 | |
class TestBeamSchema:
    """Schema checks for the database that ``init_beam`` sets up."""

    def test_init_creates_tables(self, temp_db):
        """After ``init_beam`` every expected table is listed in sqlite_master."""
        init_beam(temp_db)

        conn = sqlite3.connect(temp_db)
        try:
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
        finally:
            # Close before asserting so a failed check cannot leak the handle.
            conn.close()

        tables = {name for (name,) in rows}
        expected = (
            "working_memory",
            "episodic_memory",
            "scratchpad",
            "consolidation_log",
            "fts_episodes",  # FTS5 virtual table
        )
        for name in expected:
            assert name in tables
| #37 | |
| #38 | |
class TestWorkingMemory:
    """Working-memory tier: storing, reading back, and trimming."""

    def test_remember_and_context(self, temp_db):
        """A remembered item is the single entry returned by ``get_context``."""
        memory = BeamMemory(session_id="s1", db_path=temp_db)

        memory_id = memory.remember("Prefers Neovim", source="preference", importance=0.9)
        assert memory_id is not None

        context = memory.get_context(limit=5)
        assert len(context) == 1
        first_entry = context[0]
        assert first_entry["content"] == "Prefers Neovim"

    def test_trim_old_memories(self, temp_db):
        """Working-memory total is zero after trimming a 25-hour-old row."""
        memory = BeamMemory(session_id="s1", db_path=temp_db)

        # Write the row straight into the table so its timestamp can be backdated.
        db = sqlite3.connect(temp_db)
        stale_row = (
            "old1",
            "old content",
            "conversation",
            (datetime.now() - timedelta(hours=25)).isoformat(),
            "s1",
        )
        db.execute(
            "INSERT INTO working_memory (id, content, source, timestamp, session_id) VALUES (?, ?, ?, ?, ?)",
            stale_row,
        )
        db.commit()
        db.close()

        memory._trim_working_memory()
        assert memory.get_working_stats()["total"] == 0
| #64 | |
| #65 | |
class TestEpisodicMemory:
    """Episodic tier: consolidation writes and recall."""

    def test_consolidate_and_recall(self, temp_db):
        """A consolidated summary comes back from ``recall`` tagged as episodic."""
        memory = BeamMemory(session_id="s1", db_path=temp_db)

        episode_id = memory.consolidate_to_episodic(
            summary="User likes dark mode", source_wm_ids=["wm1"], importance=0.8
        )
        assert episode_id is not None

        hits = memory.recall("dark mode")
        assert len(hits) >= 1
        assert any(hit["tier"] == "episodic" for hit in hits)

    def test_recall_hybrid_ranking(self, temp_db):
        """With two episodes stored, a related query returns at least one hit."""
        memory = BeamMemory(session_id="s1", db_path=temp_db)

        episodes = (
            ("Python is the best language", ["a"]),
            ("Rust is great for systems", ["b"]),
        )
        for summary, wm_ids in episodes:
            memory.consolidate_to_episodic(summary, wm_ids, importance=0.7)

        hits = memory.recall("best programming language")
        assert len(hits) >= 1
| #87 | |
| #88 | |
class TestScratchpad:
    """Scratchpad round trip: write, read back, clear."""

    def test_scratchpad_write_read_clear(self, temp_db):
        """One written note is readable, and nothing is left after clearing."""
        memory = BeamMemory(session_id="s1", db_path=temp_db)

        memory.scratchpad_write("todo: fix auth")
        notes = memory.scratchpad_read()
        assert len(notes) == 1
        assert "fix auth" in notes[0]["content"]

        memory.scratchpad_clear()
        leftover = memory.scratchpad_read()
        assert len(leftover) == 0
| #99 | |
| #100 | |
class TestSleepCycle:
    """Consolidation behaviour of ``sleep`` and ``sleep_all_sessions``."""

    # Statement shared by every direct working_memory seed in this class.
    _INSERT_WM = (
        "INSERT INTO working_memory (id, content, source, timestamp, session_id) "
        "VALUES (?, ?, ?, ?, ?)"
    )

    @staticmethod
    def _hours_ago(hours):
        """Return the ISO-8601 string for the local time ``hours`` hours ago."""
        return (datetime.now() - timedelta(hours=hours)).isoformat()

    @classmethod
    def _seed_working_memory(cls, db_path, rows):
        """Insert ``rows`` into working_memory, closing the handle even on error."""
        conn = sqlite3.connect(db_path)
        try:
            conn.executemany(cls._INSERT_WM, rows)
            conn.commit()
        finally:
            conn.close()

    @staticmethod
    def _fetch_all(db_path, sql, params=()):
        """Run one query on a short-lived connection and return all rows."""
        conn = sqlite3.connect(db_path)
        try:
            return conn.execute(sql, params).fetchall()
        finally:
            conn.close()

    def test_sleep_consolidates_old_memories(self, temp_db):
        """Three 20-hour-old rows are consolidated and logged as three items."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Inject old working memories
        old_ts = self._hours_ago(20)
        self._seed_working_memory(
            temp_db,
            [(f"old{i}", f"task {i}", "conversation", old_ts, "s1") for i in range(3)],
        )

        result = beam.sleep(dry_run=False)
        assert result["status"] == "consolidated"
        assert result["items_consolidated"] == 3

        log = beam.get_consolidation_log(limit=1)
        assert len(log) == 1
        assert log[0]["items_consolidated"] == 3

    def test_sleep_dry_run(self, temp_db):
        """A dry run reports the item count but leaves working memory intact."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        self._seed_working_memory(
            temp_db,
            [("old1", "task one", "conversation", self._hours_ago(20), "s1")],
        )

        result = beam.sleep(dry_run=True)
        assert result["status"] == "dry_run"
        assert result["items_consolidated"] == 1
        # Should not actually delete
        stats = beam.get_working_stats()
        assert stats["total"] == 1

    def test_sleep_remains_session_scoped(self, temp_db):
        """``sleep`` on session s1 leaves the s2 working-memory row in place."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        old_ts = self._hours_ago(20)
        self._seed_working_memory(
            temp_db,
            [
                ("s1-old", "session one task", "conversation", old_ts, "s1"),
                ("s2-old", "session two task", "conversation", old_ts, "s2"),
            ],
        )

        result = beam.sleep(dry_run=False)
        assert result["status"] == "consolidated"
        assert result["items_consolidated"] == 1

        remaining = self._fetch_all(
            temp_db, "SELECT session_id FROM working_memory ORDER BY session_id"
        )
        assert remaining == [("s2",)]

    def test_sleep_all_sessions_consolidates_inactive_sessions(self, temp_db):
        """Old rows from both sessions are consolidated; the fresh row stays."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        old_ts = self._hours_ago(20)
        fresh_ts = datetime.now().isoformat()
        self._seed_working_memory(
            temp_db,
            [
                ("s1-old", "session one old task", "conversation", old_ts, "s1"),
                ("s2-old", "session two old task", "conversation", old_ts, "s2"),
                ("s2-fresh", "session two fresh task", "conversation", fresh_ts, "s2"),
            ],
        )

        result = beam.sleep_all_sessions(dry_run=False)
        assert result["status"] == "consolidated"
        assert result["sessions_scanned"] == 2
        assert result["sessions_consolidated"] == 2
        assert result["items_consolidated"] == 2
        assert result["summaries_created"] == 2
        assert result["errors"] == 0

        remaining = self._fetch_all(temp_db, "SELECT id, session_id FROM working_memory")
        logs = self._fetch_all(
            temp_db,
            "SELECT session_id, items_consolidated FROM consolidation_log ORDER BY session_id",
        )
        episodic_count = self._fetch_all(temp_db, "SELECT COUNT(*) FROM episodic_memory")[0][0]

        assert remaining == [("s2-fresh", "s2")]
        assert logs == [("s1", 1), ("s2", 1)]
        assert episodic_count == 2

    def test_sleep_all_sessions_dry_run_preserves_working_memory(self, temp_db):
        """A dry run across sessions writes no episodes and no log entries."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        old_ts = self._hours_ago(20)
        self._seed_working_memory(
            temp_db,
            [
                ("s1-old", "session one task", "conversation", old_ts, "s1"),
                ("s2-old", "session two task", "conversation", old_ts, "s2"),
            ],
        )

        result = beam.sleep_all_sessions(dry_run=True)
        assert result["status"] == "dry_run"
        assert result["sessions_scanned"] == 2
        assert result["items_consolidated"] == 2

        working_count = self._fetch_all(temp_db, "SELECT COUNT(*) FROM working_memory")[0][0]
        episodic_count = self._fetch_all(temp_db, "SELECT COUNT(*) FROM episodic_memory")[0][0]
        log_count = self._fetch_all(temp_db, "SELECT COUNT(*) FROM consolidation_log")[0][0]
        assert working_count == 2
        assert episodic_count == 0
        assert log_count == 0

    def test_sleep_writes_dense_embedding_for_consolidated_row(self, temp_db, monkeypatch):
        """[C5] State-level companion to the FTS recallability test. Verifies
        sleep populates a dense-recall store (sqlite-vec's vec_episodes when
        loaded, otherwise the memory_embeddings fallback) for each consolidated
        episodic row. A regression that broke the embed→write call (e.g.
        embed() returning None silently, or a missing INSERT into the
        fallback table) would leave dense recall empty even though FTS keeps
        working.

        Skipped when fastembed isn't installed; the dense path is gated on
        _embeddings.available() and a model load. CI runs with fastembed."""
        from mnemosyne.core import embeddings as _embeddings

        if not _embeddings.available():
            pytest.skip("fastembed not available — dense-recall path inactive")

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        old_ts = self._hours_ago(20)
        self._seed_working_memory(
            temp_db,
            [
                ("old0", "deploy plan for falcon kickoff", "conversation", old_ts, "s1"),
                ("old1", "retro notes from beta release", "conversation", old_ts, "s1"),
            ],
        )

        beam.sleep(dry_run=False)

        # Post-sleep, exactly one episodic row should exist (one consolidated
        # summary for the session). Dense store should hold a row for it.
        from mnemosyne.core.beam import _vec_available

        conn = sqlite3.connect(temp_db)
        try:
            # The finally clause releases the handle even when a check below fails.
            ep_ids = [r[0] for r in conn.execute("SELECT id FROM episodic_memory").fetchall()]
            assert len(ep_ids) == 1, f"expected 1 consolidated episodic row, got {len(ep_ids)}"

            if _vec_available(conn):
                vec_count = conn.execute("SELECT COUNT(*) FROM vec_episodes").fetchone()[0]
                assert vec_count >= 1, (
                    "sleep consolidated an episodic row but vec_episodes is "
                    "empty — the embed→_vec_insert path did not run. Likely "
                    "cause: _embeddings.embed() returned None silently, or "
                    "_vec_insert raised and was swallowed."
                )
            else:
                mem_count = conn.execute(
                    "SELECT COUNT(*) FROM memory_embeddings WHERE memory_id = ?", (ep_ids[0],)
                ).fetchone()[0]
                assert mem_count >= 1, (
                    "sleep consolidated an episodic row but memory_embeddings "
                    "fallback is empty — the embed→INSERT path did not run."
                )
        finally:
            conn.close()

    def test_sleep_consolidated_content_is_recallable(self, temp_db, monkeypatch):
        """[C5] End-to-end recallability check. Existing sleep tests assert
        counts (items_consolidated, episodic_count) but never verify the
        consolidated content is actually findable through the public recall
        API. A regression that took the consolidated row off-recall via ALL
        recall paths simultaneously (FTS5 trigger broken AND dense store
        skipped AND fallback substring match unreachable) would slip through
        every existing sleep test.

        Locks: after sleep, recall(unique_token_from_seeded_wm) returns at
        least one episodic-tier hit whose content contains that token.

        Note: this is NOT an FTS-isolated assertion. recall() unions vec
        and FTS rowids (beam.py:1751) and falls back to substring scan
        (beam.py:1880) when both are empty, so this test locks recallability
        by *any* path — not the FTS path specifically. Stronger isolation
        would require calling _fts_search directly; that lives in a follow-up
        if the union/fallback layers shift.

        Uses LLM-disabled deterministic AAAK-encoded summary path
        (beam.py:2483 — `compressed = aaak_encode(combined)`). AAAK is
        phrase-substitution + compaction; uncommon literal tokens like
        the ones seeded below survive intact. Same monkeypatch pattern as
        test_beam.py:297, :488, :691, :938, :961."""
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        old_ts = self._hours_ago(20)
        # Three distinct unique tokens — one per seeded memory.
        # Pick tokens that won't collide with FTS stop-words or the deterministic
        # concat header text.
        self._seed_working_memory(
            temp_db,
            [
                ("old0", "wm contains marker zorblax kickoff plan", "conversation", old_ts, "s1"),
                ("old1", "wm contains marker quetzelfin retro notes", "conversation", old_ts, "s1"),
                ("old2", "wm contains marker xanadush deploy log", "conversation", old_ts, "s1"),
            ],
        )

        result = beam.sleep(dry_run=False)
        assert result["status"] == "consolidated"
        assert result["items_consolidated"] == 3

        # Each unique token must surface an episodic-tier result.
        for token in ("zorblax", "quetzelfin", "xanadush"):
            results = beam.recall(token, top_k=10)
            assert results, (
                f"recall({token!r}) returned 0 results — the sleep path "
                f"consolidated working_memory but the episodic row is not "
                f"reachable through ANY recall path (FTS, vec, fallback "
                f"substring scan). Likely cause: FTS5 trigger missed AND "
                f"dense store missed AND content does not contain the "
                f"original token (LLM summarization path active despite "
                f"monkeypatch?)."
            )
            assert any(r.get("tier") == "episodic" for r in results), (
                f"recall({token!r}) returned {len(results)} hits but none "
                f"are episodic-tier: {[(r.get('tier'), r.get('content', '')[:50]) for r in results]}"
            )
            assert any(token in (r.get("content") or "").lower() for r in results), (
                f"recall({token!r}) returned hits but the token does not "
                f"appear in any returned content — FTS may be matching on "
                f"trigram noise rather than the seeded token: "
                f"{[r.get('content') for r in results]}"
            )
| #353 | |
| #354 | |
class TestMnemosyneIntegration:
    """Checks that ``Mnemosyne`` keeps the legacy table and BEAM tier in step."""

    def test_legacy_and_beam_dual_write(self, temp_db):
        """``remember`` writes the same id to ``memories`` and ``working_memory``."""
        mem = Mnemosyne(session_id="s2", db_path=temp_db)
        mid = mem.remember("Likes pizza", source="preference", importance=0.8)

        conn = sqlite3.connect(temp_db)
        try:
            # Legacy table
            legacy = conn.execute("SELECT * FROM memories WHERE id = ?", (mid,)).fetchone()
            # BEAM working_memory should use the same ID now
            wm = conn.execute(
                "SELECT * FROM working_memory WHERE id = ? AND session_id = ?", (mid, "s2")
            ).fetchone()
        finally:
            # Close before asserting so a failed check cannot leak the handle.
            conn.close()
        assert legacy is not None
        assert wm is not None

        results = mem.recall("pizza")
        assert len(results) >= 1

    def test_forget_removes_both_layers(self, temp_db):
        """``forget`` returns True and removes the row from both tables."""
        mem = Mnemosyne(session_id="s2", db_path=temp_db)
        mid = mem.remember("Forget me please", source="preference", importance=0.8)
        assert mem.forget(mid) is True

        conn = sqlite3.connect(temp_db)
        try:
            legacy = conn.execute("SELECT * FROM memories WHERE id = ?", (mid,)).fetchone()
            wm = conn.execute(
                "SELECT * FROM working_memory WHERE id = ? AND session_id = ?", (mid, "s2")
            ).fetchone()
        finally:
            conn.close()
        assert legacy is None
        assert wm is None

    def test_beam_stats(self, temp_db):
        """``get_stats`` reports beam mode with both tier sections present."""
        mem = Mnemosyne(session_id="s3", db_path=temp_db)
        mem.remember("Test stat", importance=0.5)
        stats = mem.get_stats()
        assert stats["mode"] == "beam"
        assert "beam" in stats
        assert "working_memory" in stats["beam"]
        assert "episodic_memory" in stats["beam"]
| #392 | |
| #393 | |
class TestExportImport:
    """Export and import round trips for ``BeamMemory`` and ``Mnemosyne``."""

    def test_beam_export_to_dict(self, temp_db):
        """The export carries a version header and every populated tier."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        beam.remember("Prefers dark mode", source="preference", importance=0.9)
        beam.scratchpad_write("todo item")
        beam.consolidate_to_episodic("User likes dark mode", ["wm1"], importance=0.8)

        data = beam.export_to_dict()
        assert "mnemosyne_export" in data
        assert data["mnemosyne_export"]["version"] == "1.0"
        assert len(data["working_memory"]) >= 1
        assert len(data["scratchpad"]) >= 1
        assert len(data["episodic_memory"]) >= 1

    def test_beam_import_from_dict_idempotent(self, temp_db):
        """A first import inserts rows; importing the same data again skips them."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # The returned id is not needed here; only the exported payload is.
        beam.remember("Prefers dark mode", source="preference", importance=0.9)
        data = beam.export_to_dict()

        # Import into fresh DB
        with tempfile.TemporaryDirectory() as tmpdir:
            fresh_db = Path(tmpdir) / "fresh.db"
            fresh_beam = BeamMemory(session_id="s1", db_path=fresh_db)
            stats = fresh_beam.import_from_dict(data)
            assert stats["working_memory"]["inserted"] >= 1

            # Verify
            ctx = fresh_beam.get_context(limit=5)
            assert any("dark mode" in c["content"] for c in ctx)

            # Second import should skip
            stats2 = fresh_beam.import_from_dict(data)
            assert stats2["working_memory"]["skipped"] >= 1

    def test_mnemosyne_export_import_roundtrip(self, temp_db):
        """A file export from one database imports into a second database."""
        with tempfile.TemporaryDirectory() as tmpdir:
            # Source
            src = Mnemosyne(session_id="s1", db_path=temp_db)
            src.remember("Likes pizza", source="preference", importance=0.8)
            src.scratchpad_write("note")
            export_path = Path(tmpdir) / "export.json"
            src.export_to_file(str(export_path))
            assert export_path.exists()

            # Target
            target_db = Path(tmpdir) / "target.db"
            target = Mnemosyne(session_id="s1", db_path=target_db)
            stats = target.import_from_file(str(export_path))
            assert stats["legacy"]["inserted"] >= 1
            assert stats["beam"]["working_memory"]["inserted"] >= 1
| #444 | |
| #445 | |
class TestProviderContextSafety:
    """Guards against the memory provider persisting data for subagents."""

    def test_subagent_context_does_not_initialize_or_write(self, temp_db, monkeypatch):
        """A provider initialized as a subagent has no beam and stores nothing."""
        import importlib.util
        import sys

        repo_root = Path(__file__).resolve().parents[1]
        if str(repo_root) not in sys.path:
            sys.path.insert(0, str(repo_root))

        monkeypatch.setenv("MNEMOSYNE_DATA_DIR", str(temp_db.parent))

        provider_path = repo_root / "hermes_memory_provider" / "__init__.py"
        spec = importlib.util.spec_from_file_location("mnemo_provider_test", provider_path)
        # Check the spec before using it so a bad path fails on this assertion
        # instead of inside module_from_spec.
        assert spec is not None and spec.loader is not None
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)

        provider = mod.MnemosyneMemoryProvider()
        provider.initialize(
            "subagent-session",
            hermes_home=str(repo_root),
            platform="cli",
            agent_context="subagent",
            agent_identity="test-profile",
            agent_workspace="hermes",
        )

        assert provider._beam is None
        result = provider.handle_tool_call(
            "mnemosyne_remember",
            {
                "content": "subagent should not persist memory",
                "importance": 0.9,
                "source": "test",
                "scope": "session",
            },
        )
        assert "not initialized" in result

        conn = sqlite3.connect(temp_db)
        try:
            # The table may not exist at all; a missing table counts as zero rows.
            exists = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='working_memory'"
            ).fetchone() is not None
            count = conn.execute("SELECT COUNT(*) FROM working_memory").fetchone()[0] if exists else 0
        finally:
            conn.close()
        assert count == 0
| #493 | |
| #494 | |
class TestCrossSessionRecall:
    """Recall across sessions, Chinese-text matching, and tool session switching."""

    def test_global_memory_survives_consolidation_and_recall(self, temp_db, monkeypatch):
        """Regression for issue #7 Bug 2: global memories must survive sleep() and be recallable cross-session."""
        monkeypatch.setenv("MNEMOSYNE_DATA_DIR", str(temp_db.parent))
        # Disable LLM summarization so original Chinese text is preserved in consolidation
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        # Session A: store global memories with backdated timestamps so sleep() consolidates them
        beam_a = BeamMemory(session_id="hermes_session-A", db_path=temp_db)
        beam_a.remember("用户喜欢直接说结论", source="preference", importance=0.95, scope="global")
        beam_a.remember("用户讨论基金时重视手续费口径", source="preference", importance=0.92, scope="global")
        beam_a.remember("本轮只测试 mnemosyne 沙盒", source="test", importance=0.80, scope="session")

        # Backdate all working memories so they are old enough to consolidate
        conn = sqlite3.connect(temp_db)
        try:
            old_ts = (datetime.now() - timedelta(hours=48)).isoformat()
            conn.execute("UPDATE working_memory SET timestamp = ?", (old_ts,))
            conn.commit()
        finally:
            conn.close()

        # Force consolidation (simulate on_session_end)
        result = beam_a.sleep()
        assert result["status"] == "consolidated"

        # Verify consolidated episodic memories preserved global scope
        conn = sqlite3.connect(temp_db)
        try:
            global_rows = conn.execute(
                "SELECT content, scope, session_id FROM episodic_memory WHERE scope = 'global'"
            ).fetchall()
        finally:
            # Close before asserting so a failed check cannot leak the handle.
            conn.close()
        assert len(global_rows) >= 1, "Global memories should survive consolidation with scope preserved"

        # Session B: recall global memories
        beam_b = BeamMemory(session_id="hermes_session-B", db_path=temp_db)

        # Test Chinese query that previously returned 0
        results = beam_b.recall("谁喜欢直接说结论", top_k=5)
        assert len(results) > 0, "Cross-session recall should find global memory with Chinese query"
        contents = [r["content"] for r in results]
        assert any("用户喜欢直接说结论" in c for c in contents)

        # Test another Chinese query
        results2 = beam_b.recall("基金讨论时看重什么口径", top_k=5)
        assert len(results2) > 0, "Cross-session recall should find second global memory"

        # Smoke call only: whether the session-scoped memory shows up depends on
        # scoring, so nothing is asserted about the result — it just must not raise.
        beam_b.recall("本轮只测试", top_k=5)

    def test_fallback_scoring_finds_chinese_substrings(self, temp_db, monkeypatch):
        """Fallback keyword scoring must handle Chinese where words aren't space-delimited."""
        monkeypatch.setenv("MNEMOSYNE_DATA_DIR", str(temp_db.parent))

        beam = BeamMemory(session_id="test-session", db_path=temp_db)
        beam.remember("用户喜欢直接说结论", source="preference", importance=0.9, scope="global")

        # Query that differs at the start but shares a core substring
        results = beam.recall("谁喜欢直接说结论", top_k=5)
        assert len(results) > 0, "Fallback scoring should match shared substrings in Chinese"

    def test_tools_session_singleton_updates(self, temp_db, monkeypatch):
        """Plugin tools _get_memory() must recreate when HERMES_SESSION_ID changes."""
        monkeypatch.setenv("MNEMOSYNE_DATA_DIR", str(temp_db.parent))

        import importlib.util
        import sys

        repo_root = Path(__file__).resolve().parents[1]
        if str(repo_root) not in sys.path:
            sys.path.insert(0, str(repo_root))

        tools_path = repo_root / "hermes_plugin" / "tools.py"
        spec = importlib.util.spec_from_file_location("mnemo_tools_test", tools_path)
        # Check the spec before using it so a bad path fails on this assertion
        # instead of inside module_from_spec.
        assert spec is not None and spec.loader is not None
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        _get_memory = mod._get_memory

        monkeypatch.setenv("HERMES_SESSION_ID", "session-alpha")
        mem_a = _get_memory()
        mem_a.remember("alpha fact", source="test", scope="session")

        monkeypatch.setenv("HERMES_SESSION_ID", "session-beta")
        mem_b = _get_memory()
        # Should be a different instance (or at least different beam session_id)
        assert mem_b.session_id == "session-beta"
        assert mem_a.session_id == "session-alpha"
| #583 | |
| #584 | |
| #585 | class TestTemporalQueries: |
| #586 | """Temporal filtering for BEAM recall — Issue #16.""" |
| #587 | |
| #588 | def test_recall_from_date_filter(self, temp_db): |
| #589 | beam = BeamMemory(session_id="s1", db_path=temp_db) |
| #590 | beam.remember("Meeting about Q1 goals", source="meeting", importance=0.8) |
| #591 | |
| #592 | # Backdate an old memory directly |
| #593 | conn = sqlite3.connect(temp_db) |
| #594 | old_ts = "2025-01-15T10:00:00" |
| #595 | conn.execute( |
| #596 | "INSERT INTO working_memory (id, content, source, timestamp, session_id, importance) VALUES (?, ?, ?, ?, ?, ?)", |
| #597 | ("old1", "Old project kickoff", "meeting", old_ts, "s1", 0.7) |
| #598 | ) |
| #599 | conn.commit() |
| #600 | conn.close() |
| #601 | |
| #602 | # Filter from 2025-04-01 should exclude January memory |
| #603 | results = beam.recall("project", from_date="2025-04-01") |
| #604 | assert all("Old project kickoff" not in r["content"] for r in results) |
| #605 | |
| #606 | def test_recall_to_date_filter(self, temp_db): |
| #607 | beam = BeamMemory(session_id="s1", db_path=temp_db) |
| #608 | beam.remember("Recent standup notes", source="meeting", importance=0.8) |
| #609 | |
| #610 | # Backdate an old memory |
| #611 | conn = sqlite3.connect(temp_db) |
| #612 | old_ts = "2025-01-15T10:00:00" |
| #613 | conn.execute( |
| #614 | "INSERT INTO working_memory (id, content, source, timestamp, session_id, importance) VALUES (?, ?, ?, ?, ?, ?)", |
| #615 | ("old1", "January planning session", "meeting", old_ts, "s1", 0.7) |
| #616 | ) |
| #617 | conn.commit() |
| #618 | conn.close() |
| #619 | |
| #620 | # Filter to 2025-02-01 should only include January memory |
| #621 | results = beam.recall("planning", to_date="2025-02-01") |
| #622 | assert any("January" in r["content"] for r in results) |
| #623 | assert all("Recent" not in r["content"] for r in results) |
| #624 | |
| #625 | def test_recall_source_filter(self, temp_db): |
| #626 | beam = BeamMemory(session_id="s1", db_path=temp_db) |
| #627 | beam.remember("Bug fix for auth", source="github", importance=0.8) |
| #628 | beam.remember("Lunch with team", source="conversation", importance=0.5) |
| #629 | |
| #630 | results = beam.recall("auth", source="github") |
| #631 | assert len(results) >= 1 |
| #632 | assert all(r.get("source") == "github" for r in results) |
| #633 | |
| #634 | def test_recall_date_range_filter(self, temp_db): |
| #635 | beam = BeamMemory(session_id="s1", db_path=temp_db) |
| #636 | |
| #637 | # Insert memories on different dates |
| #638 | conn = sqlite3.connect(temp_db) |
| #639 | dates = [ |
| #640 | ("2025-01-10T10:00:00", "January task A"), |
| #641 | ("2025-03-15T10:00:00", "March task B"), |
| #642 | ("2025-06-20T10:00:00", "June task C"), |
| #643 | ] |
| #644 | for ts, content in dates: |
| #645 | conn.execute( |
| #646 | "INSERT INTO working_memory (id, content, source, timestamp, session_id, importance) VALUES (?, ?, ?, ?, ?, ?)", |
| #647 | (f"m_{content[:5]}", content, "test", ts, "s1", 0.7) |
| #648 | ) |
| #649 | conn.commit() |
| #650 | conn.close() |
| #651 | |
| #652 | # Range: March to May |
| #653 | results = beam.recall("task", from_date="2025-03-01", to_date="2025-05-31") |
| #654 | contents = [r["content"] for r in results] |
| #655 | assert any("March" in c for c in contents) |
| #656 | assert all("January" not in c for c in contents) |
| #657 | assert all("June" not in c for c in contents) |
| #658 | |
def test_recall_with_episodic_temporal_filter(self, temp_db):
    """A backdated episodic memory is recalled unfiltered but excluded by a later from_date."""
    beam = BeamMemory(session_id="s1", db_path=temp_db)
    summary_text = "Q4 review discussion"

    # Create the episodic entry through the public API first ...
    beam.consolidate_to_episodic(
        summary=summary_text,
        source_wm_ids=["wm1"],
        source="meeting",
        importance=0.8,
    )

    # ... then rewrite its timestamp to December 2024 straight in the database.
    db = sqlite3.connect(temp_db)
    db.execute(
        "UPDATE episodic_memory SET timestamp = ? WHERE content = ?",
        ("2024-12-01T10:00:00", summary_text),
    )
    db.commit()
    db.close()

    # With no date filter the memory must be found.
    unfiltered = beam.recall("Q4 review")
    assert any("Q4" in hit["content"] for hit in unfiltered)

    # A from_date in 2025 must leave it out.
    filtered = beam.recall("Q4 review", from_date="2025-01-01")
    assert not any("Q4" in hit["content"] for hit in filtered)
| #681 | |
def test_temporal_triple_auto_generated(self, temp_db):
    """remember() should automatically store an 'occurred_on' triple for the new memory."""
    from mnemosyne.core.triples import TripleStore

    beam = BeamMemory(session_id="s1", db_path=temp_db)
    memory_id = beam.remember("Deploy script updated", source="dev", importance=0.8)

    # Look up triples whose subject is the id returned by remember().
    store = TripleStore(db_path=temp_db)
    found = store.query(subject=memory_id)

    assert len(found) >= 1
    predicates = [triple["predicate"] for triple in found]
    assert "occurred_on" in predicates
| #693 | |
| #694 | |
class TestTokenAwareConsolidation:
    """Tests for batching memories so each batch fits the LLM token budget."""

    def test_sleep_chunks_large_batches(self, temp_db, monkeypatch):
        """BUG-1: sleep() must chunk memories to fit LLM context window."""
        from mnemosyne.core import local_llm

        monkeypatch.setenv("MNEMOSYNE_DATA_DIR", str(temp_db.parent))
        # Force a small context window to trigger chunking.
        # The env vars are kept for any code that reads them lazily, but the
        # module-level constants are read at import time, so they are also
        # patched directly (same approach as the oversized-memory test below).
        monkeypatch.setenv("MNEMOSYNE_LLM_N_CTX", "512")
        monkeypatch.setenv("MNEMOSYNE_LLM_MAX_TOKENS", "128")
        monkeypatch.setattr(local_llm, "LLM_N_CTX", 512)
        monkeypatch.setattr(local_llm, "LLM_MAX_TOKENS", 128)
        # Disable actual LLM — we test the chunking logic
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        beam = BeamMemory(session_id="test-chunking", db_path=temp_db)

        # Store 30 memories of roughly 165 characters each (the sentence is
        # repeated three times); about 40 tokens apiece assuming ~4 chars/token.
        for i in range(30):
            beam.remember(
                f"Memory number {i} with enough content to consume tokens " * 3,
                source="test_batch",
                importance=0.5,
            )

        # Backdate every working-memory row by 48 hours so sleep() picks them up
        conn = sqlite3.connect(temp_db)
        old_ts = (datetime.now() - timedelta(hours=48)).isoformat()
        conn.execute("UPDATE working_memory SET timestamp = ?", (old_ts,))
        conn.commit()
        conn.close()

        result = beam.sleep()
        assert result["status"] == "consolidated"
        assert result["summaries_created"] >= 1

        # Verify no working memory left for this session
        conn = sqlite3.connect(temp_db)
        count = conn.execute(
            "SELECT COUNT(*) FROM working_memory WHERE session_id = ?", ("test-chunking",)
        ).fetchone()[0]
        conn.close()
        assert count == 0

    def test_chunk_memories_by_budget_single_oversized(self, monkeypatch):
        """A single memory exceeding the budget should be skipped from LLM chunking."""
        from mnemosyne.core import local_llm

        # Monkeypatch module-level constants directly (env vars already read at import)
        monkeypatch.setattr(local_llm, "LLM_N_CTX", 128)
        monkeypatch.setattr(local_llm, "LLM_MAX_TOKENS", 32)

        from mnemosyne.core.local_llm import chunk_memories_by_budget

        # One normal memory, one giant memory
        memories = [
            "Short memory.",  # ~3 tokens, fits
            "A" * 500,  # ~125 tokens, exceeds budget
        ]
        chunks = chunk_memories_by_budget(memories)

        # Giant memory should be excluded (it exceeds the total budget)
        assert len(chunks) == 1
        assert chunks[0] == ["Short memory."]
| #754 | |
| #755 | |
class TestTieredDegradation:
    """Tests for tiered episodic degradation — Phase 1 of the tiered memory system."""

    def test_schema_migration_adds_tier_columns(self, temp_db):
        """Wave 1: init_beam() should add tier and degraded_at columns to episodic_memory."""
        # Just creating a BeamMemory triggers init_beam which runs the migration
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        # Read everything needed first and close the connection before asserting,
        # so a failing assertion does not leave the connection open.
        conn = sqlite3.connect(temp_db)
        cursor = conn.cursor()
        cols = [r[1] for r in cursor.execute("PRAGMA table_info(episodic_memory)").fetchall()]
        # Column 1 of a sqlite_master row is the object name (here: the index name).
        indexes = [r[1] for r in cursor.execute(
            "SELECT * FROM sqlite_master WHERE type='index' AND tbl_name='episodic_memory'"
        ).fetchall()]
        conn.close()

        assert "tier" in cols, "tier column missing after migration"
        assert "degraded_at" in cols, "degraded_at column missing after migration"
        assert any("tier" in idx for idx in indexes), "idx_em_tier index missing"

    def test_episodic_memory_defaults_to_tier_1(self, temp_db):
        """New episodic memories should default to tier 1."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        eid = beam.consolidate_to_episodic(
            summary="Default tier should be 1",
            source_wm_ids=["wm1"],
            importance=0.8,
        )

        conn = sqlite3.connect(temp_db)
        tier = conn.execute(
            "SELECT tier FROM episodic_memory WHERE id = ?", (eid,)
        ).fetchone()[0]
        conn.close()
        assert tier == 1, f"Expected tier=1, got tier={tier}"

    def test_degrade_episodic_tier1_to_tier2(self, temp_db, monkeypatch):
        """Tier 1 memories older than TIER2_DAYS should degrade to tier 2."""
        # Module-level constants are read at import time — patch them directly
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 5)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_DAYS", 200)  # far future — won't trigger tier 3

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        eid = beam.consolidate_to_episodic(
            summary="This memory is old enough for tier 2 degradation",
            source_wm_ids=["wm1"],
            importance=0.7,
        )

        # Backdate the episodic memory to be older than 5 days
        conn = sqlite3.connect(temp_db)
        old_ts = (datetime.now() - timedelta(days=10)).isoformat()
        conn.execute("UPDATE episodic_memory SET created_at = ? WHERE id = ?", (old_ts, eid))
        conn.commit()
        conn.close()

        result = beam.degrade_episodic(dry_run=False)
        assert result["tier1_to_tier2"] == 1
        assert result["tier2_to_tier3"] == 0

        # Verify tier changed
        conn = sqlite3.connect(temp_db)
        tier, degraded_at = conn.execute(
            "SELECT tier, degraded_at FROM episodic_memory WHERE id = ?", (eid,)
        ).fetchone()
        conn.close()
        assert tier == 2
        assert degraded_at is not None

    def test_degrade_episodic_tier2_to_tier3(self, temp_db, monkeypatch):
        """Tier 2 memories older than TIER3_DAYS should degrade to tier 3."""
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 1)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_DAYS", 5)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        eid = beam.consolidate_to_episodic(
            summary="This memory will go all the way to tier 3",
            source_wm_ids=["wm1"],
            importance=0.6,
        )

        # First degrade to tier 2 (older than 1 day)
        conn = sqlite3.connect(temp_db)
        old_ts = (datetime.now() - timedelta(days=3)).isoformat()
        conn.execute("UPDATE episodic_memory SET created_at = ? WHERE id = ?", (old_ts, eid))
        conn.commit()
        conn.close()
        beam.degrade_episodic(dry_run=False)  # tier 1 → 2

        # Then push it even older (and pin tier = 2) and degrade again
        conn = sqlite3.connect(temp_db)
        very_old_ts = (datetime.now() - timedelta(days=10)).isoformat()
        conn.execute("UPDATE episodic_memory SET created_at = ?, tier = 2 WHERE id = ?", (very_old_ts, eid))
        conn.commit()
        conn.close()

        result = beam.degrade_episodic(dry_run=False)
        assert result["tier2_to_tier3"] == 1

        conn = sqlite3.connect(temp_db)
        tier = conn.execute("SELECT tier FROM episodic_memory WHERE id = ?", (eid,)).fetchone()[0]
        conn.close()
        assert tier == 3

    def test_degrade_episodic_dry_run(self, temp_db, monkeypatch):
        """Dry run counts candidates but does NOT modify the database."""
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 5)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        beam.consolidate_to_episodic(
            summary="Should be counted but not degraded",
            source_wm_ids=["wm1"],
            importance=0.7,
        )

        conn = sqlite3.connect(temp_db)
        old_ts = (datetime.now() - timedelta(days=10)).isoformat()
        conn.execute("UPDATE episodic_memory SET created_at = ?", (old_ts,))
        conn.commit()

        result = beam.degrade_episodic(dry_run=True)
        # Read the stored tier and release the connection before asserting, so
        # a failing assertion does not leave the connection open.
        tier = conn.execute("SELECT tier FROM episodic_memory").fetchone()[0]
        conn.close()

        assert result["status"] == "dry_run"
        assert result["tier1_to_tier2"] == 1
        # Tier should still be 1 — dry run doesn't modify
        assert tier == 1, "Dry run should not change tier"

    def test_degrade_episodic_respects_batch_limit(self, temp_db, monkeypatch):
        """Degradation should respect DEGRADE_BATCH_SIZE limit."""
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 1)
        monkeypatch.setattr("mnemosyne.core.beam.DEGRADE_BATCH_SIZE", 3)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Consolidate 5 episodic memories first
        eids = [
            beam.consolidate_to_episodic(
                summary=f"Memory {i} for batch limit test",
                source_wm_ids=[f"wm{i}"],
                importance=0.5,
            )
            for i in range(5)
        ]

        # Backdate them all in a single raw connection block
        conn = sqlite3.connect(temp_db, timeout=10)
        old_ts = (datetime.now() - timedelta(days=10)).isoformat()
        for eid in eids:
            conn.execute("UPDATE episodic_memory SET created_at = ? WHERE id = ?", (old_ts, eid))
        conn.commit()
        conn.close()

        result = beam.degrade_episodic(dry_run=False)
        # Should degrade at most DEGRADE_BATCH_SIZE (3), not all 5
        # NOTE(review): this is an upper bound only — a count of 0 would also pass.
        assert result["tier1_to_tier2"] <= 3

    def test_tier_weighting_in_recall(self, temp_db, monkeypatch):
        """Tier 3 memories should score lower than tier 1 in recall."""
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 1)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_DAYS", 5)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_WEIGHT", 0.1)  # heavily penalize

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        eid = beam.consolidate_to_episodic(
            summary="Python projects use virtual environments for isolation",
            source_wm_ids=["wm1"],
            importance=0.9,
        )

        # Degrade to tier 3
        conn = sqlite3.connect(temp_db)
        very_old_ts = (datetime.now() - timedelta(days=30)).isoformat()
        conn.execute("UPDATE episodic_memory SET created_at = ? WHERE id = ?", (very_old_ts, eid))
        conn.commit()
        beam.degrade_episodic(dry_run=False)  # t1→t2
        conn.execute("UPDATE episodic_memory SET tier = 2, created_at = ? WHERE id = ?", (very_old_ts, eid))
        conn.commit()
        beam.degrade_episodic(dry_run=False)  # t2→t3
        conn.close()

        results = beam.recall("Python virtual environments", top_k=5)
        # Should still be findable (just weighted lower)
        # NOTE(review): if recall returns no tier-3 hit this test passes without
        # checking anything; consider asserting that `degraded` is non-empty.
        degraded = [r for r in results if r.get("degradation_tier") == 3]
        if degraded:
            assert degraded[0]["score"] < 1.0, f"Tier 3 score {degraded[0]['score']} should be penalized"

    def test_sleep_includes_degradation(self, temp_db, monkeypatch):
        """sleep() return value must include degradation key."""
        # No actual degradation is expected; only the shape of the result is tested.
        # The env vars are kept, and the module constants are patched as well
        # because they are read at import time (see the tier1→tier2 test above).
        monkeypatch.setenv("MNEMOSYNE_TIER2_DAYS", "30")
        monkeypatch.setenv("MNEMOSYNE_TIER3_DAYS", "200")
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 30)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_DAYS", 200)
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Inject old working memory to trigger consolidation
        conn = sqlite3.connect(temp_db)
        old_ts = (datetime.now() - timedelta(hours=48)).isoformat()
        for i in range(2):
            conn.execute(
                "INSERT INTO working_memory (id, content, source, timestamp, session_id) VALUES (?, ?, ?, ?, ?)",
                (f"old{i}", f"sleep test content {i}", "conversation", old_ts, "s1"),
            )
        conn.commit()
        conn.close()

        result = beam.sleep(dry_run=False)
        assert "degradation" in result, "sleep() should include degradation key"
        assert "status" in result["degradation"]
        assert "tier1_to_tier2" in result["degradation"]

    def test_sleep_all_sessions_includes_degradation(self, temp_db, monkeypatch):
        """sleep_all_sessions() return value must include degradation key."""
        # Env vars kept; module constants patched too since they are read at import time.
        monkeypatch.setenv("MNEMOSYNE_TIER2_DAYS", "30")
        monkeypatch.setenv("MNEMOSYNE_TIER3_DAYS", "200")
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 30)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_DAYS", 200)
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Seed an old working memory that belongs to a different session ("s2").
        conn = sqlite3.connect(temp_db)
        old_ts = (datetime.now() - timedelta(hours=48)).isoformat()
        conn.execute(
            "INSERT INTO working_memory (id, content, source, timestamp, session_id) VALUES (?, ?, ?, ?, ?)",
            ("s2-old", "all sessions sleep test", "conversation", old_ts, "s2"),
        )
        conn.commit()
        conn.close()

        result = beam.sleep_all_sessions(dry_run=False)
        assert "degradation" in result, "sleep_all_sessions() should include degradation key"

    def test_old_memory_still_recallable_after_degradation(self, temp_db, monkeypatch):
        """Integration: store old memory, degrade to tier 3, still recallable."""
        monkeypatch.setattr("mnemosyne.core.beam.TIER2_DAYS", 1)
        monkeypatch.setattr("mnemosyne.core.beam.TIER3_DAYS", 5)
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        eid = beam.consolidate_to_episodic(
            summary="The user's favorite programming language is Rust for systems work",
            source_wm_ids=["wm1"],
            importance=0.85,
        )

        conn = sqlite3.connect(temp_db)
        very_old_ts = (datetime.now() - timedelta(days=200)).isoformat()
        conn.execute("UPDATE episodic_memory SET created_at = ? WHERE id = ?", (very_old_ts, eid))
        conn.commit()
        beam.degrade_episodic(dry_run=False)  # t1→t2
        conn.execute("UPDATE episodic_memory SET tier = 2 WHERE id = ?", (eid,))
        conn.commit()
        beam.degrade_episodic(dry_run=False)  # t2→t3

        # Verify it's tier 3
        tier = conn.execute("SELECT tier FROM episodic_memory WHERE id = ?", (eid,)).fetchone()[0]
        conn.close()
        assert tier == 3, f"Expected tier 3, got {tier}"

        # Should still be recallable — this is the marketing promise
        results = beam.recall("favorite programming language", top_k=5)
        contents = [r["content"] for r in results]
        assert len(results) > 0, "Tier 3 memory should still be recallable"
        assert any("Rust" in c for c in contents), (
            f"Tier 3 memory should contain 'Rust', got contents: {contents}"
        )
| #1025 | |
| #1026 | |
class TestSmartCompression:
    """Phase 2: entity-aware extraction for tier 2→3 degradation."""

    def test_extract_key_signal_keeps_proper_nouns(self, temp_db):
        """Sentences naming tools and languages should survive; filler sentences should not."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        text = (
            "The user's favorite editor is Neovim with LazyVim. "
            "They deploy everything with Docker Compose. "
            "Their preferred language is Rust for systems work. "
            "The weather was nice on Tuesday. "
            "Nothing special happened in the morning. "
            "They also use GitHub Actions for CI/CD."
        )
        compressed = beam._extract_key_signal(text, max_chars=200)

        # High-signal sentences must be kept.
        assert "Neovim" in compressed, f"Lost 'Neovim': {compressed}"
        assert "Docker" in compressed, f"Lost 'Docker': {compressed}"
        assert "Rust" in compressed, f"Lost 'Rust': {compressed}"
        # Low-signal sentences must be gone.
        assert "weather" not in compressed, f"Weather survived: {compressed}"
        assert "Nothing special" not in compressed, f"Noise survived: {compressed}"

    def test_extract_key_signal_handles_no_sentences(self, temp_db):
        """Single blob of text without sentence boundaries — falls back to prefix."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        blob = "A" * 500  # no punctuation anywhere
        compressed = beam._extract_key_signal(blob, max_chars=100)

        # Leaves headroom over max_chars for a trailing " [...]" marker.
        assert len(compressed) <= 110
        assert compressed.startswith("A" * 90)

    def test_extract_key_signal_short_content_passthrough(self, temp_db):
        """Content under max_chars should be returned as-is."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        short_text = "Short memory about Python."
        assert beam._extract_key_signal(short_text, max_chars=500) == short_text

    def test_smart_compress_preserves_entities_in_degradation(self, temp_db, monkeypatch):
        """End-to-end: smart compression keeps key facts where naive prefix would lose them."""
        beam_settings = {
            "TIER2_DAYS": 1,
            "TIER3_DAYS": 5,
            "SMART_COMPRESS": True,
            "TIER3_MAX_CHARS": 200,
        }
        for name, value in beam_settings.items():
            monkeypatch.setattr(f"mnemosyne.core.beam.{name}", value)
        monkeypatch.setattr("mnemosyne.core.local_llm.llm_available", lambda: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)

        # The one sentence that matters sits near the END of the summary.
        summary_text = (
            "Morning standup was uneventful. The coffee was cold. "
            "Lunch was a sandwich from the deli. Team discussed vacation plans. "
            "CRITICAL: The production database password was changed to XKCD-correct-horse-battery-staple. "
            "Afternoon was quiet. Went home at 5pm."
        )
        eid = beam.consolidate_to_episodic(
            summary=summary_text,
            source_wm_ids=["wm1"],
            importance=0.9,
        )

        # Backdate by 200 days, then walk the memory down to tier 3.
        db = sqlite3.connect(temp_db)
        backdated = (datetime.now() - timedelta(days=200)).isoformat()
        db.execute("UPDATE episodic_memory SET created_at = ? WHERE id = ?", (backdated, eid))
        db.commit()
        beam.degrade_episodic(dry_run=False)  # t1→t2
        db.execute("UPDATE episodic_memory SET tier = 2 WHERE id = ?", (eid,))
        db.commit()
        db.close()
        beam.degrade_episodic(dry_run=False)  # t2→t3

        # Read back whatever content is stored after degradation.
        db = sqlite3.connect(temp_db)
        row = db.execute(
            "SELECT content FROM episodic_memory WHERE id = ?", (eid,)
        ).fetchone()
        db.close()
        tier3_content = row[0]

        # A naive prefix cut would have kept only "Morning standup was uneventful".
        assert any(marker in tier3_content for marker in ("XKCD", "password")), (
            f"Smart compression should preserve critical entities. Got: {tier3_content}"
        )
| #1109 | |
| #1110 | |
class TestVeracity:
    """Phase 3: memory confidence / veracity signal."""

    def test_schema_adds_veracity_columns(self, temp_db):
        """init_beam should add veracity to working_memory and episodic_memory."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        conn = sqlite3.connect(temp_db)
        wm_cols = [r[1] for r in conn.execute("PRAGMA table_info(working_memory)").fetchall()]
        em_cols = [r[1] for r in conn.execute("PRAGMA table_info(episodic_memory)").fetchall()]
        conn.close()
        assert "veracity" in wm_cols
        assert "veracity" in em_cols

    def test_remember_defaults_to_unknown(self, temp_db):
        """remember() without explicit veracity defaults to 'unknown'."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        mid = beam.remember("A fact", source="test", importance=0.5)

        conn = sqlite3.connect(temp_db)
        veracity = conn.execute(
            "SELECT veracity FROM working_memory WHERE id = ?", (mid,)
        ).fetchone()[0]
        conn.close()
        assert veracity == "unknown"

    def test_remember_explicit_veracity(self, temp_db):
        """remember() with veracity='stated' stores correctly."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        mid = beam.remember("User said this", source="user", veracity="stated")

        conn = sqlite3.connect(temp_db)
        veracity = conn.execute(
            "SELECT veracity FROM working_memory WHERE id = ?", (mid,)
        ).fetchone()[0]
        conn.close()
        assert veracity == "stated"

    def test_recall_veracity_filter(self, temp_db):
        """recall(veracity='stated') should only return stated memories."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        beam.remember("Stated preference: dark mode", source="user", veracity="stated")
        beam.remember("Inferred: probably likes Python", source="conversation", veracity="inferred")
        beam.remember("Tool output: cron ran at 3am", source="cron", veracity="tool")

        results = beam.recall("preference", veracity="stated")
        assert all(r["veracity"] == "stated" for r in results)
        assert any("dark mode" in r["content"] for r in results)

    def test_veracity_weighting_in_recall(self, temp_db, monkeypatch):
        """Stated memories should score higher than inferred ones."""
        monkeypatch.setattr("mnemosyne.core.beam.STATED_WEIGHT", 1.0)
        monkeypatch.setattr("mnemosyne.core.beam.INFERRED_WEIGHT", 0.3)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Consolidate two similar memories with different veracity
        beam.consolidate_to_episodic(
            summary="User stated: prefers Rust for systems programming",
            source_wm_ids=["wm1"],
            importance=0.8,
        )
        beam.consolidate_to_episodic(
            summary="Inferred: agent thinks user likes Go",
            source_wm_ids=["wm2"],
            importance=0.8,
        )

        # Set veracity directly in DB
        conn = sqlite3.connect(temp_db)
        conn.execute("UPDATE episodic_memory SET veracity = 'stated' WHERE content LIKE '%stated%'")
        conn.execute("UPDATE episodic_memory SET veracity = 'inferred' WHERE content LIKE '%Inferred%'")
        conn.commit()
        conn.close()

        results = beam.recall("systems programming language", top_k=5)
        stated_results = [r for r in results if r.get("veracity") == "stated"]
        inferred_results = [r for r in results if r.get("veracity") == "inferred"]

        # NOTE(review): the comparison only runs when recall returns at least one
        # hit of each veracity; otherwise this test passes without checking scores.
        if stated_results and inferred_results:
            assert stated_results[0]["score"] > inferred_results[0]["score"], (
                f"Stated score {stated_results[0]['score']} should exceed inferred {inferred_results[0]['score']}"
            )

    def test_get_contaminated_returns_non_stated(self, temp_db):
        """get_contaminated() should return inferred/tool/imported/unknown but not stated."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        # Single source of truth for (content, veracity): the same list drives
        # both memory creation and the veracity UPDATE, so they cannot drift apart.
        cases = [
            ("User said this explicitly", "stated"),
            ("Agent inferred this", "inferred"),
            ("Cron injected this", "tool"),
            ("Imported from Mem0", "imported"),
            ("Legacy uncategorized memory", "unknown"),
        ]
        eids = [
            beam.consolidate_to_episodic(
                summary=content, source_wm_ids=["wm"], importance=0.8
            )
            for content, _ in cases
        ]

        # Veracity is assigned straight in the database for each created memory.
        conn = sqlite3.connect(temp_db)
        for eid, (_, veracity_val) in zip(eids, cases):
            conn.execute("UPDATE episodic_memory SET veracity = ? WHERE id = ?", (veracity_val, eid))
        conn.commit()
        conn.close()

        contaminated = beam.get_contaminated(limit=10)
        contents = [c["content"] for c in contaminated]
        assert any("inferred" in c for c in contents)
        assert any("Cron injected" in c for c in contents)
        assert any("Imported" in c for c in contents)
        assert any("Legacy" in c for c in contents)
        # Stated should NOT appear
        assert not any("explicitly" in c for c in contents)

    def test_get_contaminated_respects_importance(self, temp_db):
        """get_contaminated() with min_importance filter."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        eid = beam.consolidate_to_episodic(
            summary="Low importance memory", source_wm_ids=["wm"], importance=0.2
        )
        conn = sqlite3.connect(temp_db)
        conn.execute("UPDATE episodic_memory SET veracity = 'inferred' WHERE id = ?", (eid,))
        conn.commit()
        conn.close()

        # Importance 0.2 is below the 0.5 threshold requested here.
        results = beam.get_contaminated(limit=10, min_importance=0.5)
        assert len(results) == 0, "Low importance should be filtered out"

    def test_recall_still_works_without_veracity_filter(self, temp_db):
        """recall() without veracity filter should return all memories."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)
        beam.remember("Fact A", veracity="stated")
        beam.remember("Fact B", veracity="inferred")

        results = beam.recall("Fact", top_k=10)
        assert len(results) >= 2
| #1249 |