repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (15d ago)
| #1 | """Regression tests for [C18.b]: degrade_episodic updates content text but |
| #2 | leaves stale dense embeddings. Pre-fix the embedding stored in vec_episodes |
| #3 | or memory_embeddings still represented the ORIGINAL content even after the |
| #4 | content was compressed/truncated, causing dense recall to score against |
| #5 | content that no longer exists in the row. |
| #6 | |
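| #7 | Two core scenarios (the tests below also cover the binary vector, the tier 1→2 LLM path, and rollback on refresh failure): |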
| #8 | 1. With embeddings provider available, degrade regenerates the embedding |
| #9 | to match the new compressed content. |
| #10 | 2. With embeddings provider unavailable, degrade invalidates (deletes) |
| #11 | the stale embedding so dense recall doesn't return semantically |
| #12 | misleading results. |
| #13 | """ |
| #14 | |
| #15 | import sqlite3 |
| #16 | import tempfile |
| #17 | from datetime import datetime, timedelta |
| #18 | from pathlib import Path |
| #19 | |
| #20 | import numpy as np |
| #21 | import pytest |
| #22 | |
| #23 | from mnemosyne.core import beam as beam_module |
| #24 | from mnemosyne.core.beam import BeamMemory |
| #25 | |
| #26 | |
@pytest.fixture
def temp_db():
    """Yield the path of a not-yet-created ``test.db`` inside a scratch dir.

    The enclosing temporary directory (and anything written into it) is
    removed when the fixture is torn down.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        db_file = Path(scratch_dir).joinpath("test.db")
        yield db_file
| #31 | |
| #32 | |
def _content_to_vec(text: str, dim: int = 384) -> np.ndarray:
    """Return a deterministic, content-derived stand-in for an embedding.

    Different texts produce observably different vectors while identical
    texts always produce identical vectors. Signal is spread across all
    ``dim`` dimensions so sign-based binarization and other compression
    schemes can still detect a content change.

    The seed is derived from a CRC32 of the UTF-8 bytes rather than the
    builtin ``hash()``: string hashes are salted per interpreter process,
    which made the vectors differ between test runs and failures hard to
    reproduce.

    Args:
        text: Content to encode. The empty string is allowed and yields
            the pure seeded-noise vector with no anchor dimensions set.
        dim: Length of the returned vector. Values below 4 are accepted;
            only the anchors that fit are written.

    Returns:
        A ``float32`` array of shape ``(dim,)``.
    """
    import zlib  # local import keeps this helper self-contained

    # "surrogatepass" so lone surrogates encode instead of raising, matching
    # the builtin hash(), which accepted any str.
    digest = zlib.crc32(text.encode("utf-8", "surrogatepass"))

    # Base signal: low-amplitude noise seeded by the content digest.
    rng = np.random.RandomState(digest & 0x7FFFFFFF)
    v = rng.randn(dim).astype(np.float32) * 0.1

    # Anchor dimensions with a stronger content-specific signal.
    if text:
        low16 = digest & 0xFFFF
        anchors = (
            len(text) * 0.01,
            ord(text[0]) * 0.01,
            (low16 % 256) * 0.01,
            ((low16 >> 8) % 256) * 0.01,
        )
        # Slice to ``dim`` so tiny vectors don't raise IndexError.
        for index, value in enumerate(anchors[:dim]):
            v[index] = value
    return v
| #52 | |
| #53 | |
@pytest.fixture
def fake_embeddings(monkeypatch):
    """Swap the embeddings provider for a cheap, content-deterministic stub.

    After this fixture runs, ``embeddings.available()`` reports True and
    ``embeddings.embed()`` stacks one ``_content_to_vec`` vector per input
    text. ``beam_module._vec_available`` is also forced to False so the
    ``memory_embeddings`` fallback store is used and sqlite-vec does not
    need to be loaded.

    Returns the patched embeddings module.
    """
    from mnemosyne.core import embeddings as emb

    def _always_available():
        return True

    def _embed_batch(texts):
        vectors = [_content_to_vec(t) for t in texts]
        return np.stack(vectors)

    def _vec_never_available(conn):
        return False

    monkeypatch.setattr(emb, "available", _always_available)
    monkeypatch.setattr(emb, "embed", _embed_batch)
    # sqlite-vec presence varies across test environments and the bug is
    # identical for both stores, so always take the fallback path.
    monkeypatch.setattr(beam_module, "_vec_available", _vec_never_available)
    return emb
| #71 | |
| #72 | |
def _read_fallback_embedding(db_path, memory_id):
    """Fetch the ``embedding_json`` value stored in ``memory_embeddings``.

    Opens a short-lived connection to ``db_path`` and returns the stored
    value for ``memory_id``, or ``None`` when no such row exists.
    """
    query = "SELECT embedding_json FROM memory_embeddings WHERE memory_id = ?"
    connection = sqlite3.connect(str(db_path))
    try:
        hit = connection.execute(query, (memory_id,)).fetchone()
    finally:
        connection.close()
    if hit is None:
        return None
    return hit[0]
| #85 | |
| #86 | |
def _read_binary_vector(db_path, memory_id):
    """Fetch the ``binary_vector`` column of one ``episodic_memory`` row.

    Opens a short-lived connection to ``db_path``. Returns the stored
    value for the row whose ``id`` equals ``memory_id``; returns ``None``
    both when the row is missing and when the column itself is NULL.
    """
    query = "SELECT binary_vector FROM episodic_memory WHERE id = ?"
    connection = sqlite3.connect(str(db_path))
    try:
        hit = connection.execute(query, (memory_id,)).fetchone()
    finally:
        connection.close()
    if hit is None:
        return None
    return hit[0]
| #97 | |
| #98 | |
def _backdate_episode(db_path, memory_id, age_days, tier=None):
    """Rewrite ``created_at`` (and optionally ``tier``) of one episodic row.

    Args:
        db_path: Path of the SQLite database file.
        memory_id: ``episodic_memory.id`` of the row to modify.
        age_days: The row's ``created_at`` is set to now minus this many days.
        tier: When given, the row's ``tier`` is overwritten with this value;
            when ``None`` the tier is left untouched.

    The connection is closed even if the UPDATE raises, so a failing test
    never leaves a dangling handle on the temp database.
    """
    old_ts = (datetime.now() - timedelta(days=age_days)).isoformat()
    conn = sqlite3.connect(str(db_path))
    try:
        if tier is None:
            conn.execute(
                "UPDATE episodic_memory SET created_at = ? WHERE id = ?",
                (old_ts, memory_id),
            )
        else:
            conn.execute(
                "UPDATE episodic_memory SET tier = ?, created_at = ? WHERE id = ?",
                (tier, old_ts, memory_id),
            )
        conn.commit()
    finally:
        conn.close()


def _read_content_and_tier(db_path, memory_id):
    """Return the ``(content, tier)`` row for ``memory_id``, or ``None``."""
    conn = sqlite3.connect(str(db_path))
    try:
        return conn.execute(
            "SELECT content, tier FROM episodic_memory WHERE id = ?",
            (memory_id,),
        ).fetchone()
    finally:
        conn.close()


class TestDegradeEpisodicVectorRefresh:
    """Regression tests for [C18.b]: ``degrade_episodic`` must keep the stored
    vectors (``memory_embeddings`` row and ``binary_vector`` column) in step
    with the content it rewrites — regenerating them when the embeddings
    provider is available and invalidating them when it is not."""

    # Long content reused by the tier 2→3 tests; each test checks or relies
    # on it being truncated by degradation.
    ORIGINAL = ("ORIGINAL_DETAILED_CONTEXT " * 30).strip()

    def test_tier_2_to_tier_3_regenerates_embedding(self, temp_db, fake_embeddings):
        """When tier 2→3 truncation changes content, the embedding stored
        in memory_embeddings must update to match the new content."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        # Long original content that will be truncated by tier 2→3 (TIER3_MAX_CHARS=300)
        original = self.ORIGINAL
        assert len(original) > beam_module.TIER3_MAX_CHARS

        memory_id = beam.consolidate_to_episodic(
            summary=original,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )

        original_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert original_embedding is not None, (
            "memory_embeddings should contain a row after consolidate_to_episodic"
        )

        # Backdate to make the row eligible for tier 2→3 and set tier=2 so it
        # hits the truncation path (skips the LLM-summarization tier 1→2 path
        # which is a no-op when local_llm is unavailable).
        _backdate_episode(temp_db, memory_id, beam_module.TIER3_DAYS + 1, tier=2)

        result = beam.degrade_episodic(dry_run=False)
        assert result["tier2_to_tier3"] == 1, (
            f"Expected one tier 2→3 transition, got {result}"
        )

        new_content = _read_content_and_tier(temp_db, memory_id)[0]
        assert new_content != original, "tier 2→3 should have truncated the content"

        post_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert post_embedding is not None, (
            "memory_embeddings row missing after degrade; expected regenerated, "
            "not deleted, when the embeddings provider is available"
        )
        assert post_embedding != original_embedding, (
            "memory_embeddings still holds the pre-degradation embedding — "
            "dense recall would score against original content while displaying "
            "truncated content. C18.b regeneration did not run."
        )

    def test_tier_2_to_tier_3_regenerates_binary_vector(self, temp_db, fake_embeddings):
        """The binary_vector column on episodic_memory must also update
        to match the new content."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        memory_id = beam.consolidate_to_episodic(
            summary=self.ORIGINAL,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )

        # Checked after seeding, exactly as before, so the point at which
        # ``_mib`` is inspected does not move.
        if beam_module._mib is None:
            pytest.skip("binary vectorization not available in this build")

        original_bv = _read_binary_vector(temp_db, memory_id)
        assert original_bv is not None

        _backdate_episode(temp_db, memory_id, beam_module.TIER3_DAYS + 1, tier=2)

        beam.degrade_episodic(dry_run=False)

        post_bv = _read_binary_vector(temp_db, memory_id)
        assert post_bv is not None, (
            "binary_vector should be present (regenerated, not nulled) when "
            "the embedding provider is available"
        )
        assert post_bv != original_bv, (
            "binary_vector still holds pre-degradation bytes — same C18.b drift"
        )

    def test_tier_1_to_tier_2_llm_path_regenerates_embedding(
        self, temp_db, fake_embeddings, monkeypatch
    ):
        """Tier 1→2 with the LLM-summarization path active: monkeypatched
        local_llm replaces content with a stub summary. The embedding
        must regenerate to match. Closes the test gap from /review —
        same _refresh_episodic_embedding path is exercised but a
        regression specific to the tier 1→2 hookup (e.g., wrong arg)
        wouldn't slip through."""
        from mnemosyne.core import local_llm

        monkeypatch.setattr(local_llm, "llm_available", lambda: True)
        monkeypatch.setattr(
            local_llm, "summarize_memories",
            lambda lines, source="": "STUB SUMMARY produced by LLM (deterministic)",
        )

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Long content (>300 chars) so the tier 1→2 path hits the LLM branch
        long_original = ("OriginalLongFactPrefix " + "padding " * 100).strip()
        assert len(long_original) > 300

        memory_id = beam.consolidate_to_episodic(
            summary=long_original,
            source_wm_ids=["wm-1"],
            importance=0.6,
        )
        original_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert original_embedding is not None

        # Backdate so the row is eligible for tier 1→2 (tier defaults to 1
        # in the schema, so we just need the timestamp).
        _backdate_episode(temp_db, memory_id, beam_module.TIER2_DAYS + 1)

        result = beam.degrade_episodic(dry_run=False)
        assert result["tier1_to_tier2"] == 1, (
            f"Expected one tier 1→2 transition, got {result}"
        )

        # Content should now be the stub summary
        new_content = _read_content_and_tier(temp_db, memory_id)[0]
        assert new_content.startswith("STUB SUMMARY"), (
            f"tier 1→2 LLM path didn't replace content: {new_content[:60]!r}"
        )

        # Embedding must match new (summary) content, not original
        post_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert post_embedding is not None
        assert post_embedding != original_embedding, (
            "Embedding still reflects pre-LLM content; tier 1→2 path "
            "did not call _refresh_episodic_embedding"
        )

    def test_refresh_failure_rolls_back_content_update(
        self, temp_db, fake_embeddings, monkeypatch
    ):
        """[C18.b /review finding #1] If _refresh_episodic_embedding raises
        after the UPDATE statement runs, the SAVEPOINT must roll back the
        content mutation so we don't commit content=NEW with embedding=OLD
        (the very drift this PR fixes). Pre-fix, the broad except in the
        loop body swallowed the refresh exception and the UPDATE stayed
        staged in the implicit transaction."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        original = self.ORIGINAL
        memory_id = beam.consolidate_to_episodic(
            summary=original,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )

        # Backdate + mark tier 2 so degrade hits the tier 2→3 path.
        _backdate_episode(temp_db, memory_id, beam_module.TIER3_DAYS + 1, tier=2)

        # Force the refresh to raise mid-call so we exercise the SAVEPOINT
        # rollback path. We patch on the instance so other beam instances
        # in the same test session aren't affected.
        def boom(*args, **kwargs):
            raise RuntimeError("simulated refresh failure")
        monkeypatch.setattr(beam, "_refresh_episodic_embedding", boom)

        result = beam.degrade_episodic(dry_run=False)
        # The row should NOT count as consolidated since the savepoint
        # rolled back.
        assert result["tier2_to_tier3"] == 0, (
            f"Refresh raised but row still counted as degraded: {result}"
        )

        # Critically: content must remain at the original (rollback worked),
        # NOT the truncated form. If the SAVEPOINT didn't roll back, the
        # UPDATE would have committed and we'd see truncated content + stale
        # embedding — exactly the C18.b drift.
        post_content = _read_content_and_tier(temp_db, memory_id)
        assert post_content[0] == original, (
            f"SAVEPOINT did not roll back: content was mutated despite refresh "
            f"failure. Got {post_content[0][:60]!r}, expected original."
        )
        assert post_content[1] == 2, (
            f"SAVEPOINT did not roll back tier change: got tier={post_content[1]}"
        )

    def test_tier_2_to_tier_3_invalidates_when_provider_unavailable(
        self, temp_db, monkeypatch
    ):
        """If embeddings provider is unavailable at degrade time, the stale
        embedding rows must be invalidated so dense recall can't return
        semantically misleading hits."""
        from mnemosyne.core import embeddings as emb

        # Phase 1: provider available — seed.
        monkeypatch.setattr(emb, "available", lambda: True)
        monkeypatch.setattr(
            emb, "embed",
            lambda texts: np.stack([_content_to_vec(t) for t in texts]),
        )
        monkeypatch.setattr(beam_module, "_vec_available", lambda conn: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        memory_id = beam.consolidate_to_episodic(
            summary=self.ORIGINAL,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )
        assert _read_fallback_embedding(temp_db, memory_id) is not None

        # Phase 2: provider goes unavailable BEFORE degrade.
        monkeypatch.setattr(emb, "available", lambda: False)

        _backdate_episode(temp_db, memory_id, beam_module.TIER3_DAYS + 1, tier=2)

        beam.degrade_episodic(dry_run=False)

        post_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert post_embedding is None, (
            "Stale memory_embeddings row remained after degrade with no embeddings "
            "provider. Should have been deleted to avoid ranking against content "
            "that no longer matches the row's text."
        )

        post_bv = _read_binary_vector(temp_db, memory_id)
        if beam_module._mib is not None:
            assert post_bv is None, (
                "binary_vector should be NULLed when the embedding provider is "
                "unavailable at degrade time"
            )
| #369 |