my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""Regression tests for [C18.b]: degrade_episodic updates content text but
#2	leaves stale dense embeddings. Pre-fix the embedding stored in vec_episodes
#3	or memory_embeddings still represented the ORIGINAL content even after the
#4	content was compressed/truncated, causing dense recall to score against
#5	content that no longer exists in the row.
#6
#7	Two tests:
#8	1. With embeddings provider available, degrade regenerates the embedding
#9	to match the new compressed content.
#10	2. With embeddings provider unavailable, degrade invalidates (deletes)
#11	the stale embedding so dense recall doesn't return semantically
#12	misleading results.
#13	"""
#14
import sqlite3
import tempfile
import zlib
from datetime import datetime, timedelta
from pathlib import Path

import numpy as np
import pytest

from mnemosyne.core import beam as beam_module
from mnemosyne.core.beam import BeamMemory
#25
#26
@pytest.fixture
def temp_db():
    """Yield the path ``<tmpdir>/test.db`` inside a fresh temporary directory.

    The file itself is not created here; only the path is handed to the
    test. The temporary directory is removed when the fixture is torn down.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir) / "test.db"
#31
#32
def _content_to_vec(text: str, dim: int = 384) -> np.ndarray:
    """Deterministic content-encoding 'embedding'.

    Different content produces different vectors. Signal is spread across
    all dimensions so binarization (sign-based) and other compression
    schemes can detect content changes.

    The seed is derived from ``zlib.crc32`` of the UTF-8 bytes rather than
    the built-in ``hash()``: ``hash()`` on ``str`` is salted per interpreter
    process, so it is only stable within a single run, whereas CRC32 yields
    the same vector for the same text in every run.

    Args:
        text: Content to encode. An empty string yields only the base noise.
        dim: Length of the returned vector.

    Returns:
        A ``float32`` array of shape ``(dim,)``.
    """
    digest = zlib.crc32(text.encode("utf-8", errors="surrogatepass"))
    rng = np.random.RandomState(digest & 0x7FFFFFFF)
    # Base signal: spread text-derived noise across the full dimension range.
    v = rng.randn(dim).astype(np.float32) * 0.1
    if text:
        h = digest & 0xFFFF
        # Anchor dimensions with stronger content-specific signal.
        anchors = (
            float(len(text)) * 0.01,
            float(ord(text[0])) * 0.01,
            float(h % 256) * 0.01,
            float((h >> 8) % 256) * 0.01,
        )
        # Slice so a vector shorter than four entries does not overflow.
        for index, value in enumerate(anchors[:dim]):
            v[index] = value
    return v
#52
#53
@pytest.fixture
def fake_embeddings(monkeypatch):
    """Patch the embeddings module for the duration of a test.

    ``available()`` is made to return True, ``embed()`` returns
    content-deterministic vectors built by ``_content_to_vec``, and
    ``beam_module._vec_available`` is forced to False so the in-memory
    fallback path is used and sqlite-vec does not need to be loaded.

    Returns:
        The patched ``mnemosyne.core.embeddings`` module.
    """
    from mnemosyne.core import embeddings as emb

    monkeypatch.setattr(emb, "available", lambda: True)
    monkeypatch.setattr(
        emb,
        "embed",
        lambda texts: np.stack([_content_to_vec(t) for t in texts]),
    )
    # Force the memory_embeddings fallback path; sqlite-vec presence
    # varies across test environments and the bug is identical for
    # both stores.
    monkeypatch.setattr(beam_module, "_vec_available", lambda conn: False)
    return emb
#71
#72
def _read_fallback_embedding(db_path, memory_id):
    """Return the serialized embedding stored in ``memory_embeddings``.

    Args:
        db_path: Path to the SQLite database file (converted with ``str()``).
        memory_id: Value matched against the ``memory_id`` column.

    Returns:
        The ``embedding_json`` value of the matching row, or ``None`` if no
        row exists for ``memory_id``.
    """
    conn = sqlite3.connect(str(db_path))
    try:
        row = conn.execute(
            "SELECT embedding_json FROM memory_embeddings WHERE memory_id = ?",
            (memory_id,),
        ).fetchone()
        return row[0] if row else None
    finally:
        # Always release the handle, even if the query raises.
        conn.close()
#85
#86
def _read_binary_vector(db_path, memory_id):
    """Return the ``binary_vector`` column of an ``episodic_memory`` row.

    Args:
        db_path: Path to the SQLite database file (converted with ``str()``).
        memory_id: Value matched against the ``id`` column.

    Returns:
        The stored ``binary_vector`` value, or ``None`` when the row does not
        exist (a row whose column is NULL also yields ``None``).
    """
    conn = sqlite3.connect(str(db_path))
    try:
        row = conn.execute(
            "SELECT binary_vector FROM episodic_memory WHERE id = ?",
            (memory_id,),
        ).fetchone()
        return row[0] if row else None
    finally:
        # Always release the handle, even if the query raises.
        conn.close()
#97
#98
class TestDegradeEpisodicVectorRefresh:
    """Regression tests for [C18.b]: after ``degrade_episodic`` rewrites a
    row's content, the stored embedding and binary vector must be refreshed
    or invalidated rather than left describing the old content."""

    @staticmethod
    def _backdate(db_path, memory_id, days, tier=None):
        """Move a row's ``created_at`` ``days`` days into the past.

        When ``tier`` is given the row's ``tier`` column is set as well, so
        a test can steer the row onto a specific degradation path.
        """
        old_ts = (datetime.now() - timedelta(days=days)).isoformat()
        conn = sqlite3.connect(str(db_path))
        try:
            if tier is None:
                conn.execute(
                    "UPDATE episodic_memory SET created_at = ? WHERE id = ?",
                    (old_ts, memory_id),
                )
            else:
                conn.execute(
                    "UPDATE episodic_memory SET tier = ?, created_at = ? WHERE id = ?",
                    (tier, old_ts, memory_id),
                )
            conn.commit()
        finally:
            # Close even on failure so a broken statement doesn't leak the handle.
            conn.close()

    @staticmethod
    def _fetch_content_and_tier(db_path, memory_id):
        """Return the ``(content, tier)`` row for ``memory_id`` (``None`` if absent)."""
        conn = sqlite3.connect(str(db_path))
        try:
            return conn.execute(
                "SELECT content, tier FROM episodic_memory WHERE id = ?",
                (memory_id,),
            ).fetchone()
        finally:
            conn.close()

    def test_tier_2_to_tier_3_regenerates_embedding(self, temp_db, fake_embeddings):
        """When tier 2→3 truncation changes content, the embedding stored
        in memory_embeddings must update to match the new content."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        # Long original content that will be truncated by tier 2→3 (TIER3_MAX_CHARS=300)
        original = ("ORIGINAL_DETAILED_CONTEXT " * 30).strip()
        assert len(original) > beam_module.TIER3_MAX_CHARS

        memory_id = beam.consolidate_to_episodic(
            summary=original,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )

        original_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert original_embedding is not None, (
            "memory_embeddings should contain a row after consolidate_to_episodic"
        )

        # Backdate to make the row eligible for tier 2→3 and set tier=2 so it
        # hits the truncation path (skips the LLM-summarization tier 1→2 path
        # which is a no-op when local_llm is unavailable).
        self._backdate(temp_db, memory_id, days=beam_module.TIER3_DAYS + 1, tier=2)

        result = beam.degrade_episodic(dry_run=False)
        assert result["tier2_to_tier3"] == 1, (
            f"Expected one tier 2→3 transition, got {result}"
        )

        new_content, _ = self._fetch_content_and_tier(temp_db, memory_id)
        assert new_content != original, "tier 2→3 should have truncated the content"

        post_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert post_embedding is not None, (
            "memory_embeddings row missing after degrade; expected regenerated, "
            "not deleted, when the embeddings provider is available"
        )
        assert post_embedding != original_embedding, (
            "memory_embeddings still holds the pre-degradation embedding — "
            "dense recall would score against original content while displaying "
            "truncated content. C18.b regeneration did not run."
        )

    def test_tier_2_to_tier_3_regenerates_binary_vector(self, temp_db, fake_embeddings):
        """The binary_vector column on episodic_memory must also update
        to match the new content."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        original = ("ORIGINAL_DETAILED_CONTEXT " * 30).strip()
        memory_id = beam.consolidate_to_episodic(
            summary=original,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )

        if beam_module._mib is None:
            pytest.skip("binary vectorization not available in this build")

        original_bv = _read_binary_vector(temp_db, memory_id)
        assert original_bv is not None

        self._backdate(temp_db, memory_id, days=beam_module.TIER3_DAYS + 1, tier=2)

        beam.degrade_episodic(dry_run=False)

        post_bv = _read_binary_vector(temp_db, memory_id)
        assert post_bv is not None, (
            "binary_vector should be present (regenerated, not nulled) when "
            "the embedding provider is available"
        )
        assert post_bv != original_bv, (
            "binary_vector still holds pre-degradation bytes — same C18.b drift"
        )

    def test_tier_1_to_tier_2_llm_path_regenerates_embedding(
        self, temp_db, fake_embeddings, monkeypatch
    ):
        """Tier 1→2 with the LLM-summarization path active: monkeypatched
        local_llm replaces content with a stub summary. The embedding
        must regenerate to match. Closes the test gap from /review —
        same _refresh_episodic_embedding path is exercised but a
        regression specific to the tier 1→2 hookup (e.g., wrong arg)
        wouldn't slip through."""
        from mnemosyne.core import local_llm

        monkeypatch.setattr(local_llm, "llm_available", lambda: True)
        monkeypatch.setattr(
            local_llm,
            "summarize_memories",
            lambda lines, source="": "STUB SUMMARY produced by LLM (deterministic)",
        )

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        # Long content (>300 chars) so the tier 1→2 path hits the LLM branch
        long_original = ("OriginalLongFactPrefix " + "padding " * 100).strip()
        assert len(long_original) > 300

        memory_id = beam.consolidate_to_episodic(
            summary=long_original,
            source_wm_ids=["wm-1"],
            importance=0.6,
        )
        original_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert original_embedding is not None

        # Backdate so the row is eligible for tier 1→2 (tier defaults to 1
        # in the schema, so we just need the timestamp).
        self._backdate(temp_db, memory_id, days=beam_module.TIER2_DAYS + 1)

        result = beam.degrade_episodic(dry_run=False)
        assert result["tier1_to_tier2"] == 1, (
            f"Expected one tier 1→2 transition, got {result}"
        )

        # Content should now be the stub summary
        new_content, _ = self._fetch_content_and_tier(temp_db, memory_id)
        assert new_content.startswith("STUB SUMMARY"), (
            f"tier 1→2 LLM path didn't replace content: {new_content[:60]!r}"
        )

        # Embedding must match new (summary) content, not original
        post_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert post_embedding is not None
        assert post_embedding != original_embedding, (
            "Embedding still reflects pre-LLM content; tier 1→2 path "
            "did not call _refresh_episodic_embedding"
        )

    def test_refresh_failure_rolls_back_content_update(
        self, temp_db, fake_embeddings, monkeypatch
    ):
        """[C18.b /review finding #1] If _refresh_episodic_embedding raises
        after the UPDATE statement runs, the SAVEPOINT must roll back the
        content mutation so we don't commit content=NEW with embedding=OLD
        (the very drift this PR fixes). Pre-fix, the broad except in the
        loop body swallowed the refresh exception and the UPDATE stayed
        staged in the implicit transaction."""
        beam = BeamMemory(session_id="s1", db_path=temp_db)

        original = ("ORIGINAL_DETAILED_CONTEXT " * 30).strip()
        memory_id = beam.consolidate_to_episodic(
            summary=original,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )

        # Backdate + mark tier 2 so degrade hits the tier 2→3 path.
        self._backdate(temp_db, memory_id, days=beam_module.TIER3_DAYS + 1, tier=2)

        # Force the refresh to raise mid-call so we exercise the SAVEPOINT
        # rollback path. We patch on the instance so other beam instances
        # in the same test session aren't affected.
        # Accept any call shape (*args, **kwargs): a stub with a required
        # positional would raise TypeError on a keyword-only call and mask
        # the simulated failure.
        def boom(*args, **kwargs):
            raise RuntimeError("simulated refresh failure")

        monkeypatch.setattr(beam, "_refresh_episodic_embedding", boom)

        result = beam.degrade_episodic(dry_run=False)
        # The row should NOT count as consolidated since the savepoint
        # rolled back.
        assert result["tier2_to_tier3"] == 0, (
            f"Refresh raised but row still counted as degraded: {result}"
        )

        # Critically: content must remain at the original (rollback worked),
        # NOT the truncated form. If the SAVEPOINT didn't roll back, the
        # UPDATE would have committed and we'd see truncated content + stale
        # embedding — exactly the C18.b drift.
        post_content = self._fetch_content_and_tier(temp_db, memory_id)
        assert post_content[0] == original, (
            "SAVEPOINT did not roll back: content was mutated despite refresh "
            f"failure. Got {post_content[0][:60]!r}, expected original."
        )
        assert post_content[1] == 2, (
            f"SAVEPOINT did not roll back tier change: got tier={post_content[1]}"
        )

    def test_tier_2_to_tier_3_invalidates_when_provider_unavailable(
        self, temp_db, monkeypatch
    ):
        """If embeddings provider is unavailable at degrade time, the stale
        embedding rows must be invalidated so dense recall can't return
        semantically misleading hits."""
        from mnemosyne.core import embeddings as emb

        # Phase 1: provider available — seed.
        monkeypatch.setattr(emb, "available", lambda: True)
        monkeypatch.setattr(
            emb,
            "embed",
            lambda texts: np.stack([_content_to_vec(t) for t in texts]),
        )
        monkeypatch.setattr(beam_module, "_vec_available", lambda conn: False)

        beam = BeamMemory(session_id="s1", db_path=temp_db)
        original = ("ORIGINAL_DETAILED_CONTEXT " * 30).strip()
        memory_id = beam.consolidate_to_episodic(
            summary=original,
            source_wm_ids=["fake-wm"],
            importance=0.6,
        )
        assert _read_fallback_embedding(temp_db, memory_id) is not None

        # Phase 2: provider goes unavailable BEFORE degrade.
        monkeypatch.setattr(emb, "available", lambda: False)

        self._backdate(temp_db, memory_id, days=beam_module.TIER3_DAYS + 1, tier=2)

        beam.degrade_episodic(dry_run=False)

        post_embedding = _read_fallback_embedding(temp_db, memory_id)
        assert post_embedding is None, (
            "Stale memory_embeddings row remained after degrade with no embeddings "
            "provider. Should have been deleted to avoid ranking against content "
            "that no longer matches the row's text."
        )

        post_bv = _read_binary_vector(temp_db, memory_id)
        if beam_module._mib is not None:
            assert post_bv is None, (
                "binary_vector should be NULLed when the embedding provider is "
                "unavailable at degrade time"
            )
#369

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public