my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""Regression tests for [C12.a]: Mnemosyne.remember(extract=True) writes
#2	fact triples but skips the `facts` table. The canonical helper
#3	_extract_and_store_facts in beam.py writes BOTH tables; the wrapper's
#4	inline extract block only wrote triples. As a result, wrapper-extracted
#5	facts were visible through recall() (which scores fact triples) but
#6	invisible through fact_recall() (which queries the facts table directly).
#7
#8	Bug: mnemosyne/core/memory.py — wrapper's `if extract:` block only called
#9	triples.add_facts(), never _store_facts_in_table().
#10
#11	These tests assert wrapper / direct parity across all four observable
#12	effects of extract=True:
#13	1. triples table populated
#14	2. facts table populated
#15	3. recall() can find the memory
#16	4. fact_recall() can find the fact
#17
#18	Plus a parity check for extract_entities=True.
#19	"""
#20
#21	import pytest
#22
#23	from mnemosyne.core.memory import Mnemosyne
#24
#25
@pytest.fixture
def fake_extract_facts(monkeypatch):
    """Replace ``extract_facts_safe`` with a stub returning canned facts.

    The attribute ``mnemosyne.core.extraction.extract_facts_safe`` is
    swapped for the duration of the test, so extraction yields the same
    two lowercase sentences regardless of the content passed in.

    NOTE(review): patching the attribute on the extraction module only
    affects callers that look the name up on that module at call time
    (e.g. a function-local ``from ... import``). A caller that bound the
    name at its own module import would keep the real function -- confirm
    against Mnemosyne.remember and BeamMemory._extract_and_store_facts.

    Returns:
        The list of canned fact strings (the same list object the stub
        copies from on every call).
    """
    canned = [
        "alice was born in boston",
        "alice studied mathematics at MIT",
    ]

    def _stub(content, **kwargs):
        # Hand back a fresh copy on every call so callers cannot mutate
        # the canned list through the returned value.
        return list(canned)

    monkeypatch.setattr("mnemosyne.core.extraction.extract_facts_safe", _stub)
    return canned
#42
#43
def _facts_table_count(db_path) -> int:
    """Return the number of rows in the ``facts`` table at *db_path*.

    A missing table counts as 0 rows: that is the bug surface, because the
    table never gets written when the wrapper fails to populate it for the
    first time. Any other ``sqlite3.OperationalError`` is re-raised, so an
    unrelated failure is not reported as "empty table" (which could also
    let a parity comparison pass vacuously as ``0 == 0``).

    Args:
        db_path: Location of the SQLite database file; converted with
            ``str()`` before connecting.

    Returns:
        ``COUNT(*)`` of ``facts``, or 0 when SQLite reports that the
        ``facts`` table does not exist.

    Raises:
        sqlite3.OperationalError: For any operational failure other than
            a missing ``facts`` table.
    """
    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        return conn.execute("SELECT COUNT(*) FROM facts").fetchone()[0]
    except sqlite3.OperationalError as exc:
        # Only the documented "table missing" case maps to zero rows.
        if "no such table: facts" not in str(exc):
            raise
        return 0
    finally:
        conn.close()
#57
#58
def _triples_table_count(db_path) -> int:
    """Return the number of rows in the ``triples`` table at *db_path*.

    A missing table counts as 0 rows. Any other
    ``sqlite3.OperationalError`` is re-raised so that an unrelated failure
    is not silently reported as an empty table.

    Args:
        db_path: Location of the SQLite database file; converted with
            ``str()`` before connecting.

    Returns:
        ``COUNT(*)`` of ``triples``, or 0 when SQLite reports that the
        ``triples`` table does not exist.

    Raises:
        sqlite3.OperationalError: For any operational failure other than
            a missing ``triples`` table.
    """
    import sqlite3

    conn = sqlite3.connect(str(db_path))
    try:
        return conn.execute("SELECT COUNT(*) FROM triples").fetchone()[0]
    except sqlite3.OperationalError as exc:
        # Only the "table missing" case maps to zero rows.
        if "no such table: triples" not in str(exc):
            raise
        return 0
    finally:
        conn.close()
#69
#70
class TestWrapperExtractFactsTableParity:
    """C12.a: Mnemosyne.remember(extract=True) must populate the facts
    table the same way BeamMemory.remember(extract=True) does.

    Every test requests the ``fake_extract_facts`` fixture purely for its
    patching side effect; the returned list is not inspected here.
    """

    # Single source of truth for the input shared by the tests below, so
    # the dedup/backfill test is guaranteed to re-submit identical content.
    CONTENT = "Alice was born in Boston in 1990 and studied math at MIT."
    SESSION_ID = "c12a"

    def _remember_via_wrapper(self, db_path):
        """Store ``CONTENT`` through the Mnemosyne wrapper with extract=True.

        Returns the Mnemosyne instance so callers can query it afterwards.
        """
        mem = Mnemosyne(session_id=self.SESSION_ID, db_path=db_path)
        mem.remember(self.CONTENT, source="user", extract=True)
        return mem

    def test_wrapper_extract_writes_facts_table(self, tmp_path, fake_extract_facts):
        db_path = tmp_path / "c12a.db"
        self._remember_via_wrapper(db_path)
        assert _facts_table_count(db_path) >= 1, (
            "facts table empty after Mnemosyne.remember(extract=True); "
            "wrapper path should populate it like BeamMemory does"
        )

    def test_wrapper_extract_still_writes_triples_table(self, tmp_path, fake_extract_facts):
        """Regression guard: the facts-table fix must NOT remove the
        existing triples write."""
        db_path = tmp_path / "c12a.db"
        self._remember_via_wrapper(db_path)
        assert _triples_table_count(db_path) >= 1, (
            "triples table empty after extract=True; the wrapper's existing "
            "behavior must not regress"
        )

    def test_wrapper_extracted_fact_is_visible_via_fact_recall(self, tmp_path, fake_extract_facts):
        """The contract: extract=True through the wrapper must produce
        facts retrievable through the public fact_recall surface."""
        db_path = tmp_path / "c12a.db"
        mem = self._remember_via_wrapper(db_path)
        results = mem.beam.fact_recall("alice")
        assert results, "fact_recall returned no results for wrapper-extracted facts"
        # Results are read with .get("content"), i.e. treated as mappings.
        contents = " ".join(str(r.get("content", "")).lower() for r in results)
        assert "alice" in contents

    def test_extract_runs_on_dedup_for_backfill(self, tmp_path, fake_extract_facts):
        """Backfill contract: a user with pre-existing working_memory rows
        (written before extract=True was supported) calls
        `mem.remember(same_content, extract=True)` to populate the facts
        table after the fact. Even though the dedup path fires (content
        already exists), extraction must still run.

        This locks in the expectation that the dedup branch of
        BeamMemory.remember also runs extraction rather than returning
        early before it.
        """
        from mnemosyne.core.beam import BeamMemory

        db_path = tmp_path / "c12a.db"
        # Pre-existing row, no extraction (simulating an old DB)
        beam = BeamMemory(session_id=self.SESSION_ID, db_path=db_path)
        first_id = beam.remember(self.CONTENT, source="user", extract=False)
        # Backfill: same content, now with extract=True
        mem = Mnemosyne(session_id=self.SESSION_ID, db_path=db_path)
        second_id = mem.remember(self.CONTENT, source="user", extract=True)
        assert first_id == second_id, (
            "Dedup did not fire: backfill expectation requires the "
            "second call to recognize the existing row"
        )
        assert _facts_table_count(db_path) >= 1, (
            "Backfill failed: facts table empty after extract=True on "
            "duplicate content. Dedup branch in BeamMemory.remember must "
            "run extraction so the C12.a contract holds for backfill scenarios."
        )

    def test_wrapper_and_direct_paths_produce_same_table_state(self, tmp_path, fake_extract_facts):
        """Wrapper path and direct-Beam path must produce equivalent
        fact-table state for the same input. Eliminates the asymmetry
        that v2 plan §C12.a calls out."""
        from mnemosyne.core.beam import BeamMemory

        wrapper_db = tmp_path / "wrapper.db"
        direct_db = tmp_path / "direct.db"

        wrapper_mem = Mnemosyne(session_id="parity", db_path=wrapper_db)
        wrapper_mem.remember(self.CONTENT, source="user", extract=True)

        # Direct path: BeamMemory.remember(extract=True) is the canonical
        # one that is expected to populate both tables.
        direct_beam = BeamMemory(session_id="parity", db_path=direct_db)
        direct_beam.remember(self.CONTENT, source="user", extract=True)

        wrapper_facts = _facts_table_count(wrapper_db)
        direct_facts = _facts_table_count(direct_db)
        assert wrapper_facts == direct_facts, (
            f"Wrapper wrote {wrapper_facts} facts rows; direct wrote {direct_facts}. "
            "Paths should produce identical fact-table state for identical input."
        )
        # Guard against a vacuous pass: two empty (or missing) facts tables
        # compare equal, but that is not the parity this test is about.
        assert direct_facts >= 1, (
            "Parity held only because neither path wrote any facts rows"
        )
#180
#181
class TestWrapperExtractEntitiesParity:
    """Companion parity check for the extract_entities=True path.

    Under "Option A" the wrapper hands this work to BeamMemory's
    _extract_and_store_entities helper; the test pins the observable
    result (rows in the triples table) rather than the call path.
    """

    def test_wrapper_extract_entities_writes_mention_triples(self, tmp_path):
        import sqlite3

        db_file = tmp_path / "c12a.db"
        wrapper = Mnemosyne(session_id="c12a", db_path=db_file)
        # Plain sentence with capitalized proper nouns, which the regex
        # entity extractor is expected to pick up.
        wrapper.remember(
            "Alice met Bob in Paris last Tuesday.",
            source="user",
            extract_entities=True,
        )

        # Expect at least one triple whose predicate is 'mentions'.
        mentions_sql = "SELECT COUNT(*) FROM triples WHERE predicate = 'mentions'"
        connection = sqlite3.connect(str(db_file))
        try:
            (mention_rows,) = connection.execute(mentions_sql).fetchone()
        finally:
            connection.close()

        assert mention_rows >= 1, (
            "extract_entities=True did not produce 'mentions' triples"
        )
#212

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public