my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""
#2	Phase 4: Configurable Hybrid Scoring Tests
#3
#4	Validates:
#5	1. _normalize_weights() with explicit params, env vars, and defaults
#6	2. Backward compatibility (no params = old hardcoded behavior)
#7	3. Weight normalization sums to 1.0
#8	4. Edge cases: all zeros, negative weights, single non-zero weight
#9	5. recall() accepts new weight params and produces different rankings
#10	6. Env var overrides work end-to-end
#11	"""
#12
#13	import os
#14	import sys
#15	import pytest
#16	import tempfile
#17	from pathlib import Path
#18
#19	sys.path.insert(0, str(Path(__file__).parent.parent))
#20
#21	import mnemosyne.core.memory as memory_module
#22	from mnemosyne.core.memory import Mnemosyne
#23	from mnemosyne.core.beam import (
#24	_normalize_weights,
#25	BeamMemory,
#26	init_beam,
#27	_get_connection,
#28	)
#29
#30
#31	# ============================================================================
#32	# _normalize_weights() unit tests
#33	# ============================================================================
#34
class TestNormalizeWeights:
    """Unit tests for the _normalize_weights helper.

    Tests whose expectations assume that no weight environment variables are
    set clear them explicitly, so the outcome does not depend on the
    environment the test run happens to inherit.
    """

    # Environment variables these tests set or rely on being absent.
    _WEIGHT_ENV_VARS = (
        "MNEMOSYNE_VEC_WEIGHT",
        "MNEMOSYNE_FTS_WEIGHT",
        "MNEMOSYNE_IMPORTANCE_WEIGHT",
    )

    @classmethod
    def _clear_weight_env(cls, monkeypatch):
        """Unset every weight env var for the duration of the current test."""
        for name in cls._WEIGHT_ENV_VARS:
            # raising=False: the variable is usually not set in the first place.
            monkeypatch.delenv(name, raising=False)

    def test_default_weights(self, monkeypatch):
        """No params + no env vars = default (0.5, 0.3, 0.2)."""
        self._clear_weight_env(monkeypatch)
        vw, fw, iw = _normalize_weights(None, None, None)
        assert (vw, fw, iw) == pytest.approx((0.5, 0.3, 0.2), abs=1e-6)

    def test_explicit_params_override_defaults(self):
        """Explicit params are used directly and normalized."""
        vw, fw, iw = _normalize_weights(1.0, 1.0, 1.0)
        assert (vw, fw, iw) == pytest.approx((1 / 3, 1 / 3, 1 / 3), abs=1e-6)

    def test_explicit_params_no_normalization_needed(self):
        """If they already sum to 1.0, normalization is a no-op."""
        vw, fw, iw = _normalize_weights(0.6, 0.3, 0.1)
        assert (vw, fw, iw) == pytest.approx((0.6, 0.3, 0.1), abs=1e-6)

    def test_normalization_sums_to_one(self):
        """All outputs must sum to 1.0 (within floating-point tolerance)."""
        for params in [
            (1.0, 2.0, 3.0),
            (0.1, 0.1, 0.1),
            (10.0, 0.0, 0.0),
            (0.0, 5.0, 0.0),
            (0.0, 0.0, 7.0),
        ]:
            vw, fw, iw = _normalize_weights(*params)
            assert vw + fw + iw == pytest.approx(1.0, abs=1e-9)

    def test_all_zeros_fallback(self, monkeypatch):
        """All zeros should fall back to defaults."""
        # Cleared so the expected fallback is the built-in default triple
        # regardless of what the surrounding environment defines.
        self._clear_weight_env(monkeypatch)
        vw, fw, iw = _normalize_weights(0.0, 0.0, 0.0)
        assert (vw, fw, iw) == pytest.approx((0.5, 0.3, 0.2), abs=1e-6)

    def test_negative_weights_clamped(self):
        """Negative inputs are clamped to 0 before normalization."""
        vw, fw, iw = _normalize_weights(-0.5, 1.0, 0.5)
        # After clamping: 0.0, 1.0, 0.5 -> sum=1.5 -> 0.0, 2/3, 1/3
        assert vw == pytest.approx(0.0, abs=1e-6)
        assert fw == pytest.approx(2 / 3, abs=1e-6)
        assert iw == pytest.approx(1 / 3, abs=1e-6)

    def test_single_non_zero_weight(self):
        """Only one non-zero weight becomes 1.0."""
        vw, fw, iw = _normalize_weights(0.0, 0.0, 5.0)
        assert (vw, fw, iw) == pytest.approx((0.0, 0.0, 1.0), abs=1e-6)

    def test_env_var_override(self, monkeypatch):
        """Env vars are used when params are None."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.7")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.2")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.1")
        vw, fw, iw = _normalize_weights(None, None, None)
        assert (vw, fw, iw) == pytest.approx((0.7, 0.2, 0.1), abs=1e-6)

    def test_explicit_params_override_env(self, monkeypatch):
        """Explicit params take precedence over env vars."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.7")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.2")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.1")
        vw, fw, iw = _normalize_weights(0.1, 0.1, 0.1)
        assert (vw, fw, iw) == pytest.approx((1 / 3, 1 / 3, 1 / 3), abs=1e-6)

    def test_partial_env_vars(self, monkeypatch):
        """Missing env vars fall back to defaults, not all-or-nothing."""
        # Start from a clean slate so only the vector weight comes from the env.
        self._clear_weight_env(monkeypatch)
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.8")
        # fts_weight and importance_weight fall back to defaults
        vw, fw, iw = _normalize_weights(None, None, None)
        # 0.8 + 0.3 + 0.2 = 1.3 -> normalize
        assert vw == pytest.approx(0.8 / 1.3, abs=1e-6)
        assert fw == pytest.approx(0.3 / 1.3, abs=1e-6)
        assert iw == pytest.approx(0.2 / 1.3, abs=1e-6)
#108
#109
#110	# ============================================================================
#111	# Integration tests: recall() with configurable weights
#112	# ============================================================================
#113
@pytest.fixture
def temp_db():
    """Yield the path of a ``test.db`` file inside a throwaway directory.

    The path is handed to ``init_beam`` before being yielded; the directory
    is removed again once the requesting test has finished.
    """
    scratch_dir = tempfile.TemporaryDirectory()
    with scratch_dir as root:
        database_file = Path(root).joinpath("test.db")
        init_beam(database_file)
        yield database_file
#121
#122
class TestRecallConfigurableWeights:
    """Integration tests verifying recall() behavior with different weight configs.

    Each test builds a BeamMemory on the per-test ``temp_db`` database,
    stores a few memories and calls recall() with a particular combination
    of vec_weight / fts_weight / importance_weight (or none at all).
    """

    def test_recall_accepts_weight_params(self, temp_db):
        """recall() should accept vec_weight, fts_weight, importance_weight without error."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Python is a programming language", importance=0.8)
        beam.remember("JavaScript runs in browsers", importance=0.3)

        # Should not raise
        results = beam.recall(
            "programming language",
            top_k=5,
            vec_weight=0.6,
            fts_weight=0.3,
            importance_weight=0.1,
        )
        assert isinstance(results, list)

    def test_recall_without_weight_params_is_backward_compatible(self, temp_db):
        """Old code calling recall() without weight params still works."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Python is a programming language", importance=0.8)
        beam.remember("JavaScript runs in browsers", importance=0.3)

        results = beam.recall("programming language", top_k=5)
        assert isinstance(results, list)
        assert len(results) > 0

    def test_high_importance_weight_boosts_high_importance_memories(self, temp_db):
        """The high-importance memory is recalled under both low and high importance_weight.

        NOTE: this checks presence and result shape only; it does not compare
        the relative ranking or scores between the two configurations.
        """
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("A: low importance generic text", importance=0.1)
        beam.remember("B: high importance critical alert", importance=0.9)

        # Low importance weight: keyword match dominates
        results_low_iw = beam.recall("critical alert", top_k=2, importance_weight=0.05)
        # High importance weight: importance dominates
        results_high_iw = beam.recall("critical alert", top_k=2, importance_weight=0.8)

        # Both should return results
        assert len(results_low_iw) >= 1
        assert len(results_high_iw) >= 1

        # Every hit must carry its text and a combined score. (This replaces
        # two score dictionaries that were built here but never read.)
        for hit in (*results_low_iw, *results_high_iw):
            assert "content" in hit
            assert "score" in hit

        # The high-importance memory (B) should be present in both
        assert any("B:" in r["content"] for r in results_low_iw)
        assert any("B:" in r["content"] for r in results_high_iw)

    def test_results_include_score_breakdown(self, temp_db):
        """Result dicts should include dense_score, fts_score, importance fields."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Test content for scoring breakdown", importance=0.5)

        results = beam.recall(
            "test content",
            top_k=1,
            vec_weight=0.4,
            fts_weight=0.4,
            importance_weight=0.2,
        )
        assert len(results) > 0
        r = results[0]
        assert "dense_score" in r
        assert "fts_score" in r
        assert "importance" in r
        assert "score" in r

    def test_env_vars_affect_scoring(self, temp_db, monkeypatch):
        """Env vars should affect recall() scoring when params are not provided."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.8")

        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Content A", importance=0.2)
        beam.remember("Content B", importance=0.9)

        # Without explicit params, env vars should be used
        results = beam.recall("content", top_k=2)
        assert len(results) >= 1
        # With 80% importance weight, the high-importance item should dominate
        top_result = results[0]
        assert top_result["importance"] >= 0.5  # Likely the high-importance one

    def test_explicit_params_override_env_in_recall(self, temp_db, monkeypatch):
        """Explicit params in recall() should override env vars."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.8")

        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Test content", importance=0.5)

        # Call with explicit params that differ from env
        results = beam.recall(
            "test",
            top_k=1,
            vec_weight=0.5,
            fts_weight=0.3,
            importance_weight=0.2,
        )
        assert len(results) > 0
        # Should succeed without error = params were accepted

    def test_weight_params_dont_break_temporal_scoring(self, temp_db):
        """Weight params should coexist with temporal_weight from Phase 3."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Recent event happened today", importance=0.5)

        results = beam.recall(
            "event",
            top_k=1,
            vec_weight=0.4,
            fts_weight=0.3,
            importance_weight=0.3,
            temporal_weight=0.5,
            query_time="2099-01-01",
        )
        assert isinstance(results, list)

    def test_zero_all_weights_uses_defaults_in_recall(self, temp_db):
        """Passing all zeros should trigger fallback to defaults."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Some content here", importance=0.5)

        # Should not crash; internally falls back to (0.5, 0.3, 0.2)
        results = beam.recall(
            "content",
            top_k=1,
            vec_weight=0.0,
            fts_weight=0.0,
            importance_weight=0.0,
        )
        assert len(results) > 0
#236
#237
class TestPublicRecallConfigurableWeights:
    """Public Mnemosyne recall wrappers should expose BeamMemory scoring weights."""

    def test_mnemosyne_recall_accepts_weight_params(self, temp_db):
        """Mnemosyne.recall() should forward scoring weights to BeamMemory.recall()."""
        mem = Mnemosyne(session_id="test", db_path=temp_db)
        mem.remember("Python is a programming language", importance=0.8)

        results = mem.recall(
            "programming language",
            top_k=5,
            vec_weight=0.6,
            fts_weight=0.3,
            importance_weight=0.1,
        )

        assert isinstance(results, list)
        assert len(results) > 0

    def test_module_recall_accepts_weight_params(self, monkeypatch):
        """mnemosyne.recall() module helper should expose the same scoring weights."""

        class FakeMemory:
            """Stand-in for the default memory object that records how recall() was called."""

            def recall(self, *args, **kwargs):
                # Variadic on purpose: the module helper may pass the query
                # positionally or by keyword, and the assertions below need
                # the keyword arguments captured as a dict.
                self.args = args
                self.kwargs = kwargs
                return [{"id": "test", "content": "weight forwarding"}]

        fake = FakeMemory()
        # Route the module-level helper to the fake instead of a real memory.
        monkeypatch.setattr(memory_module, "_get_default", lambda bank=None: fake)

        results = memory_module.recall(
            "weight forwarding",
            top_k=5,
            vec_weight=0.6,
            fts_weight=0.3,
            importance_weight=0.1,
        )

        assert isinstance(results, list)
        assert len(results) > 0
        assert fake.kwargs["vec_weight"] == 0.6
        assert fake.kwargs["fts_weight"] == 0.3
        assert fake.kwargs["importance_weight"] == 0.1
#281
#282
#283	# ============================================================================
#284	# Edge case tests
#285	# ============================================================================
#286
class TestEdgeCases:
    """Boundary conditions and error handling."""

    def test_very_high_vec_weight(self, temp_db):
        """vec_weight=1.0 should make vector similarity dominate."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Content for vector test", importance=0.1)
        results = beam.recall(
            "vector test",
            top_k=1,
            vec_weight=1.0,
            fts_weight=0.0,
            importance_weight=0.0,
        )
        # May be empty if no embeddings, but should not crash. A length check
        # of ">= 0" can never fail, so assert on the return type instead.
        assert isinstance(results, list)

    def test_very_high_fts_weight(self, temp_db):
        """fts_weight=1.0 should make text match dominate."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Exact text match phrase", importance=0.1)
        results = beam.recall(
            "exact text match",
            top_k=1,
            vec_weight=0.0,
            fts_weight=1.0,
            importance_weight=0.0,
        )
        assert len(results) > 0
        assert "exact" in results[0]["content"].lower()

    def test_invalid_negative_param_clamped(self, temp_db):
        """Negative weight params should be clamped to 0."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Test content", importance=0.5)

        # Should not raise; negative values are clamped
        results = beam.recall(
            "test",
            top_k=1,
            vec_weight=-0.5,
            fts_weight=1.0,
            importance_weight=0.5,
        )
        assert len(results) > 0
#316
#317
#318	# ============================================================================
#319	# Run standalone
#320	# ============================================================================
#321
if __name__ == "__main__":
    # Propagate pytest's return code so a failing standalone run exits
    # non-zero instead of always reporting success to the shell / CI.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))
#324

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public