repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 · docs: add automaton and perps launch sources · 15d ago
| #1 | """ |
| #2 | Phase 4: Configurable Hybrid Scoring Tests |
| #3 | |
| #4 | Validates: |
| #5 | 1. _normalize_weights() with explicit params, env vars, and defaults |
| #6 | 2. Backward compatibility (no params = old hardcoded behavior) |
| #7 | 3. Weight normalization sums to 1.0 |
| #8 | 4. Edge cases: all zeros, negative weights, single non-zero weight |
| #9 | 5. recall() accepts new weight params and produces different rankings |
| #10 | 6. Env var overrides work end-to-end |
| #11 | """ |
| #12 | |
| #13 | import os |
| #14 | import sys |
| #15 | import pytest |
| #16 | import tempfile |
| #17 | from pathlib import Path |
| #18 | |
| #19 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| #20 | |
| #21 | import mnemosyne.core.memory as memory_module |
| #22 | from mnemosyne.core.memory import Mnemosyne |
| #23 | from mnemosyne.core.beam import ( |
| #24 | _normalize_weights, |
| #25 | BeamMemory, |
| #26 | init_beam, |
| #27 | _get_connection, |
| #28 | ) |
| #29 | |
| #30 | |
| #31 | # ============================================================================ |
| #32 | # _normalize_weights() unit tests |
| #33 | # ============================================================================ |
| #34 | |
class TestNormalizeWeights:
    """Unit tests for the _normalize_weights helper.

    Every test starts with the three weight environment variables unset
    (see ``_clear_weight_env``), so expectations about built-in defaults do
    not depend on the environment the test run was launched from.
    """

    # Environment variables the tests below use to configure the weights.
    _WEIGHT_ENV_VARS = (
        "MNEMOSYNE_VEC_WEIGHT",
        "MNEMOSYNE_FTS_WEIGHT",
        "MNEMOSYNE_IMPORTANCE_WEIGHT",
    )

    @pytest.fixture(autouse=True)
    def _clear_weight_env(self, monkeypatch):
        """Unset the weight env vars before each test.

        monkeypatch restores the original values on teardown. Tests that
        need a variable set call ``monkeypatch.setenv`` afterwards on the
        same fixture instance.
        """
        for name in self._WEIGHT_ENV_VARS:
            monkeypatch.delenv(name, raising=False)

    def test_default_weights(self):
        """No params + no env vars = default (0.5, 0.3, 0.2)."""
        vw, fw, iw = _normalize_weights(None, None, None)
        assert (vw, fw, iw) == pytest.approx((0.5, 0.3, 0.2), abs=1e-6)

    def test_explicit_params_override_defaults(self):
        """Explicit params are used directly and normalized."""
        vw, fw, iw = _normalize_weights(1.0, 1.0, 1.0)
        assert (vw, fw, iw) == pytest.approx((1 / 3, 1 / 3, 1 / 3), abs=1e-6)

    def test_explicit_params_no_normalization_needed(self):
        """If they already sum to 1.0, normalization is a no-op."""
        vw, fw, iw = _normalize_weights(0.6, 0.3, 0.1)
        assert (vw, fw, iw) == pytest.approx((0.6, 0.3, 0.1), abs=1e-6)

    def test_normalization_sums_to_one(self):
        """All outputs must sum to 1.0 (within floating-point tolerance)."""
        for params in [
            (1.0, 2.0, 3.0),
            (0.1, 0.1, 0.1),
            (10.0, 0.0, 0.0),
            (0.0, 5.0, 0.0),
            (0.0, 0.0, 7.0),
        ]:
            vw, fw, iw = _normalize_weights(*params)
            assert vw + fw + iw == pytest.approx(1.0, abs=1e-9)

    def test_all_zeros_fallback(self):
        """All zeros should fall back to defaults."""
        vw, fw, iw = _normalize_weights(0.0, 0.0, 0.0)
        assert (vw, fw, iw) == pytest.approx((0.5, 0.3, 0.2), abs=1e-6)

    def test_negative_weights_clamped(self):
        """Negative inputs are clamped to 0 before normalization."""
        vw, fw, iw = _normalize_weights(-0.5, 1.0, 0.5)
        # After clamping: 0.0, 1.0, 0.5 -> sum=1.5 -> 0.0, 2/3, 1/3
        assert vw == pytest.approx(0.0, abs=1e-6)
        assert fw == pytest.approx(2 / 3, abs=1e-6)
        assert iw == pytest.approx(1 / 3, abs=1e-6)

    def test_single_non_zero_weight(self):
        """Only one non-zero weight becomes 1.0."""
        vw, fw, iw = _normalize_weights(0.0, 0.0, 5.0)
        assert (vw, fw, iw) == pytest.approx((0.0, 0.0, 1.0), abs=1e-6)

    def test_env_var_override(self, monkeypatch):
        """Env vars are used when params are None."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.7")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.2")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.1")
        vw, fw, iw = _normalize_weights(None, None, None)
        assert (vw, fw, iw) == pytest.approx((0.7, 0.2, 0.1), abs=1e-6)

    def test_explicit_params_override_env(self, monkeypatch):
        """Explicit params take precedence over env vars."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.7")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.2")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.1")
        vw, fw, iw = _normalize_weights(0.1, 0.1, 0.1)
        assert (vw, fw, iw) == pytest.approx((1 / 3, 1 / 3, 1 / 3), abs=1e-6)

    def test_partial_env_vars(self, monkeypatch):
        """Missing env vars fall back to defaults, not all-or-nothing."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.8")
        # fts_weight and importance_weight fall back to defaults; the
        # autouse fixture guarantees their env vars are unset here.
        vw, fw, iw = _normalize_weights(None, None, None)
        # 0.8 + 0.3 + 0.2 = 1.3 -> normalize
        assert vw == pytest.approx(0.8 / 1.3, abs=1e-6)
        assert fw == pytest.approx(0.3 / 1.3, abs=1e-6)
        assert iw == pytest.approx(0.2 / 1.3, abs=1e-6)
| #108 | |
| #109 | |
| #110 | # ============================================================================ |
| #111 | # Integration tests: recall() with configurable weights |
| #112 | # ============================================================================ |
| #113 | |
@pytest.fixture
def temp_db():
    """Yield a database path inside a throwaway directory.

    The path is handed to ``init_beam`` before being yielded; the directory
    (and everything in it) is removed when the test finishes.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        database_file = Path(scratch_dir).joinpath("test.db")
        init_beam(database_file)
        yield database_file
| #121 | |
| #122 | |
class TestRecallConfigurableWeights:
    """Integration tests verifying recall() behavior with different weight configs."""

    def test_recall_accepts_weight_params(self, temp_db):
        """recall() should accept vec_weight, fts_weight, importance_weight without error."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Python is a programming language", importance=0.8)
        beam.remember("JavaScript runs in browsers", importance=0.3)

        # Should not raise
        results = beam.recall("programming language", top_k=5,
                              vec_weight=0.6, fts_weight=0.3, importance_weight=0.1)
        assert isinstance(results, list)

    def test_recall_without_weight_params_is_backward_compatible(self, temp_db):
        """Old code calling recall() without weight params still works."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Python is a programming language", importance=0.8)
        beam.remember("JavaScript runs in browsers", importance=0.3)

        results = beam.recall("programming language", top_k=5)
        assert isinstance(results, list)
        assert len(results) > 0

    def test_high_importance_weight_boosts_high_importance_memories(self, temp_db):
        """The high-importance memory is returned under both low and high importance_weight.

        Relative ranking is deliberately not asserted: the exact scores
        depend on the scoring implementation, which this test does not pin.
        """
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("A: low importance generic text", importance=0.1)
        beam.remember("B: high importance critical alert", importance=0.9)

        # Low importance weight: keyword match dominates
        results_low_iw = beam.recall("critical alert", top_k=2, importance_weight=0.05)
        # High importance weight: importance dominates
        results_high_iw = beam.recall("critical alert", top_k=2, importance_weight=0.8)

        # Both should return results
        assert len(results_low_iw) >= 1
        assert len(results_high_iw) >= 1

        # Every result must carry a combined score.
        for result in (*results_low_iw, *results_high_iw):
            assert "score" in result

        # The high-importance memory (B) should be present in both
        assert any("B:" in r["content"] for r in results_low_iw)
        assert any("B:" in r["content"] for r in results_high_iw)

    def test_results_include_score_breakdown(self, temp_db):
        """Result dicts should include dense_score, fts_score, importance fields."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Test content for scoring breakdown", importance=0.5)

        results = beam.recall("test content", top_k=1,
                              vec_weight=0.4, fts_weight=0.4, importance_weight=0.2)
        assert len(results) > 0
        r = results[0]
        assert "dense_score" in r
        assert "fts_score" in r
        assert "importance" in r
        assert "score" in r

    def test_env_vars_affect_scoring(self, temp_db, monkeypatch):
        """Env vars should affect recall() scoring when params are not provided."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.8")

        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Content A", importance=0.2)
        beam.remember("Content B", importance=0.9)

        # Without explicit params, env vars should be used
        results = beam.recall("content", top_k=2)
        assert len(results) >= 1
        # With 80% importance weight, the high-importance item should dominate
        top_result = results[0]
        assert top_result["importance"] >= 0.5  # Likely the high-importance one

    def test_explicit_params_override_env_in_recall(self, temp_db, monkeypatch):
        """Explicit params in recall() should override env vars."""
        monkeypatch.setenv("MNEMOSYNE_VEC_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_FTS_WEIGHT", "0.1")
        monkeypatch.setenv("MNEMOSYNE_IMPORTANCE_WEIGHT", "0.8")

        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Test content", importance=0.5)

        # Call with explicit params that differ from env
        results = beam.recall("test", top_k=1,
                              vec_weight=0.5, fts_weight=0.3, importance_weight=0.2)
        assert len(results) > 0
        # Should succeed without error = params were accepted

    def test_weight_params_dont_break_temporal_scoring(self, temp_db):
        """Weight params should coexist with temporal_weight from Phase 3."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Recent event happened today", importance=0.5)

        results = beam.recall("event", top_k=1,
                              vec_weight=0.4, fts_weight=0.3, importance_weight=0.3,
                              temporal_weight=0.5, query_time="2099-01-01")
        assert isinstance(results, list)

    def test_zero_all_weights_uses_defaults_in_recall(self, temp_db):
        """Passing all zeros should trigger fallback to defaults."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Some content here", importance=0.5)

        # Should not crash; internally falls back to (0.5, 0.3, 0.2)
        results = beam.recall("content", top_k=1,
                              vec_weight=0.0, fts_weight=0.0, importance_weight=0.0)
        assert len(results) > 0
| #236 | |
| #237 | |
class TestPublicRecallConfigurableWeights:
    """The public Mnemosyne recall entry points must accept the scoring-weight keywords."""

    def test_mnemosyne_recall_accepts_weight_params(self, temp_db):
        """Mnemosyne.recall() called with the three weight keywords returns a non-empty list."""
        weights = {"vec_weight": 0.6, "fts_weight": 0.3, "importance_weight": 0.1}
        memory = Mnemosyne(session_id="test", db_path=temp_db)
        memory.remember("Python is a programming language", importance=0.8)

        hits = memory.recall("programming language", top_k=5, **weights)

        assert isinstance(hits, list)
        assert len(hits) >= 1

    def test_module_recall_accepts_weight_params(self, monkeypatch):
        """The module-level recall() helper passes the weight keywords to the default memory."""
        expected_weights = {"vec_weight": 0.6, "fts_weight": 0.3, "importance_weight": 0.1}

        class FakeMemory:
            """Stand-in that records the arguments recall() was invoked with."""

            def recall(self, *args, **kwargs):
                self.args, self.kwargs = args, kwargs
                return [{"id": "test", "content": "weight forwarding"}]

        fake = FakeMemory()

        def _return_fake(bank=None):
            # Replacement for memory_module._get_default: always hand back the recorder.
            return fake

        monkeypatch.setattr(memory_module, "_get_default", _return_fake)

        hits = memory_module.recall("weight forwarding", top_k=5, **expected_weights)

        assert isinstance(hits, list)
        assert len(hits) >= 1
        # Each weight must arrive at the underlying recall() unchanged.
        for keyword, value in expected_weights.items():
            assert fake.kwargs[keyword] == value
| #281 | |
| #282 | |
| #283 | # ============================================================================ |
| #284 | # Edge case tests |
| #285 | # ============================================================================ |
| #286 | |
class TestEdgeCases:
    """Boundary conditions and error handling."""

    def test_very_high_vec_weight(self, temp_db):
        """vec_weight=1.0 with the other weights at zero must not crash recall()."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Content for vector test", importance=0.1)
        results = beam.recall("vector test", top_k=1,
                              vec_weight=1.0, fts_weight=0.0, importance_weight=0.0)
        # May be empty if no embeddings are available, so only the return
        # type is checked (a `len(results) >= 0` check would always pass).
        assert isinstance(results, list)

    def test_very_high_fts_weight(self, temp_db):
        """fts_weight=1.0 should make text match dominate."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Exact text match phrase", importance=0.1)
        results = beam.recall("exact text match", top_k=1,
                              vec_weight=0.0, fts_weight=1.0, importance_weight=0.0)
        assert len(results) > 0
        assert "exact" in results[0]["content"].lower()

    def test_invalid_negative_param_clamped(self, temp_db):
        """Negative weight params should be clamped to 0."""
        beam = BeamMemory(session_id="test", db_path=temp_db)
        beam.remember("Test content", importance=0.5)

        # Should not raise; negative values are clamped
        results = beam.recall("test", top_k=1,
                              vec_weight=-0.5, fts_weight=1.0, importance_weight=0.5)
        assert len(results) > 0
| #316 | |
| #317 | |
| #318 | # ============================================================================ |
| #319 | # Run standalone |
| #320 | # ============================================================================ |
| #321 | |
if __name__ == "__main__":
    # Propagate pytest's exit code so a standalone run reports failures to
    # the calling shell/CI instead of always exiting with status 0.
    sys.exit(pytest.main([__file__, "-v", "--tb=short"]))
| #324 |