repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources15d ago
| #1 | """ |
| #2 | Tests for Mnemosyne Structured Fact Extraction (Phase 2) |
| #3 | """ |
| #4 | |
| #5 | import os |
| #6 | import sys |
| #7 | import json |
| #8 | import sqlite3 |
| #9 | import tempfile |
| #10 | from pathlib import Path |
| #11 | |
| #12 | # Add parent to path |
| #13 | sys.path.insert(0, str(Path(__file__).parent.parent)) |
| #14 | |
| #15 | from mnemosyne.core.extraction import ( |
| #16 | extract_facts, |
| #17 | extract_facts_safe, |
| #18 | _build_extraction_prompt, |
| #19 | _parse_facts, |
| #20 | EXTRACTION_PROMPT, |
| #21 | ) |
| #22 | from mnemosyne.core.triples import TripleStore, init_triples |
| #23 | |
| #24 | |
class MockLLM:
    """Callable LLM stand-in that always answers with a canned string.

    Each invocation bumps ``call_count`` and stores the prompt it received in
    ``last_prompt`` so a test can inspect how the mock was used.
    """

    def __init__(self, response="The user loves coffee\nThe user hates mornings"):
        self.last_prompt = None
        self.call_count = 0
        self.response = response

    def __call__(self, prompt, **kwargs):
        # Extra keyword arguments are accepted and ignored.
        self.last_prompt = prompt
        self.call_count = self.call_count + 1
        return self.response
| #36 | |
| #37 | |
def test_build_extraction_prompt():
    """The built prompt must embed the user text and mention extraction."""
    user_text = "I love coffee"
    built = _build_extraction_prompt(user_text)
    assert user_text in built
    # A case-insensitive search also covers the capitalised "Extract".
    assert "extract" in built.lower()
    print("PASS: test_build_extraction_prompt")
| #44 | |
| #45 | |
def test_parse_facts_basic():
    """Two newline-separated sentences parse into two facts, in order."""
    parsed = _parse_facts("The user loves coffee\nThe user hates mornings\n")
    assert len(parsed) == 2
    first, second = parsed
    assert "loves coffee" in first
    assert "hates mornings" in second
    print("PASS: test_parse_facts_basic")
| #54 | |
| #55 | |
def test_parse_facts_with_numbering():
    """Numbered and bulleted lines each yield a fact without its marker."""
    llm_output = "1. The user loves coffee\n2. The user hates mornings\n- User prefers tea\n* User dislikes rain"
    markers = ("1.", "2.", "-", "*")
    parsed = _parse_facts(llm_output)
    assert len(parsed) == 4
    for fact in parsed:
        assert not fact.startswith(markers)
    print("PASS: test_parse_facts_with_numbering")
| #63 | |
| #64 | |
def test_parse_facts_no_facts():
    """A bare 'NO_FACTS' response parses to an empty list."""
    assert _parse_facts("NO_FACTS") == []
    print("PASS: test_parse_facts_no_facts")
| #70 | |
| #71 | |
def test_parse_facts_empty():
    """Empty and whitespace-only responses both parse to an empty list."""
    for blank in ("", " \n "):
        assert _parse_facts(blank) == []
    print("PASS: test_parse_facts_empty")
| #79 | |
| #80 | |
def test_extract_facts_safe_no_llm():
    """extract_facts_safe yields an empty list when no LLM is available."""
    from unittest.mock import patch

    text = "I love coffee and this is long enough for extraction"

    # Patch llm_available on mnemosyne.core.local_llm itself: per the original
    # note, extract_facts() looks it up through the live module reference, so
    # this forces False regardless of constants fixed at import time.
    no_llm = patch("mnemosyne.core.local_llm.llm_available", return_value=False)
    with no_llm:
        result = extract_facts_safe(text)
        assert result == []

    print("PASS: test_extract_facts_safe_no_llm")
| #94 | |
| #95 | |
def test_extract_facts_safe_exception_handling():
    """extract_facts_safe returns [] instead of raising on unusable input."""
    from unittest.mock import patch

    # Garbage input (None, empty string) must be tolerated without raising.
    for garbage in (None, ""):
        assert extract_facts_safe(garbage) == []

    # "x" is valid text but too short for meaningful extraction. Patching
    # llm_available on local_llm (the module extract_facts() consults, per the
    # original note) guarantees no LLM call is attempted.
    with patch("mnemosyne.core.local_llm.llm_available", return_value=False):
        assert extract_facts_safe("x") == []

    print("PASS: test_extract_facts_safe_exception_handling")
| #115 | |
| #116 | |
def test_triplestore_add_facts():
    """add_facts() stores a batch of facts and skips the too-short entry."""
    candidate_facts = ["The user loves coffee", "The user hates mornings", "x"]  # "x" too short
    expected_columns = (
        ("subject", "mem_123"),
        ("predicate", "fact"),
        ("confidence", 0.7),
    )

    with tempfile.TemporaryDirectory() as workdir:
        database = Path(workdir) / "test.db"
        init_triples(database)
        store = TripleStore(db_path=database)

        stored = store.add_facts("mem_123", candidate_facts, source="test", confidence=0.7)
        assert stored == 2  # "x" filtered out

        # Read the rows back and check each column across all of them.
        rows = store.query_by_predicate("fact")
        assert len(rows) == 2
        for column, expected in expected_columns:
            assert all(row[column] == expected for row in rows)

    print("PASS: test_triplestore_add_facts")
| #141 | |
| #142 | |
def test_triplestore_add_facts_empty():
    """add_facts() given an empty list reports zero stored facts."""
    with tempfile.TemporaryDirectory() as workdir:
        database = Path(workdir) / "test.db"
        init_triples(database)

        store = TripleStore(db_path=database)
        stored = store.add_facts("mem_456", [], source="test")
        assert stored == 0

    print("PASS: test_triplestore_add_facts_empty")
| #154 | |
| #155 | |
def test_extraction_prompt_configurable():
    """Smoke-test prompt building while MNEMOSYNE_EXTRACTION_PROMPT is set.

    Per the original note in this test, EXTRACTION_PROMPT is a module-level
    constant fixed at import time, so setting the variable here cannot change
    it; a real override needs the variable set before import (or a re-import).
    This test therefore only checks that _build_extraction_prompt() still
    embeds the supplied text, and that the environment is left exactly as it
    was found.
    """
    env_key = "MNEMOSYNE_EXTRACTION_PROMPT"
    # None means "was not set". An existing empty-string value is a distinct
    # state and must be restored rather than dropped.
    previous = os.environ.get(env_key)

    try:
        os.environ[env_key] = "CUSTOM PROMPT: {text}"

        prompt = _build_extraction_prompt("test")
        assert "test" in prompt
        print("PASS: test_extraction_prompt_configurable")
    finally:
        if previous is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous
| #180 | |
| #181 | |
def run_all_tests():
    """Run the unit tests listed below and print a pass/fail summary.

    Each test is called with no arguments; any exception it raises counts as
    a failure and is reported without stopping the remaining tests.

    Returns:
        bool: True when no test raised, False otherwise.
    """
    import traceback  # local import: only used to report failures

    print("=" * 60)
    print("Phase 2: Structured Fact Extraction — Unit Tests")
    print("=" * 60)

    tests = [
        test_build_extraction_prompt,
        test_parse_facts_basic,
        test_parse_facts_with_numbering,
        test_parse_facts_no_facts,
        test_parse_facts_empty,
        test_extract_facts_safe_no_llm,
        test_extract_facts_safe_exception_handling,
        test_triplestore_add_facts,
        test_triplestore_add_facts_empty,
        test_extraction_prompt_configurable,
    ]

    passed = 0
    failed = 0

    for test in tests:
        try:
            test()
        except Exception as e:
            failed += 1
            # A bare `assert` fails with an empty message, so also report the
            # exception type and the traceback to show which line failed.
            print(f"FAIL: {test.__name__}: {type(e).__name__}: {e}")
            traceback.print_exc()
        else:
            passed += 1

    print("=" * 60)
    print(f"Results: {passed} passed, {failed} failed, {len(tests)} total")
    print("=" * 60)

    return failed == 0
| #217 | |
| #218 | |
if __name__ == "__main__":
    # Script entry point: exit status 0 when every test passed, 1 otherwise.
    exit_code = 0 if run_all_tests() else 1
    sys.exit(exit_code)
| #222 | |
| #223 | |
| #224 | # --------------------------------------------------------------------------- |
| #225 | # Host LLM backend integration (decisions A1, A3, C2) |
| #226 | # --------------------------------------------------------------------------- |
| #227 | |
| #228 | from unittest.mock import patch # noqa: E402 |
| #229 | |
| #230 | from mnemosyne.core import extraction as _extraction_mod, local_llm # noqa: E402 |
| #231 | from mnemosyne.core.llm_backends import ( # noqa: E402 |
| #232 | CallableLLMBackend, |
| #233 | set_host_llm_backend, |
| #234 | ) |
| #235 | |
| #236 | |
def _enable_host(monkeypatch):
    """Switch on the LLM and host-LLM flags of local_llm and clear provider/model.

    All changes go through ``monkeypatch.setattr``.
    """
    overrides = (
        ("LLM_ENABLED", True),
        ("HOST_LLM_ENABLED", True),
        ("HOST_LLM_PROVIDER", None),
        ("HOST_LLM_MODEL", None),
    )
    for attribute, value in overrides:
        monkeypatch.setattr(local_llm, attribute, value)
| #242 | |
| #243 | |
def test_host_extract_facts_uses_temperature_zero(monkeypatch):
    """C2 contract: extract_facts calls the host backend with temperature=0.0."""
    _enable_host(monkeypatch)
    monkeypatch.setattr(local_llm, "LLM_BASE_URL", "http://remote/v1")
    monkeypatch.setattr(local_llm, "LLM_MAX_TOKENS", 128)

    host_calls = []

    def fake(prompt, *, max_tokens, temperature, timeout, provider=None, model=None):
        # Record the sampling settings the host backend was invoked with.
        host_calls.append({"temperature": temperature, "max_tokens": max_tokens})
        return "Alex uses Neovim.\nAlex dislikes VSCode."

    set_host_llm_backend(CallableLLMBackend("test", fake))
    remote_patch = patch.object(local_llm, "_call_remote_llm")
    with remote_patch as remote_spy:
        facts = extract_facts("Alex said they prefer Neovim and dislike VSCode.")
    remote_spy.assert_not_called()

    assert any("Neovim" in fact for fact in facts)
    assert host_calls
    first_call = host_calls[0]
    assert first_call["temperature"] == 0.0
    assert first_call["max_tokens"] == 128
| #265 | |
| #266 | |
def test_host_extract_facts_skips_remote_on_host_miss(monkeypatch):
    """A3 contract: when the enabled host returns None, local is used, NOT remote."""
    _enable_host(monkeypatch)
    monkeypatch.setattr(local_llm, "LLM_BASE_URL", "http://remote/v1")
    silent_host = CallableLLMBackend("test", lambda *a, **k: None)
    set_host_llm_backend(silent_host)

    fake_local = lambda prompt, max_new_tokens, stop: "Local fact one.\nLocal fact two."  # noqa: E731

    remote_patch = patch.object(local_llm, "_call_remote_llm", return_value="Remote facts.")
    loader_patch = patch.object(local_llm, "_load_llm", return_value=fake_local)
    with remote_patch as remote_spy, loader_patch as loader_spy:
        facts = extract_facts("some content with facts to extract")

    remote_spy.assert_not_called()
    loader_spy.assert_called()
    assert any("Local fact" in fact for fact in facts)
| #282 | |
| #283 | |
def test_host_extract_facts_unchanged_when_HOST_LLM_ENABLED_false(monkeypatch):
    """REGRESSION: with the host switched off, facts come from remote, not the host."""
    settings = (
        ("LLM_ENABLED", True),
        ("HOST_LLM_ENABLED", False),
        ("LLM_BASE_URL", "http://remote/v1"),
    )
    for attribute, value in settings:
        monkeypatch.setattr(local_llm, attribute, value)

    # A host backend is registered, but its output must not appear in the result.
    host_backend = CallableLLMBackend("test", lambda *a, **k: "Host fact.\nAnother host fact.")
    set_host_llm_backend(host_backend)

    remote_patch = patch.object(
        local_llm, "_call_remote_llm", return_value="Remote fact one.\nRemote fact two."
    )
    with remote_patch:
        facts = extract_facts("some content")

    assert any("Remote fact" in fact for fact in facts)
    assert all("Host fact" not in fact for fact in facts)
| #294 | |
| #295 | |
def test_host_extract_facts_preserves_bulleted_output(monkeypatch):
    """REGRESSION (codex finding): bulleted host output must reach _parse_facts().

    Per the original report, the helper used to pass host output through
    _clean_output(), which deletes whole bullet lines; lines like '- fact one'
    have to survive so that _parse_facts() can strip just the bullet prefix.
    """
    _enable_host(monkeypatch)
    monkeypatch.setattr(local_llm, "LLM_BASE_URL", "http://remote/v1")

    # Codex/GPT often answers with a bulleted list, the exact shape that
    # _clean_output() would remove at re.sub(r"^\s*[-*]\s.*\n", "").
    bulleted_reply = "- Alex uses Neovim.\n- Alex dislikes VSCode.\n- Alex uses example.com email."
    set_host_llm_backend(CallableLLMBackend("test", lambda *a, **k: bulleted_reply))

    facts = extract_facts("Alex said they prefer Neovim, dislike VSCode, and use example.com email.")

    assert any("Neovim" in f for f in facts), f"bullet '-' lines were stripped: {facts}"
    for keyword in ("VSCode", "example.com"):
        assert any(keyword in fact for fact in facts)
    # The bullet prefix itself should be gone (parse_facts strips it).
    assert all(not fact.startswith("-") for fact in facts)
| #315 | |
| #316 | |
def test_host_extract_facts_remote_path_uses_temperature_zero(monkeypatch):
    """REGRESSION (codex finding 2): the standalone remote path also gets temperature=0.0.

    extract_facts must pass temperature=0.0 on the remote path too, not only
    when the host backend is used.
    """
    seen = {}

    def fake_remote(prompt, temperature=0.3):
        # Remember the temperature extract_facts actually passed in.
        seen["temperature"] = temperature
        return "Some fact about something.\nAnother fact about elsewhere."

    monkeypatch.setattr(local_llm, "LLM_ENABLED", True)
    monkeypatch.setattr(local_llm, "HOST_LLM_ENABLED", False)  # force remote path
    monkeypatch.setattr(local_llm, "LLM_BASE_URL", "http://remote/v1")
    monkeypatch.setattr(local_llm, "_call_remote_llm", fake_remote)

    facts = extract_facts("some content with facts")
    assert facts  # parsed successfully

    failure_message = "Remote extraction path must use temperature=0.0 for determinism"
    assert seen["temperature"] == 0.0, failure_message
| #336 | |
| #337 | |
def test_host_extract_facts_returns_empty_when_both_host_and_local_fail(monkeypatch):
    """Codex finding 5: host yields nothing and local raises, so the result is [].

    Graceful-degradation path: when the host attempt produces no output and
    the local model raises (e.g. oversized prompt), extract_facts returns []
    cleanly so the AAAK fallback can run.
    """
    _enable_host(monkeypatch)
    monkeypatch.setattr(local_llm, "LLM_BASE_URL", "http://remote/v1")
    silent_host = CallableLLMBackend("test", lambda *a, **k: None)
    set_host_llm_backend(silent_host)

    def fake_local_that_blows_up(prompt, max_new_tokens, stop):
        raise RuntimeError("simulated oversized prompt")

    remote_patch = patch.object(local_llm, "_call_remote_llm")
    loader_patch = patch.object(local_llm, "_load_llm", return_value=fake_local_that_blows_up)
    with remote_patch as remote_spy, loader_patch:
        facts = extract_facts("some content")

    remote_spy.assert_not_called()  # A3 still holds
    assert facts == []
| #353 | |
| #354 | |
def test_host_extract_facts_swallows_exception_then_local(monkeypatch):
    """A raising host backend counts as host-attempted-with-no-output; local takes over."""
    _enable_host(monkeypatch)
    monkeypatch.setattr(local_llm, "LLM_BASE_URL", "http://remote/v1")

    def boom(*a, **k):
        raise RuntimeError("hermes is angry")

    set_host_llm_backend(CallableLLMBackend("test", boom))

    fake_local = lambda prompt, max_new_tokens, stop: "Recovered fact one.\nRecovered fact two."  # noqa: E731
    remote_patch = patch.object(local_llm, "_call_remote_llm")
    loader_patch = patch.object(local_llm, "_load_llm", return_value=fake_local)
    with remote_patch as remote_spy, loader_patch:
        facts = extract_facts("some content")

    remote_spy.assert_not_called()
    assert any("Recovered fact" in fact for fact in facts)
| #370 |