repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | """ |
| #2 | Mnemosyne Polyphonic Recall Engine |
| #3 | =================================== |
| #4 | Multi-strategy parallel retrieval with deterministic re-ranking. |
| #5 | |
| #6 | Strategies (4 voices): |
| #7 | 1. Vector voice: Binary vector similarity (Phase 2) |
| #8 | 2. Graph voice: Episodic graph traversal (Phase 3) |
| #9 | 3. Fact voice: Structured fact matching (Phase 4) |
| #10 | 4. Temporal voice: Time-aware scoring |
| #11 | |
| #12 | Deterministic re-ranker: |
| #13 | - Combines the 4 voices with Reciprocal Rank Fusion (rank-based, k=60) |
| #14 | - No neural network (rule-based weighting) |
| #15 | - Budget-aware context assembly |
| #16 | - Diversity penalty (avoid duplicates) |
| #17 | |
| #18 | Building on: |
| #19 | - Hindsight's multi-strategy retrieval (blog) |
| #20 | - Memanto's information-theoretic scoring (arXiv:2604.22085) |
| #21 | - Our novel deterministic combination |
| #22 | """ |
| #23 | |
| #24 | import sqlite3 |
| #25 | import numpy as np |
| #26 | from datetime import datetime, timedelta |
| #27 | from typing import Dict, List, Tuple, Optional |
| #28 | from dataclasses import dataclass |
| #29 | from pathlib import Path |
| #30 | |
| #31 | from mnemosyne.core.typed_memory import classify_memory, MemoryType, get_type_priority |
| #32 | from mnemosyne.core.binary_vectors import BinaryVectorStore |
| #33 | from mnemosyne.core.episodic_graph import EpisodicGraph |
| #34 | from mnemosyne.core.veracity_consolidation import VeracityConsolidator |
| #35 | |
| #36 | |
@dataclass
class RecallResult:
    """One candidate memory proposed by a single recall voice.

    Instances are produced by the individual voices and later fused
    into per-memory results by the engine.
    """
    # Identifier of the memory this candidate refers to.
    memory_id: str
    # Voice-local relevance score; scales differ from voice to voice.
    score: float
    # Name of the producing voice ("vector", "graph", "fact" or "temporal").
    voice: str
    # Free-form, voice-specific details about the match.
    metadata: Dict
| #44 | |
| #45 | |
@dataclass
class PolyphonicResult:
    """Fused view of one memory across every voice that returned it."""
    # Identifier of the memory.
    memory_id: str
    # Total fused score accumulated over all contributing voices.
    combined_score: float
    # Per-voice contribution to the fused score, keyed by voice name.
    voice_scores: Dict[str, float]
    # Union of the metadata reported by the contributing voices.
    metadata: Dict
| #53 | |
| #54 | |
class PolyphonicRecallEngine:
    """
    Multi-strategy retrieval with deterministic re-ranking.

    Four voices each propose candidate memories:
    - vector: Binary vector similarity
    - graph: Episodic graph traversal
    - fact: Structured fact matching
    - temporal: Time-aware scoring

    The voices run one after another. Their candidates are fused with
    Reciprocal Rank Fusion, filtered for near-duplicate content and
    finally truncated to a context budget.
    """

    # Smoothing constant of Reciprocal Rank Fusion; 60 is the value used
    # in the original RRF paper.
    RRF_K = 60
    # Candidates whose content similarity to an already selected result
    # exceeds this value are dropped by the diversity re-ranker.
    DIVERSITY_THRESHOLD = 0.8
    # Rough characters-per-token ratio used for budget accounting.
    CHARS_PER_TOKEN = 4
    # Metadata keys that carry human-readable content of a memory.
    _CONTENT_KEYS = ("gist", "fact", "subject", "predicate", "object")
    # Punctuation stripped from query words before fact lookup.
    _WORD_PUNCTUATION = ".,;:!?\"'()[]{}"

    def __init__(self, db_path: Optional[Path] = None):
        """Open the three subsystems on a shared database file.

        Args:
            db_path: SQLite database path. Defaults to
                ``~/.hermes/mnemosyne/data/mnemosyne.db``.
        """
        self.db_path = db_path or Path.home() / ".hermes" / "mnemosyne" / "data" / "mnemosyne.db"

        # Initialize subsystems
        self.vector_store = BinaryVectorStore(db_path=self.db_path)
        self.graph = EpisodicGraph(db_path=self.db_path)
        self.consolidator = VeracityConsolidator(db_path=self.db_path)

        # Per-voice weights. They are reported by get_stats(); the
        # rank-based fusion in _combine_voices does not apply them.
        self.voice_weights = {
            "vector": 0.35,
            "graph": 0.25,
            "fact": 0.25,
            "temporal": 0.15,
        }

    def recall(self, query: str, query_embedding: Optional[np.ndarray] = None,
               top_k: int = 10, context_budget: int = 4000) -> List["PolyphonicResult"]:
        """
        Polyphonic recall: run all 4 voices, fuse, re-rank, fit to budget.

        Args:
            query: Text query
            query_embedding: Optional pre-computed embedding; without it
                the vector voice contributes nothing
            top_k: Maximum number of results kept by the re-ranker
            context_budget: Max tokens for context assembly

        Returns:
            List of PolyphonicResult, sorted by combined score (descending)
        """
        # Run all 4 voices (sequentially)
        vector_results = self._vector_voice(query_embedding)
        graph_results = self._graph_voice(query)
        fact_results = self._fact_voice(query)
        temporal_results = self._temporal_voice(query)

        # Fuse per-voice rankings into one score per memory
        fused = self._combine_voices(
            vector_results, graph_results, fact_results, temporal_results
        )

        # Drop near-duplicates and keep at most top_k
        reranked = self._diversity_rerank(fused, top_k)

        # Trim to the context budget
        return self._assemble_context(reranked, context_budget)

    def _vector_voice(self, query_embedding: Optional[np.ndarray]) -> List["RecallResult"]:
        """
        Voice 1: Binary vector similarity.

        Returns the vector store's top 20 matches, or an empty list when
        no embedding was supplied.
        """
        if query_embedding is None:
            return []

        hits = self.vector_store.search(query_embedding, top_k=20)

        return [
            RecallResult(
                memory_id=hit["memory_id"],
                score=hit["score"],
                voice="vector",
                metadata={"distance": hit["distance"]},
            )
            for hit in hits
        ]

    def _graph_voice(self, query: str) -> List["RecallResult"]:
        """
        Voice 2: Episodic graph traversal.

        Extracts entities from the query and collects, per entity, the
        gists mentioning it followed by the facts whose subject it is.
        The same memory may appear several times; _combine_voices
        credits it only once per voice.
        """
        results = []
        for entity in self._extract_entities(query):
            # Gists mentioning this entity get a fixed base score
            for gist in self.graph.find_gists_by_participant(entity):
                results.append(RecallResult(
                    memory_id=gist.id.replace("gist_", ""),
                    score=0.6,  # Base graph score
                    voice="graph",
                    metadata={"entity": entity, "gist": gist.text},
                ))

            # Facts about this entity are scored by half their confidence
            for fact in self.graph.find_facts_by_subject(entity):
                results.append(RecallResult(
                    # Last "_"-separated segment; the whole id when there is no "_"
                    memory_id=fact.id.split("_")[-1],
                    score=fact.confidence * 0.5,
                    voice="graph",
                    metadata={
                        "entity": entity,
                        "fact": f"{fact.subject} {fact.predicate} {fact.object}",
                    },
                ))

        return results

    def _fact_voice(self, query: str) -> List["RecallResult"]:
        """
        Voice 3: Structured fact matching.

        Every distinct query word of at least 3 characters is looked up,
        capitalized, as a fact subject among consolidated facts with
        confidence >= 0.5. Surrounding punctuation is stripped first so
        that "Alice?" still matches the subject "Alice".
        """
        results = []
        seen_words = set()
        for raw_word in query.lower().split():
            word = raw_word.strip(self._WORD_PUNCTUATION)
            # Skip short words and words already looked up: a repeated
            # lookup would only duplicate the same facts.
            if len(word) < 3 or word in seen_words:
                continue
            seen_words.add(word)

            facts = self.consolidator.get_consolidated_facts(
                subject=word.capitalize(),
                min_confidence=0.5
            )

            for fact in facts:
                results.append(RecallResult(
                    memory_id=f"cf_{fact.subject}_{fact.predicate}_{fact.object}",
                    score=fact.confidence,
                    voice="fact",
                    metadata={
                        "subject": fact.subject,
                        "predicate": fact.predicate,
                        "object": fact.object,
                        "mentions": fact.mention_count,
                    },
                ))

        return results

    def _temporal_voice(self, query: str) -> List["RecallResult"]:
        """
        Voice 4: Time-aware scoring.

        Only active when the query contains a temporal keyword. Returns
        up to 20 rows of ``working_memory`` from the last 7 days, scored
        as ``exp(-age_days / 7) * importance``. Returns an empty list
        when the table does not exist.
        """
        temporal_keywords = [
            "yesterday", "today", "recent", "last", "latest",
            "this week", "this month", "ago", "before"
        ]

        lowered = query.lower()
        if not any(kw in lowered for kw in temporal_keywords):
            return []

        conn = sqlite3.connect(str(self.db_path))
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Check if working_memory table exists
            cursor.execute("""
                SELECT name FROM sqlite_master WHERE type='table' AND name='working_memory'
            """)
            if not cursor.fetchone():
                return []

            # One reference instant for the cutoff and for every naive row
            now = datetime.now()
            week_ago = (now - timedelta(days=7)).isoformat()
            cursor.execute("""
                SELECT id, content, timestamp, importance
                FROM working_memory
                WHERE timestamp > ?
                ORDER BY timestamp DESC
                LIMIT 20
            """, (week_ago,))

            results = []
            for row in cursor.fetchall():
                stamp = datetime.fromisoformat(row["timestamp"])
                # Naive and aware datetimes cannot be subtracted; compare
                # an aware timestamp against "now" in its own timezone.
                reference = now if stamp.tzinfo is None else datetime.now(stamp.tzinfo)
                age_days = (reference - stamp).total_seconds() / 86400
                # Exponential decay with a 7-day time constant: the score
                # falls to 1/e after one week.
                temporal_score = np.exp(-age_days / 7)

                results.append(RecallResult(
                    memory_id=row["id"],
                    score=temporal_score * row["importance"],
                    voice="temporal",
                    metadata={"age_days": age_days, "importance": row["importance"]},
                ))

            return results
        finally:
            # Always release the connection, including on query/parse errors
            conn.close()

    def _extract_entities(self, text: str) -> List[str]:
        """Extract potential entity names (runs of capitalized words).

        Duplicates are removed and first-appearance order is kept, so the
        output does not depend on string hash randomisation.
        """
        import re
        matches = re.findall(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b', text)
        return list(dict.fromkeys(matches))

    def _combine_voices(self, *voice_results: List["RecallResult"]) -> Dict[str, "PolyphonicResult"]:
        """Combine results from all voices using Reciprocal Rank Fusion.

        RRF formula: score(d) = sum(1 / (k + rank(d, voice_i))) over the
        voices that returned d, with k = RRF_K. Ranks are positions in the
        voice's score-descending list, so score scales of different voices
        never need calibrating against each other.

        A memory returned several times by one voice is credited once, at
        its best rank; the metadata of all its occurrences is merged.

        Args:
            *voice_results: One list of RecallResult per voice.

        Returns:
            Mapping of memory_id to its fused PolyphonicResult.
        """
        combined = {}

        for results in voice_results:
            if not results:
                continue

            # Best (smallest) rank per memory within this voice. sorted()
            # is stable, so ties keep the voice's own order.
            best_rank = {}
            for r in sorted(results, key=lambda r: r.score, reverse=True):
                if r.memory_id not in best_rank:
                    best_rank[r.memory_id] = len(best_rank) + 1

            credited = set()
            for r in results:
                entry = combined.get(r.memory_id)
                if entry is None:
                    entry = PolyphonicResult(
                        memory_id=r.memory_id,
                        combined_score=0.0,
                        voice_scores={},
                        metadata={},
                    )
                    combined[r.memory_id] = entry
                entry.metadata.update(r.metadata)

                # Credit each memory only once per voice
                if r.memory_id in credited:
                    continue
                credited.add(r.memory_id)

                contribution = 1.0 / (self.RRF_K + best_rank[r.memory_id])
                entry.voice_scores[r.voice] = entry.voice_scores.get(r.voice, 0.0) + contribution
                entry.combined_score += contribution

        return combined

    def _diversity_rerank(self, results: Dict[str, "PolyphonicResult"],
                          top_k: int) -> List["PolyphonicResult"]:
        """
        Re-rank with a diversity filter.

        Walks the candidates by descending combined score and keeps one
        only if its similarity to every already kept result is at most
        DIVERSITY_THRESHOLD. Stops once top_k results are kept.
        """
        ranked = sorted(
            results.values(),
            key=lambda x: x.combined_score,
            reverse=True
        )

        selected = []
        for candidate in ranked:
            if len(selected) >= top_k:
                break

            if all(
                self._estimate_similarity(candidate, kept) <= self.DIVERSITY_THRESHOLD
                for kept in selected
            ):
                selected.append(candidate)

        return selected

    def _content_tokens(self, result: "PolyphonicResult") -> set:
        """Return the lower-cased words of a result's textual metadata."""
        parts = [
            str(result.metadata[key])
            for key in self._CONTENT_KEYS
            if result.metadata.get(key)
        ]
        return set(" ".join(parts).lower().split())

    def _estimate_similarity(self, a: "PolyphonicResult", b: "PolyphonicResult") -> float:
        """Estimate content similarity between two results, in [0, 1].

        Results with the same memory_id are identical (1.0). Otherwise the
        Jaccard overlap of their content words is returned. When either
        side has no textual metadata there is no evidence of duplication
        and the result is 0.0.

        Which voices found a memory is not used: two different memories
        found by the same voice are not duplicates of each other.
        """
        if a.memory_id == b.memory_id:
            return 1.0

        tokens_a = self._content_tokens(a)
        tokens_b = self._content_tokens(b)
        if not tokens_a or not tokens_b:
            return 0.0

        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)

    def _assemble_context(self, results: List["PolyphonicResult"],
                          budget: int) -> List["PolyphonicResult"]:
        """
        Assemble context within token budget.

        Each result is costed as the length of its metadata repr plus 100
        characters of overhead, at CHARS_PER_TOKEN characters per token.
        Results are taken in order and assembly stops at the first one
        that would exceed the budget.
        """
        char_budget = budget * self.CHARS_PER_TOKEN
        used_chars = 0
        selected = []

        for result in results:
            cost = len(str(result.metadata)) + 100

            if used_chars + cost > char_budget:
                break

            selected.append(result)
            used_chars += cost

        return selected

    def get_stats(self) -> Dict:
        """Get engine statistics: voice weights plus each subsystem's stats."""
        return {
            # Copy so callers cannot mutate the engine's configuration
            "voice_weights": dict(self.voice_weights),
            "vector_stats": self.vector_store.get_stats(),
            "graph_stats": self.graph.get_stats(),
            "consolidation_stats": self.consolidator.get_stats(),
        }

    def close(self):
        """Close all subsystem connections.

        Every subsystem is closed even if an earlier close raises; the
        exception still propagates afterwards.
        """
        try:
            self.vector_store.close()
        finally:
            try:
                self.graph.close()
            finally:
                self.consolidator.close()
| #395 | |
| #396 | |
| #397 | # --- Testing --- |
if __name__ == "__main__":
    # Smoke test: builds an engine on a throw-away database and exercises
    # recall() and get_stats(). It only checks that nothing raises.
    import os
    import tempfile

    print("Polyphonic Recall Engine Tests")
    print("=" * 60)

    # Reserve a temp database path. The handle is closed right away so
    # the subsystems can open the file themselves.
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = f.name

    engine = None
    try:
        engine = PolyphonicRecallEngine(db_path=Path(db_path))

        # Test 1: Empty recall
        print("\nTest 1: Empty recall")
        results = engine.recall("What did Alice say yesterday?")
        print(f"  Results: {len(results)}")

        # Test 2: Stats
        print("\nTest 2: Stats")
        stats = engine.get_stats()
        print(f"  Voice weights: {stats['voice_weights']}")
    finally:
        # Cleanup runs on failure too: close connections first, then
        # remove the temp file.
        if engine is not None:
            engine.close()
        os.unlink(db_path)

    print("\n" + "=" * 60)
    print("Polyphonic recall tests passed!")
| #427 |