my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

"""
Mnemosyne Polyphonic Recall Engine
===================================
Multi-strategy retrieval with deterministic re-ranking.

Strategies (4 voices):
1. Vector voice: Binary vector similarity (Phase 2)
2. Graph voice: Episodic graph traversal (Phase 3)
3. Fact voice: Structured fact matching (Phase 4)
4. Temporal voice: Time-aware scoring

Deterministic re-ranker:
- Fuses the 4 voice rankings with Reciprocal Rank Fusion (rank-based,
  so scores from different voices need no calibration)
- No neural network (rule-based)
- Budget-aware context assembly
- Diversity penalty (avoid duplicates)

Building on:
- Hindsight's multi-strategy retrieval (blog)
- Memanto's information-theoretic scoring (arXiv:2604.22085)
- Our novel deterministic combination
"""
#23
import contextlib
import re
import sqlite3
import string
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import numpy as np

from mnemosyne.core.binary_vectors import BinaryVectorStore
from mnemosyne.core.episodic_graph import EpisodicGraph
from mnemosyne.core.typed_memory import classify_memory, MemoryType, get_type_priority
from mnemosyne.core.veracity_consolidation import VeracityConsolidator
#35
#36
@dataclass
class RecallResult:
    """A single candidate memory proposed by one recall voice.

    ``metadata`` may be omitted; each instance then gets its own empty dict.
    """

    # Identifier of the recalled memory.
    memory_id: str
    # Relevance score assigned by the producing voice.
    score: float
    # Name of the voice that produced this hit (e.g. "vector", "graph").
    voice: str
    # Voice-specific details about the hit.
    metadata: Dict = field(default_factory=dict)
#44
#45
@dataclass
class PolyphonicResult:
    """One memory's fused result across all recall voices.

    ``voice_scores`` and ``metadata`` may be omitted; each instance then
    gets its own empty dict, which suits accumulator-style construction.
    """

    # Identifier of the recalled memory.
    memory_id: str
    # Fused score across every voice that returned this memory.
    combined_score: float
    # Per-voice contribution to ``combined_score``, keyed by voice name.
    voice_scores: Dict[str, float] = field(default_factory=dict)
    # Metadata merged from the contributing voice results.
    metadata: Dict = field(default_factory=dict)
#53
#54
class PolyphonicRecallEngine:
    """
    Multi-strategy retrieval with deterministic re-ranking.

    Four voices each propose candidate memories:
    - vector: Binary vector similarity
    - graph: Episodic graph traversal
    - fact: Structured fact matching
    - temporal: Time-aware scoring

    The candidates are fused with Reciprocal Rank Fusion, filtered for
    near-duplicates and finally trimmed to a context budget.
    """

    # Substrings whose presence marks a query as time-sensitive.
    _TEMPORAL_KEYWORDS = (
        "yesterday", "today", "recent", "last", "latest",
        "this week", "this month", "ago", "before",
    )
    # Metadata keys whose string values describe what a result says.
    _CONTENT_KEYS = ("gist", "fact", "subject", "predicate", "object")
    # Runs of capitalized words, e.g. "Alice" or "Carol Smith".
    _ENTITY_PATTERN = re.compile(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b')
    _TOKEN_PATTERN = re.compile(r"\w+")
    # Smoothing constant of Reciprocal Rank Fusion (60 is the value used
    # in the original RRF paper).
    _RRF_K = 60
    # Results whose content similarity exceeds this are treated as duplicates.
    _DUPLICATE_THRESHOLD = 0.8

    def __init__(self, db_path: Optional[Path] = None):
        """Open the three storage subsystems on one shared database.

        Args:
            db_path: Path of the SQLite database. When omitted (or falsy),
                ``~/.hermes/mnemosyne/data/mnemosyne.db`` is used.
        """
        self.db_path = db_path or Path.home() / ".hermes" / "mnemosyne" / "data" / "mnemosyne.db"

        # Initialize subsystems
        self.vector_store = BinaryVectorStore(db_path=self.db_path)
        self.graph = EpisodicGraph(db_path=self.db_path)
        self.consolidator = VeracityConsolidator(db_path=self.db_path)

        # Voice weights. NOTE: _combine_voices fuses by rank and does not
        # consult these; they are only reported by get_stats().
        self.voice_weights = {
            "vector": 0.35,
            "graph": 0.25,
            "fact": 0.25,
            "temporal": 0.15,
        }

    def recall(self, query: str, query_embedding: Optional[np.ndarray] = None,
               top_k: int = 10, context_budget: int = 4000) -> List["PolyphonicResult"]:
        """
        Polyphonic recall: run all 4 voices, then fuse their rankings.

        The voices are invoked one after another in this method.

        Args:
            query: Text query
            query_embedding: Optional pre-computed embedding; without it
                the vector voice contributes nothing
            top_k: Maximum number of results kept by the diversity re-rank
            context_budget: Max tokens for context assembly

        Returns:
            List of PolyphonicResult, sorted by combined score (best first)
        """
        # Run all 4 voices
        vector_results = self._vector_voice(query_embedding)
        graph_results = self._graph_voice(query)
        fact_results = self._fact_voice(query)
        temporal_results = self._temporal_voice(query)

        # Combine results
        all_results = self._combine_voices(
            vector_results, graph_results, fact_results, temporal_results
        )

        # Re-rank with diversity
        reranked = self._diversity_rerank(all_results, top_k)

        # Assemble context within budget
        return self._assemble_context(reranked, context_budget)

    def _vector_voice(self, query_embedding: Optional[np.ndarray]) -> List["RecallResult"]:
        """
        Voice 1: Binary vector similarity.

        Asks the vector store for its 20 best matches. Returns an empty
        list when no embedding was supplied.
        """
        if query_embedding is None:
            return []

        hits = self.vector_store.search(query_embedding, top_k=20)

        return [
            RecallResult(
                memory_id=hit["memory_id"],
                score=hit["score"],
                voice="vector",
                metadata={"distance": hit["distance"]},
            )
            for hit in hits
        ]

    def _graph_voice(self, query: str) -> List["RecallResult"]:
        """
        Voice 2: Episodic graph traversal.

        Extracts entities from the query and collects the gists that
        mention them and the facts stored about them. The same memory may
        appear more than once (via several entities); _combine_voices
        collapses such repeats.
        """
        results = []
        for entity in self._extract_entities(query):
            # Gists mentioning this entity all get the same base score.
            for gist in self.graph.find_gists_by_participant(entity):
                results.append(RecallResult(
                    memory_id=gist.id.replace("gist_", ""),
                    score=0.6,  # Base graph score
                    voice="graph",
                    metadata={"entity": entity, "gist": gist.text},
                ))

            # Facts about this entity are scored by half their confidence.
            for fact in self.graph.find_facts_by_subject(entity):
                results.append(RecallResult(
                    # Text after the last "_" (the whole id if there is none).
                    memory_id=fact.id.rsplit("_", 1)[-1],
                    score=fact.confidence * 0.5,
                    voice="graph",
                    metadata={"entity": entity, "fact": f"{fact.subject} {fact.predicate} {fact.object}"},
                ))

        return results

    def _fact_voice(self, query: str) -> List["RecallResult"]:
        """
        Voice 3: Structured fact matching.

        Treats every query word of at least 3 characters as a potential
        subject and looks up its consolidated facts (confidence >= 0.5).
        Surrounding punctuation is stripped first so "Alice?" is looked up
        as "Alice", and each distinct word is looked up only once.
        """
        results = []
        looked_up = set()
        for raw_word in query.lower().split():
            word = raw_word.strip(string.punctuation)
            if len(word) < 3 or word in looked_up:
                continue
            looked_up.add(word)

            facts = self.consolidator.get_consolidated_facts(
                subject=word.capitalize(),
                min_confidence=0.5,
            )

            for fact in facts:
                results.append(RecallResult(
                    memory_id=f"cf_{fact.subject}_{fact.predicate}_{fact.object}",
                    score=fact.confidence,
                    voice="fact",
                    metadata={
                        "subject": fact.subject,
                        "predicate": fact.predicate,
                        "object": fact.object,
                        "mentions": fact.mention_count,
                    },
                ))

        return results

    def _temporal_voice(self, query: str) -> List["RecallResult"]:
        """
        Voice 4: Time-aware scoring.

        Only active when the query contains a temporal keyword. Reads up
        to 20 ``working_memory`` rows from the last 7 days and scores each
        as ``exp(-age_days / 7) * importance``, i.e. exponential decay
        with a 7-day time constant (the score falls to 1/e after a week).
        Returns an empty list when the table does not exist.
        """
        lowered = query.lower()
        if not any(keyword in lowered for keyword in self._TEMPORAL_KEYWORDS):
            return []

        # closing() releases the connection even if a query raises.
        with contextlib.closing(sqlite3.connect(str(self.db_path))) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()

            # Check if working_memory table exists
            cursor.execute("""
                SELECT name FROM sqlite_master WHERE type='table' AND name='working_memory'
            """)
            if cursor.fetchone() is None:
                return []

            # Get memories from last 7 days
            now = datetime.now()
            week_ago = (now - timedelta(days=7)).isoformat()
            cursor.execute("""
                SELECT id, content, timestamp, importance
                FROM working_memory
                WHERE timestamp > ?
                ORDER BY timestamp DESC
                LIMIT 20
            """, (week_ago,))
            rows = cursor.fetchall()

        results = []
        for row in rows:
            stamp = datetime.fromisoformat(row["timestamp"])
            # An offset-aware timestamp cannot be subtracted from a naive
            # "now"; measure its age against "now" in its own zone instead.
            reference = now if stamp.tzinfo is None else datetime.now(stamp.tzinfo)
            age_days = (reference - stamp).total_seconds() / 86400
            temporal_score = float(np.exp(-age_days / 7))

            results.append(RecallResult(
                memory_id=row["id"],
                score=temporal_score * row["importance"],
                voice="temporal",
                metadata={"age_days": age_days, "importance": row["importance"]},
            ))

        return results

    def _extract_entities(self, text: str) -> List[str]:
        """Extract potential entity names (runs of capitalized words).

        Duplicates are dropped and the order of first appearance is kept,
        so the result does not depend on string-hash randomization.
        """
        return list(dict.fromkeys(self._ENTITY_PATTERN.findall(text)))

    def _combine_voices(self, *voice_results: List["RecallResult"]) -> Dict[str, "PolyphonicResult"]:
        """Combine results from all voices using Reciprocal Rank Fusion.

        RRF formula: score(d) = sum(1 / (k + rank(d, voice_i))) over the
        voices that returned d, with k = 60. Fusing on rank rather than
        raw score means the voices' scores need not be calibrated against
        each other.

        A memory returned several times by one voice counts once for that
        voice, through its best-scoring hit.

        Args:
            voice_results: One list of RecallResult per voice; empty lists
                are ignored.

        Returns:
            Mapping of memory_id to its fused PolyphonicResult.
        """
        combined: Dict[str, "PolyphonicResult"] = {}

        for results in voice_results:
            if not results:
                continue

            # Keep the best hit per memory so repeats neither worsen the
            # memory's rank nor add its contribution more than once.
            best_hits = {}
            for hit in results:
                current = best_hits.get(hit.memory_id)
                if current is None or hit.score > current.score:
                    best_hits[hit.memory_id] = hit

            # sorted() is stable, so equal scores keep their original order.
            ranked = sorted(best_hits.values(), key=lambda r: r.score, reverse=True)

            for rank, hit in enumerate(ranked, start=1):
                entry = combined.get(hit.memory_id)
                if entry is None:
                    entry = PolyphonicResult(
                        memory_id=hit.memory_id,
                        combined_score=0.0,
                        voice_scores={},
                        metadata={},
                    )
                    combined[hit.memory_id] = entry

                # Better rank (lower number) = larger contribution.
                contribution = 1.0 / (self._RRF_K + rank)
                entry.voice_scores[hit.voice] = entry.voice_scores.get(hit.voice, 0.0) + contribution
                entry.combined_score += contribution
                entry.metadata.update(hit.metadata)

        return combined

    def _diversity_rerank(self, results: Dict[str, "PolyphonicResult"],
                          top_k: int) -> List["PolyphonicResult"]:
        """
        Re-rank with a diversity filter.

        Walks the results from best to worst combined score and keeps a
        result only if it is not a near-duplicate (similarity above 0.8)
        of one already kept. Stops once ``top_k`` results are kept.
        """
        ranked = sorted(
            results.values(),
            key=lambda x: x.combined_score,
            reverse=True,
        )

        selected = []
        for candidate in ranked:
            if len(selected) >= top_k:
                break

            is_diverse = all(
                self._estimate_similarity(candidate, kept) <= self._DUPLICATE_THRESHOLD
                for kept in selected
            )
            if is_diverse:
                selected.append(candidate)

        return selected

    def _estimate_similarity(self, a: "PolyphonicResult", b: "PolyphonicResult") -> float:
        """Estimate how similar the content of two results is.

        Jaccard similarity of the word sets found in the content-bearing
        metadata fields (gist, fact, subject, predicate, object). Results
        without any such text cannot be judged and score 0.0, so they are
        never discarded as duplicates merely for sharing a voice.
        """
        tokens_a = self._content_tokens(a)
        tokens_b = self._content_tokens(b)

        if not tokens_a or not tokens_b:
            return 0.0

        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)

    def _content_tokens(self, result: "PolyphonicResult") -> set:
        """Collect the lowercase word set of a result's textual metadata."""
        tokens = set()
        for key in self._CONTENT_KEYS:
            value = result.metadata.get(key)
            if isinstance(value, str):
                tokens.update(self._TOKEN_PATTERN.findall(value.lower()))
        return tokens

    def _assemble_context(self, results: List["PolyphonicResult"],
                          budget: int) -> List["PolyphonicResult"]:
        """
        Assemble context within token budget.

        Approximates 4 characters per token and charges each result the
        length of its printed metadata plus a flat 100 characters. Results
        are taken in order until the next one would exceed the budget.
        """
        char_budget = budget * 4
        used_chars = 0
        selected = []

        for result in results:
            # Estimate result size
            cost = len(str(result.metadata)) + 100

            if used_chars + cost > char_budget:
                break

            selected.append(result)
            used_chars += cost

        return selected

    def get_stats(self) -> Dict:
        """Get engine statistics.

        Returns a dict with a copy of the voice weights (so callers cannot
        alter the engine's configuration through it) and the statistics
        reported by each subsystem.
        """
        return {
            "voice_weights": dict(self.voice_weights),
            "vector_stats": self.vector_store.get_stats(),
            "graph_stats": self.graph.get_stats(),
            "consolidation_stats": self.consolidator.get_stats(),
        }

    def close(self):
        """Close all connections.

        Subsystems are closed in the order vector store, graph,
        consolidator; a failure in one does not prevent the others from
        being closed.
        """
        with contextlib.ExitStack() as stack:
            # ExitStack unwinds in reverse registration order.
            for subsystem in (self.consolidator, self.graph, self.vector_store):
                stack.callback(subsystem.close)
#395
#396
# --- Testing ---
def _run_smoke_tests() -> None:
    """Exercise the engine against a throwaway database.

    The engine is closed and the temporary database file removed even
    when one of the checks raises.
    """
    import os
    import tempfile

    print("Polyphonic Recall Engine Tests")
    print("=" * 60)

    # Create temp database (only the path is needed; the file is reopened
    # by the engine's subsystems).
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = f.name

    try:
        engine = PolyphonicRecallEngine(db_path=Path(db_path))
        try:
            # Test 1: Empty recall
            print("\nTest 1: Empty recall")
            results = engine.recall("What did Alice say yesterday?")
            print(f" Results: {len(results)}")

            # Test 2: Stats
            print("\nTest 2: Stats")
            stats = engine.get_stats()
            print(f" Voice weights: {stats['voice_weights']}")
        finally:
            engine.close()
    finally:
        # Cleanup
        os.unlink(db_path)

    print("\n" + "=" * 60)
    print("Polyphonic recall tests passed!")


if __name__ == "__main__":
    _run_smoke_tests()
#427

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public