my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""
#2	Mnemosyne Veracity-Weighted Consolidation
#3	=========================================
#4	Our novel contribution: Bayesian confidence scoring + conflict resolution.
#5
#6	Veracity tiers:
#7	- stated: 1.0 (user explicitly stated)
#8	- inferred: 0.7 (inferred from context)
#9	- tool: 0.5 (tool output, may be stale)
#10	- imported: 0.6 (imported from external source)
#11	- unknown: 0.8 (default, unverified)
#12
#13	Bayesian updating:
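#14	- confidence = 1 - 0.5 * 0.7^(n-1) for "stated" facts, where n = mention count; in general a new fact starts at veracity_weight * 0.5 and each repeat mention adds (1 - confidence) * veracity_weight * 0.3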
#15	- More mentions = higher confidence
#16	- Contradictions detected and flagged
#17
#18	Conflict resolution:
#19	- Same subject + predicate = potential conflict
#20	- Higher confidence wins
#21	- Lower confidence flagged for review
#22	- Consolidation: periodic synthesis of high-confidence facts
#23	"""
#24
#25	import sqlite3
#26	import json
#27	from datetime import datetime, timedelta
#28	from typing import Dict, List, Tuple, Optional
#29	from dataclasses import dataclass
#30	from pathlib import Path
#31
#32
# Multiplier applied per veracity tier when seeding or boosting a fact's
# confidence.  Lookups elsewhere in this module fall back to 0.8 (the
# "unknown" weight) for tier names that are not listed here.
# NOTE(review): "unknown" outranks "inferred", "imported" and "tool" --
# confirm that this ordering is intentional.
VERACITY_WEIGHTS: Dict[str, float] = {
    "stated": 1.0,    # user explicitly stated
    "inferred": 0.7,  # inferred from context
    "tool": 0.5,      # tool output, may be stale
    "imported": 0.6,  # imported from external source
    "unknown": 0.8,   # default, unverified
}
#41
#42
#43	@dataclass
#44	class ConsolidatedFact:
#45	"""A fact that has been through consolidation."""
#46	subject: str
#47	predicate: str
#48	object: str
#49	confidence: float
#50	mention_count: int
#51	first_seen: str
#52	last_seen: str
#53	sources: List[str]
#54	veracity: str
#55	superseded: bool = False
#56
#57
class VeracityConsolidator:
    """
    Veracity-weighted confidence consolidation with conflict detection.

    Facts are (subject, predicate, object) triples stored in the
    ``consolidated_facts`` table.  Re-stating a triple raises its confidence;
    stating a different object for the same subject + predicate records a row
    in the ``conflicts`` table, which can be resolved manually
    (``resolve_conflict``) or automatically (``run_consolidation_pass``).

    Builds on:
    - Memanto's conflict resolution (arXiv:2604.22085)
    - REMem's fact preservation (arXiv:2602.13530)
    - Our novel veracity-weighted Bayesian updating

    The instance can be used as a context manager; leaving the ``with`` block
    calls ``close()``.
    """

    # Hard upper bound on the length of a generated fact id.
    _MAX_ID_LENGTH = 100

    def __init__(self, db_path: Optional[Path] = None, conn=None):
        """
        Open (or adopt) a SQLite connection and make sure the schema exists.

        Args:
            db_path: Database file.  Defaults to
                ``~/.hermes/mnemosyne/data/mnemosyne.db`` when no connection
                is supplied.
            conn: Existing ``sqlite3`` connection to reuse.  Its
                ``row_factory`` is set to ``sqlite3.Row`` because this class
                reads columns by name.  A supplied connection is never closed
                by ``close()``.
        """
        if conn is not None:
            self.conn = conn
            self.db_path = db_path or Path(":memory:")
        else:
            self.db_path = db_path or Path.home() / ".hermes" / "mnemosyne" / "data" / "mnemosyne.db"
            if str(self.db_path) != ":memory:":
                # A fresh machine does not have the data directory yet and
                # sqlite3.connect() does not create missing parents.
                try:
                    Path(str(self.db_path)).parent.mkdir(parents=True, exist_ok=True)
                except OSError:
                    # Let sqlite3.connect() below report the real problem.
                    pass
            self.conn = sqlite3.connect(str(self.db_path), check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self._owns_connection = conn is None
        self._init_tables()

    def __enter__(self):
        """Return self so the consolidator can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection (if owned); never suppresses exceptions."""
        self.close()
        return False

    def _init_tables(self):
        """Create the consolidation tables and indexes if they are missing."""
        cursor = self.conn.cursor()

        # Consolidated facts table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS consolidated_facts (
                id TEXT PRIMARY KEY,
                subject TEXT NOT NULL,
                predicate TEXT NOT NULL,
                object TEXT NOT NULL,
                confidence REAL DEFAULT 0.5,
                mention_count INTEGER DEFAULT 1,
                first_seen TEXT,
                last_seen TEXT,
                sources_json TEXT,
                veracity TEXT DEFAULT 'unknown',
                superseded_by TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_cf_subject ON consolidated_facts(subject)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_cf_predicate ON consolidated_facts(predicate)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_cf_object ON consolidated_facts(object)")

        # Conflicts table
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS conflicts (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                fact_a_id TEXT NOT NULL,
                fact_b_id TEXT NOT NULL,
                conflict_type TEXT,
                resolution TEXT,
                resolved_at TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        self.conn.commit()

    def bayesian_update(self, current_confidence: float, veracity: str) -> float:
        """
        Raise a confidence value for one additional mention.

        Formula: ``new = old + (1 - old) * veracity_weight * 0.3``, capped at
        1.0.  Each mention therefore closes a fixed fraction of the remaining
        gap to certainty; with weight 1.0 the gap shrinks by a factor of 0.7
        per mention.

        Args:
            current_confidence: Current confidence level
            veracity: Veracity tier; unrecognised tiers use weight 0.8

        Returns:
            float: Updated confidence
        """
        weight = VERACITY_WEIGHTS.get(veracity, 0.8)
        increment = (1.0 - current_confidence) * weight * 0.3
        return min(current_confidence + increment, 1.0)

    def _make_fact_id(self, cursor, subject: str, predicate: str, object: str) -> str:
        """
        Build a primary key for a new fact that does not clash with a stored one.

        The readable form ``cf_<subject>_<predicate>_<object>`` (spaces
        replaced by underscores, truncated to 100 characters) is used whenever
        it is free.  Because that mapping is lossy, two different triples can
        produce the same text; in that case a short digest of the exact triple
        is appended so the INSERT does not violate the primary key.
        """
        limit = self._MAX_ID_LENGTH
        readable = f"cf_{subject}_{predicate}_{object}".replace(" ", "_")[:limit]
        cursor.execute("SELECT 1 FROM consolidated_facts WHERE id = ?", (readable,))
        if cursor.fetchone() is None:
            return readable

        import hashlib  # local import: only needed on the rare collision path

        triple = f"{subject}\x1f{predicate}\x1f{object}".encode("utf-8")
        digest = hashlib.sha1(triple).hexdigest()[:12]
        # Leave room for "_" + digest inside the length limit.
        return f"{readable[:limit - len(digest) - 1]}_{digest}"

    @staticmethod
    def _row_to_fact(row) -> "ConsolidatedFact":
        """Convert a ``consolidated_facts`` row into a ``ConsolidatedFact``."""
        return ConsolidatedFact(
            subject=row["subject"],
            predicate=row["predicate"],
            object=row["object"],
            confidence=row["confidence"],
            mention_count=row["mention_count"],
            first_seen=row["first_seen"],
            last_seen=row["last_seen"],
            sources=json.loads(row["sources_json"] or "[]"),
            veracity=row["veracity"],
            superseded=row["superseded_by"] is not None,
        )

    def consolidate_fact(self, subject: str, predicate: str, object: str,
                         veracity: str = "unknown", source: str = None) -> "ConsolidatedFact":
        """
        Add or update a fact in consolidation.

        An exact (subject, predicate, object) match is treated as a repeat
        mention: its confidence is boosted, its mention count incremented and
        its stored veracity replaced by the one given here.  Otherwise a new
        fact is inserted with confidence ``veracity_weight * 0.5`` and a
        "contradiction" conflict is recorded against every stored fact that
        has the same subject and predicate but a different object.

        Args:
            subject: Fact subject
            predicate: Fact predicate
            object: Fact object
            veracity: Veracity tier
            source: Source memory ID

        Returns:
            ConsolidatedFact: The consolidated result
        """
        cursor = self.conn.cursor()

        # Check if fact already exists
        cursor.execute("""
            SELECT * FROM consolidated_facts
            WHERE subject = ? AND predicate = ? AND object = ?
        """, (subject, predicate, object))
        existing = cursor.fetchone()
        now = datetime.now().isoformat()

        if existing:
            # Repeat mention: boost confidence and extend provenance.
            new_confidence = self.bayesian_update(existing["confidence"], veracity)
            new_count = existing["mention_count"] + 1

            sources = json.loads(existing["sources_json"] or "[]")
            if source and source not in sources:
                sources.append(source)

            cursor.execute("""
                UPDATE consolidated_facts
                SET confidence = ?, mention_count = ?, last_seen = ?,
                    sources_json = ?, veracity = ?, updated_at = ?
                WHERE id = ?
            """, (new_confidence, new_count, now, json.dumps(sources),
                  veracity, now, existing["id"]))
            self.conn.commit()

            return ConsolidatedFact(
                subject=subject,
                predicate=predicate,
                object=object,
                confidence=new_confidence,
                mention_count=new_count,
                first_seen=existing["first_seen"],
                last_seen=now,
                sources=sources,
                veracity=veracity,
            )

        # New fact: find contradicting facts before inserting
        # (same subject + predicate, different object).
        cursor.execute("""
            SELECT id FROM consolidated_facts
            WHERE subject = ? AND predicate = ? AND object != ?
        """, (subject, predicate, object))
        contradicted_ids = [other["id"] for other in cursor.fetchall()]

        fact_id = self._make_fact_id(cursor, subject, predicate, object)
        base_confidence = VERACITY_WEIGHTS.get(veracity, 0.8) * 0.5
        sources = [source] if source else []

        cursor.execute("""
            INSERT INTO consolidated_facts
                (id, subject, predicate, object, confidence, mention_count,
                 first_seen, last_seen, sources_json, veracity)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (fact_id, subject, predicate, object, base_confidence, 1,
              now, now, json.dumps(sources), veracity))
        self.conn.commit()

        # Record conflicts (new fact is always fact_a)
        for other_id in contradicted_ids:
            self._record_conflict(fact_id, other_id, "contradiction")

        return ConsolidatedFact(
            subject=subject,
            predicate=predicate,
            object=object,
            confidence=base_confidence,
            mention_count=1,
            first_seen=now,
            last_seen=now,
            sources=sources,
            veracity=veracity,
        )

    def _record_conflict(self, fact_a_id: str, fact_b_id: str, conflict_type: str):
        """Record an unresolved conflict between two facts and commit it."""
        self.conn.execute("""
            INSERT INTO conflicts (fact_a_id, fact_b_id, conflict_type)
            VALUES (?, ?, ?)
        """, (fact_a_id, fact_b_id, conflict_type))
        self.conn.commit()

    def resolve_conflict(self, conflict_id: int, winning_fact_id: str):
        """
        Resolve a conflict by marking the losing fact as superseded.

        Does nothing when ``conflict_id`` does not exist.

        Args:
            conflict_id: Conflict to resolve
            winning_fact_id: The fact that wins; must be one of the two facts
                named by the conflict

        Raises:
            ValueError: If ``winning_fact_id`` is not part of the conflict.
                Previously this case silently superseded ``fact_a`` by an
                unrelated id.
        """
        cursor = self.conn.cursor()

        # Get conflict details
        cursor.execute("SELECT * FROM conflicts WHERE id = ?", (conflict_id,))
        conflict = cursor.fetchone()
        if not conflict:
            return

        fact_a, fact_b = conflict["fact_a_id"], conflict["fact_b_id"]
        if winning_fact_id == fact_a:
            losing_id = fact_b
        elif winning_fact_id == fact_b:
            losing_id = fact_a
        else:
            raise ValueError(
                f"fact {winning_fact_id!r} is not part of conflict {conflict_id!r}"
            )

        now = datetime.now().isoformat()

        # Mark as superseded
        cursor.execute("""
            UPDATE consolidated_facts
            SET superseded_by = ?, updated_at = ?
            WHERE id = ?
        """, (winning_fact_id, now, losing_id))

        # Mark conflict as resolved
        cursor.execute("""
            UPDATE conflicts
            SET resolution = ?, resolved_at = ?
            WHERE id = ?
        """, (f"superseded_by_{winning_fact_id}", now, conflict_id))

        self.conn.commit()

    def get_conflicts(self) -> List[Dict]:
        """Get all unresolved conflicts, newest first."""
        cursor = self.conn.cursor()
        cursor.execute("""
            SELECT * FROM conflicts WHERE resolution IS NULL
            ORDER BY created_at DESC
        """)
        return [
            {
                "id": row["id"],
                "fact_a_id": row["fact_a_id"],
                "fact_b_id": row["fact_b_id"],
                "type": row["conflict_type"],
                "created_at": row["created_at"],
            }
            for row in cursor.fetchall()
        ]

    def get_consolidated_facts(self, subject: str = None,
                               min_confidence: float = 0.5) -> List["ConsolidatedFact"]:
        """
        Get active (non-superseded) facts, most confident first.

        Args:
            subject: Filter by subject; falsy values (None, "") mean no filter
            min_confidence: Minimum confidence threshold (inclusive)

        Returns:
            List of ConsolidatedFact ordered by confidence, then mention count,
            both descending
        """
        # Only fixed SQL fragments are concatenated; values stay parameterized.
        conditions = ["confidence >= ?", "superseded_by IS NULL"]
        params = [min_confidence]
        if subject:
            conditions.insert(0, "subject = ?")
            params.insert(0, subject)

        cursor = self.conn.cursor()
        cursor.execute(
            "SELECT * FROM consolidated_facts WHERE "
            + " AND ".join(conditions)
            + " ORDER BY confidence DESC, mention_count DESC",
            params,
        )
        return [self._row_to_fact(row) for row in cursor.fetchall()]

    def get_high_confidence_summary(self, subject: str, threshold: float = 0.8) -> str:
        """
        Generate a summary of high-confidence facts about a subject.

        Args:
            subject: Subject to summarize
            threshold: Confidence threshold

        Returns:
            str: Human-readable summary, one fact per line
        """
        facts = self.get_consolidated_facts(subject, min_confidence=threshold)

        if not facts:
            return f"No high-confidence facts about {subject}."

        lines = [f"High-confidence facts about {subject}:"]
        lines.extend(
            f" - {fact.subject} {fact.predicate} {fact.object} "
            f"(conf: {fact.confidence:.2f}, mentions: {fact.mention_count})"
            for fact in facts
        )
        return "\n".join(lines)

    def run_consolidation_pass(self):
        """
        Background consolidation pass.

        For every active fact mentioned more than twice, look up active facts
        with the same subject + predicate but a different object and
        auto-resolve each pair in which the well-attested fact has strictly
        higher confidence (higher confidence wins).  Ties and pairs where the
        rival is more confident are left for manual review.
        """
        cursor = self.conn.cursor()

        # Find facts ready for consolidation (mention_count > 2)
        cursor.execute("""
            SELECT * FROM consolidated_facts
            WHERE mention_count > 2 AND superseded_by IS NULL
            ORDER BY mention_count DESC
        """)

        # fetchall() materialises the candidates so the cursor can be reused
        # for the per-candidate rival lookup below.
        for candidate in cursor.fetchall():
            cursor.execute("""
                SELECT * FROM consolidated_facts
                WHERE subject = ? AND predicate = ? AND object != ?
                AND superseded_by IS NULL
            """, (candidate["subject"], candidate["predicate"], candidate["object"]))

            for rival in cursor.fetchall():
                if candidate["confidence"] > rival["confidence"]:
                    self.resolve_conflict_by_facts(candidate["id"], rival["id"])

    def resolve_conflict_by_facts(self, winning_id: str, losing_id: str):
        """
        Mark ``losing_id`` as superseded by ``winning_id``.

        Any still-unresolved ``conflicts`` rows between the two facts (in
        either order) are marked resolved as well, so automatic resolution
        does not leave them showing up in ``get_conflicts()`` and
        ``get_stats()``.
        """
        now = datetime.now().isoformat()
        cursor = self.conn.cursor()

        cursor.execute("""
            UPDATE consolidated_facts
            SET superseded_by = ?, updated_at = ?
            WHERE id = ?
        """, (winning_id, now, losing_id))

        cursor.execute("""
            UPDATE conflicts
            SET resolution = ?, resolved_at = ?
            WHERE resolution IS NULL
              AND ((fact_a_id = ? AND fact_b_id = ?)
                   OR (fact_a_id = ? AND fact_b_id = ?))
        """, (f"superseded_by_{winning_id}", now,
              winning_id, losing_id, losing_id, winning_id))

        self.conn.commit()

    def get_stats(self) -> Dict:
        """
        Get consolidation statistics.

        Returns:
            Dict with ``active_facts``, ``superseded_facts``,
            ``unresolved_conflicts``, ``avg_confidence`` (3 decimals) and
            ``avg_mentions`` (2 decimals); averages cover active facts only
            and are 0.0 when there are none.
        """
        cursor = self.conn.cursor()

        def scalar(sql: str):
            """Run a single-value query and return that value."""
            cursor.execute(sql)
            return cursor.fetchone()[0]

        active_facts = scalar(
            "SELECT COUNT(*) FROM consolidated_facts WHERE superseded_by IS NULL")
        superseded_facts = scalar(
            "SELECT COUNT(*) FROM consolidated_facts WHERE superseded_by IS NOT NULL")
        unresolved_conflicts = scalar(
            "SELECT COUNT(*) FROM conflicts WHERE resolution IS NULL")
        # AVG() yields NULL on an empty set, hence the "or 0.0".
        avg_confidence = scalar(
            "SELECT AVG(confidence) FROM consolidated_facts WHERE superseded_by IS NULL") or 0.0
        avg_mentions = scalar(
            "SELECT AVG(mention_count) FROM consolidated_facts WHERE superseded_by IS NULL") or 0.0

        return {
            "active_facts": active_facts,
            "superseded_facts": superseded_facts,
            "unresolved_conflicts": unresolved_conflicts,
            "avg_confidence": round(avg_confidence, 3),
            "avg_mentions": round(avg_mentions, 2),
        }

    def close(self):
        """
        Close the database connection if this instance opened it.

        A connection passed in through ``conn=`` belongs to the caller and is
        left open.
        """
        if self._owns_connection:
            self.conn.close()
#446
#447
#448	# --- Testing ---
if __name__ == "__main__":
    # Smoke test: exercises the consolidator end to end against a throwaway
    # database file and checks the results, so "passed" is only printed when
    # every step behaved as expected.
    import math
    import os
    import tempfile

    print("Veracity Consolidation Tests")
    print("=" * 60)

    # Create temp database (delete=False: sqlite reopens the path itself)
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        db_path = f.name

    cons = None
    try:
        cons = VeracityConsolidator(db_path=Path(db_path))

        # Test 1: Basic consolidation
        print("\nTest 1: Basic consolidation")
        fact1 = cons.consolidate_fact("Alice", "is", "developer", "stated", "mem_001")
        print(f" Initial: {fact1.subject} {fact1.predicate} {fact1.object} (conf: {fact1.confidence:.2f})")
        # New "stated" fact starts at weight 1.0 * 0.5.
        assert math.isclose(fact1.confidence, 0.5)
        assert fact1.mention_count == 1

        # Test 2: Bayesian update
        print("\nTest 2: Bayesian update")
        fact2 = cons.consolidate_fact("Alice", "is", "developer", "stated", "mem_002")
        print(f" Updated: {fact2.subject} {fact2.predicate} {fact2.object} (conf: {fact2.confidence:.2f}, mentions: {fact2.mention_count})")
        # 0.5 + (1 - 0.5) * 1.0 * 0.3
        assert math.isclose(fact2.confidence, 0.65)
        assert fact2.mention_count == 2
        assert fact2.sources == ["mem_001", "mem_002"]

        # Test 3: Conflict detection
        print("\nTest 3: Conflict detection")
        fact3 = cons.consolidate_fact("Alice", "is", "manager", "inferred", "mem_003")
        print(f" Conflict: {fact3.subject} {fact3.predicate} {fact3.object} (conf: {fact3.confidence:.2f})")
        # New "inferred" fact starts at weight 0.7 * 0.5.
        assert math.isclose(fact3.confidence, 0.35)

        conflicts = cons.get_conflicts()
        print(f" Unresolved conflicts: {len(conflicts)}")
        assert len(conflicts) == 1

        # Test 4: Conflict resolution
        print("\nTest 4: Conflict resolution")
        if conflicts:
            cons.resolve_conflict(conflicts[0]["id"], "cf_Alice_is_developer")
            print(f" Resolved conflict #{conflicts[0]['id']}")
        assert cons.get_conflicts() == []

        # Test 5: High-confidence summary
        print("\nTest 5: High-confidence summary")
        summary = cons.get_high_confidence_summary("Alice", threshold=0.5)
        print(summary)
        assert "Alice is developer" in summary
        assert "manager" not in summary

        # Test 6: Stats
        print("\nTest 6: Stats")
        stats = cons.get_stats()
        print(f" {stats}")
        assert stats["active_facts"] == 1
        assert stats["superseded_facts"] == 1
        assert stats["unresolved_conflicts"] == 0
    finally:
        # Cleanup runs even when a step above fails, so no temp file is leaked.
        if cons is not None:
            cons.close()
        os.unlink(db_path)

    print("\n" + "=" * 60)
    print("Veracity consolidation tests passed!")
#502

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public