# my-project-public

# repository

# loading code, commits, and activity

# repositories

# loading repo index

#!/usr/bin/env python3
"""
Mnemosyne v2.0 Benchmark Suite
==============================
Measures store latency, recall latency, DB size, and embedding overhead.
Uses a temp directory for the DB (no project pollution).
Reports mean ± std across 3 runs in milliseconds.
"""
#9
import json
import os
import shutil
import statistics
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path

# Ensure project root is on path
PROJECT = Path(__file__).resolve().parent
sys.path.insert(0, str(PROJECT))

from mnemosyne.core.beam import BeamMemory, init_beam
from mnemosyne.core import embeddings
#26
# ── Configuration ────────────────────────────────────────────────────────────
N_RUNS = 3  # repetitions per measurement; mean ± std is reported across them
STORE_SIZES = [100, 1000, 10000]  # memories written per store-latency run
RECALL_CORPUS_SIZES = [1000, 10000]  # corpus sizes the recall benchmark queries
EMBED_BATCHES = [1, 10, 100]  # number of texts passed to one embed() call
WARMUP_MEMORIES = 5  # untimed writes issued before each timed store run
#33
# Sample texts for generating varied memories
SAMPLE_TEXTS = [
    "The user prefers dark mode in all applications and finds light mode straining.",
    "Important meeting scheduled for next Tuesday at 3 PM with the engineering team.",
    "The project deadline has been moved to December 15th due to vendor delays.",
    "User's preferred programming language is Python for data analysis tasks.",
    "The database migration completed successfully with zero downtime last weekend.",
    "Customer reported a critical bug in the payment processing module yesterday.",
    "Team standup meetings are held every Monday, Wednesday, and Friday at 9 AM.",
    "The new API endpoint for user authentication has been deployed to production.",
    "Server monitoring shows CPU usage has been consistently above 80% this week.",
    "The machine learning model achieved 94.7% accuracy on the test dataset.",
    "Documentation for the REST API needs to be updated before the next release.",
    "The caching layer reduced average response times from 450ms to 32ms.",
    "User requested a feature to export data in CSV and JSON formats.",
    "The Kubernetes cluster was upgraded to version 1.28 without any issues.",
    "Memory optimization reduced the application's RAM usage by 40%.",
    "The integration tests now cover 87% of the critical code paths.",
    "A new team member will be joining the backend team starting next month.",
    "The CI/CD pipeline completes in approximately 12 minutes on average.",
    "Security audit identified three medium-severity vulnerabilities to patch.",
    "The GraphQL schema was refactored to support pagination on all queries.",
    "Load testing showed the system can handle 10,000 concurrent connections.",
    "The user's timezone is UTC-5 (Eastern Time) for scheduling purposes.",
    "Redis cache hit rate is currently at 96.3% across all services.",
    "The front-end bundle size was reduced from 2.1MB to 890KB after optimization.",
    "Automated backups run every 6 hours and are retained for 30 days.",
    "The user prefers concise summaries over detailed explanations.",
    "Network latency between the app and database servers averages 2.3ms.",
    "The staging environment mirrors production with 75% of real data volumes.",
    "Code coverage increased from 62% to 78% after the sprint testing push.",
    "The webhook integration with Slack sends alerts for all critical events.",
]
#67
# Query strings issued against the populated corpus by the recall benchmark
QUERIES = [
    "user preferences for UI settings",
    "meeting schedule and calendar events",
    "database performance issues",
    "API endpoint configuration",
    "security vulnerabilities and patches",
    "machine learning model accuracy",
    "deployment and release timeline",
    "team member updates",
    "caching strategy optimization",
    "testing and code coverage",
]
#80
#81
def generate_text(idx: int, samples=None) -> str:
    """Generate unique memory text from index.

    Args:
        idx: Position of the memory in the corpus. It picks the base
            sentence (cycling through the pool) and is embedded in the
            suffix so that every generated text is distinct.
        samples: Optional pool of base sentences. When omitted, the
            module-level ``SAMPLE_TEXTS`` list is used.

    Returns:
        The selected base sentence followed by `` [ref-NNNNNN]``, where the
        index is zero-padded to at least six digits.

    Raises:
        ValueError: If the sentence pool is empty.
    """
    # Resolve the default at call time so the module constant is only
    # required when the caller does not supply a pool of its own.
    pool = SAMPLE_TEXTS if samples is None else samples
    if not pool:
        raise ValueError("sample pool must not be empty")
    return f"{pool[idx % len(pool)]} [ref-{idx:06d}]"
#86
#87
def fresh_db() -> tuple:
    """Create a fresh BeamMemory with a temp DB path. Returns (beam, db_dir).

    A new temporary directory (prefix ``mnemosyne_bench_``) is created, the
    database file ``bench.db`` inside it is initialised with ``init_beam``
    and then opened through ``BeamMemory``.

    Returns:
        A ``(beam, db_dir)`` pair: the opened ``BeamMemory`` and the path of
        the temporary directory, which the caller is expected to hand to
        ``cleanup`` when done.

    Raises:
        Exception: Whatever ``init_beam`` or ``BeamMemory`` raise is
            propagated after the temporary directory has been removed.
    """
    db_dir = tempfile.mkdtemp(prefix="mnemosyne_bench_")
    try:
        db_path = Path(db_dir) / "bench.db"
        init_beam(db_path)
        beam = BeamMemory(db_path=str(db_path))
    except Exception:
        # The caller never receives db_dir on failure, so nobody else could
        # remove it; drop it here instead of leaking a temp directory.
        shutil.rmtree(db_dir, ignore_errors=True)
        raise
    return beam, db_dir
#95
#96
def cleanup(beam, db_dir):
    """Close connection and remove temp dir.

    Both steps are best-effort so that this function can be called from a
    ``finally`` clause without masking the error that triggered it.

    Args:
        beam: Object whose ``conn`` attribute exposes ``close()``.
        db_dir: Directory to delete recursively; a missing directory is
            not an error.
    """
    try:
        beam.conn.close()
    except Exception:
        # Deliberately swallowed: a connection that is already closed (or an
        # object without one) must not stop the temp directory from being
        # removed below.
        pass
    shutil.rmtree(db_dir, ignore_errors=True)
#104
#105
def fmt_ms(mean_val, std_val, unit="ms"):
    """Format mean ± std in milliseconds.

    Args:
        mean_val: Mean value, already expressed in milliseconds.
        std_val: Standard deviation, in the same unit as ``mean_val``.
        unit: Accepted for call compatibility but not appended to the
            result; the summary table headers already carry the unit.

    Returns:
        ``"<mean> ± <std>"`` with both numbers rounded to two decimals.
    """
    return f"{mean_val:.2f} ± {std_val:.2f}"
#109
#110
def fmt_size(size_bytes):
    """Format bytes as human-readable.

    Args:
        size_bytes: Size in bytes.

    Returns:
        ``"N B"`` below 1 KiB, ``"N.N KB"`` below 1 MiB, otherwise
        ``"N.NN MB"`` (units are powers of 1024).
    """
    kib = 1024
    mib = kib * kib
    if size_bytes < kib:
        return f"{size_bytes} B"
    if size_bytes < mib:
        return f"{size_bytes / kib:.1f} KB"
    return f"{size_bytes / mib:.2f} MB"
#119
#120
# ── Benchmark 1: Store Latency ───────────────────────────────────────────────
def bench_store_latency():
    """Measure time for beam.remember() at different corpus sizes.

    For every size in ``STORE_SIZES`` the benchmark is repeated ``N_RUNS``
    times, each time against a fresh temporary database. A run issues
    ``WARMUP_MEMORIES`` untimed writes and then times ``size`` consecutive
    ``beam.remember`` calls as one interval.

    Returns:
        A dict keyed by corpus size. Each value holds ``"total_ms"`` and
        ``"per_mem_ms"``, both pre-formatted ``"mean ± std"`` strings in
        milliseconds.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK 1: Store Latency (beam.remember)")
    print("=" * 70)

    results = {}
    for size in STORE_SIZES:
        run_times = []
        for run in range(N_RUNS):
            beam, db_dir = fresh_db()
            try:
                # Warmup
                for i in range(WARMUP_MEMORIES):
                    beam.remember(f"warmup {i}", source="bench_warmup")

                # Timed store
                start = time.perf_counter()
                for i in range(size):
                    beam.remember(
                        generate_text(i),
                        source="bench_store",
                        importance=0.5 + (i % 5) * 0.1,
                    )
                elapsed = time.perf_counter() - start
                run_times.append(elapsed)
                print(f" Store {size:>5} memories, run {run+1}: {elapsed*1000:.1f} ms "
                      f"({elapsed/size*1000:.3f} ms/mem)")
            finally:
                cleanup(beam, db_dir)

        # run_times are in seconds; convert to milliseconds only for output.
        mean_total = statistics.mean(run_times)
        # stdev() needs at least two samples, hence the guard for N_RUNS == 1.
        std_total = statistics.stdev(run_times) if len(run_times) > 1 else 0.0
        mean_per = mean_total / size * 1000
        std_per = std_total / size * 1000
        results[size] = {
            "total_ms": fmt_ms(mean_total * 1000, std_total * 1000),
            "per_mem_ms": fmt_ms(mean_per, std_per),
        }
        print(f" → Store {size:>5}: total {mean_total * 1000:.1f} ± {std_total * 1000:.1f} ms, "
              f"per-mem {mean_per:.3f} ± {std_per:.3f} ms")

    return results
#165
#166
# ── Benchmark 2: Recall Latency ──────────────────────────────────────────────
def bench_recall_latency():
    """Measure recall() latency against different corpus sizes.

    For every size in ``RECALL_CORPUS_SIZES`` the benchmark is repeated
    ``N_RUNS`` times. Each run populates a fresh temporary database, issues
    one untimed warmup recall, then times ``beam.recall(query, top_k=5)``
    individually for every entry in ``QUERIES``. The per-run figure is the
    mean over those queries.

    Returns:
        A dict keyed by corpus size. Each value holds ``"avg_ms"``, a
        pre-formatted ``"mean ± std"`` string (milliseconds) computed across
        the per-run means.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK 2: Recall Latency (beam.recall)")
    print("=" * 70)

    results = {}
    for corpus_size in RECALL_CORPUS_SIZES:
        # Build corpus once per run
        run_times = []
        for run in range(N_RUNS):
            beam, db_dir = fresh_db()
            try:
                # Populate corpus
                print(f" Populating {corpus_size} memories for run {run+1}...", end=" ", flush=True)
                pop_start = time.perf_counter()
                for i in range(corpus_size):
                    beam.remember(
                        generate_text(i),
                        source="bench_recall_corpus",
                        importance=0.3 + (i % 7) * 0.1,
                    )
                pop_time = time.perf_counter() - pop_start
                print(f"done ({pop_time:.1f}s)")

                # Warmup recall
                beam.recall("warmup query", top_k=5)

                # Timed recalls
                recall_times = []
                for query in QUERIES:
                    start = time.perf_counter()
                    beam.recall(query, top_k=5)
                    recall_times.append(time.perf_counter() - start)

                avg_recall = statistics.mean(recall_times)
                run_times.append(avg_recall)
                print(f" Recall over {corpus_size:>5} corpus, run {run+1}: "
                      f"avg {avg_recall*1000:.2f} ms across {len(QUERIES)} queries")

            finally:
                cleanup(beam, db_dir)

        # Per-run means are in seconds; convert to milliseconds for output.
        mean_val = statistics.mean(run_times)
        # stdev() needs at least two samples, hence the guard for N_RUNS == 1.
        std_val = statistics.stdev(run_times) if len(run_times) > 1 else 0.0
        results[corpus_size] = {
            "avg_ms": fmt_ms(mean_val * 1000, std_val * 1000),
        }
        print(f" → Recall @ {corpus_size:>5} corpus: {mean_val * 1000:.2f} ± {std_val * 1000:.2f} ms")

    return results
#220
#221
# ── Benchmark 3: Memory Footprint ────────────────────────────────────────────
def bench_db_size(sizes=(1000, 10000)):
    """Measure DB file size at different corpus sizes.

    For each corpus size a fresh temporary database is filled with that many
    memories, the sizes of the ``-wal`` and ``-shm`` side files are noted,
    a ``PRAGMA wal_checkpoint(TRUNCATE)`` is executed on ``beam.conn`` and
    the size of ``bench.db`` is then read.

    Args:
        sizes: Corpus sizes to measure. Defaults to 1000 and 10000 memories.

    Returns:
        A dict keyed by corpus size. Each value holds ``"total"``
        (human-readable size), ``"total_bytes"`` and ``"per_mem_bytes"``,
        all taken from the main DB file after the checkpoint.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK 3: Memory Footprint (DB file size)")
    print("=" * 70)

    results = {}
    for size in sizes:
        beam, db_dir = fresh_db()
        try:
            for i in range(size):
                beam.remember(
                    generate_text(i),
                    source="bench_size",
                    importance=0.5,
                )

            db_path = Path(db_dir) / "bench.db"

            # Check for WAL and SHM files. This has to happen before the
            # checkpoint below, which is issued in TRUNCATE mode.
            wal_path = Path(db_dir) / "bench.db-wal"
            shm_path = Path(db_dir) / "bench.db-shm"
            wal_size = os.path.getsize(wal_path) if wal_path.exists() else 0
            shm_size = os.path.getsize(shm_path) if shm_path.exists() else 0

            # Force checkpoint to get realistic main DB size
            beam.conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
            db_size_after = os.path.getsize(db_path)

            per_mem = db_size_after / size
            results[size] = {
                "total": fmt_size(db_size_after),
                "total_bytes": db_size_after,
                "per_mem_bytes": per_mem,
            }
            print(f" DB @ {size:>5} memories: {fmt_size(db_size_after)} "
                  f"({per_mem:.0f} bytes/mem)")
            if wal_size:
                print(f" WAL: {fmt_size(wal_size)}, SHM: {fmt_size(shm_size)}")

        finally:
            cleanup(beam, db_dir)

    return results
#268
#269
# ── Benchmark 4: Embedding Overhead ──────────────────────────────────────────
def bench_embedding():
    """Measure embedding time for different batch sizes.

    Skipped (returning an empty dict) when ``embeddings.available()`` is
    false. Otherwise one untimed ``embeddings.embed`` call warms the model
    up, and for every batch size in ``EMBED_BATCHES`` a single
    ``embeddings.embed(texts)`` call is timed ``N_RUNS`` times.

    Returns:
        A dict keyed by batch size. Each value holds ``"total_ms"`` (a
        pre-formatted ``"mean ± std"`` string) and ``"per_text_ms"`` (a
        float: mean milliseconds per text). Empty when embeddings are
        unavailable.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK 4: Embedding Overhead")
    print("=" * 70)

    if not embeddings.available():
        print(" ⚠ fastembed not available — skipping embedding benchmark")
        return {}

    # Warm up the model
    print(" Warming up embedding model...", end=" ", flush=True)
    embeddings.embed(["warmup text for model loading"])
    print("done")

    results = {}
    for batch_size in EMBED_BATCHES:
        # The inputs are the same for every run of a batch size, so build
        # them once, outside the timed region and outside the run loop.
        texts = [f"This is benchmark embedding test sentence number {i}." for i in range(batch_size)]
        run_times = []
        for run in range(N_RUNS):
            start = time.perf_counter()
            # NOTE(review): this assumes embeddings.embed() computes eagerly.
            # If it returns a lazy iterator the interval would not include
            # the actual embedding work — confirm against the implementation.
            embeddings.embed(texts)
            elapsed = time.perf_counter() - start
            run_times.append(elapsed)
            print(f" Embed batch={batch_size:>3}, run {run+1}: {elapsed*1000:.2f} ms "
                  f"({elapsed/batch_size*1000:.3f} ms/text)")

        # run_times are in seconds; convert to milliseconds for output.
        mean_val = statistics.mean(run_times)
        # stdev() needs at least two samples, hence the guard for N_RUNS == 1.
        std_val = statistics.stdev(run_times) if len(run_times) > 1 else 0.0
        per_text = mean_val / batch_size * 1000
        results[batch_size] = {
            "total_ms": fmt_ms(mean_val * 1000, std_val * 1000),
            "per_text_ms": per_text,
        }
        print(f" → Embed batch {batch_size:>3}: {mean_val * 1000:.2f} ± {std_val * 1000:.2f} ms "
              f"({per_text:.3f} ms/text)")

    return results
#309
#310
# ── Summary Table ─────────────────────────────────────────────────────────────
def print_summary(store, recall, db_size, embed, n_runs=None):
    """Print a clean markdown summary table.

    Args:
        store: Mapping of corpus size to a dict with ``"total_ms"`` and
            ``"per_mem_ms"`` (pre-formatted strings).
        recall: Mapping of corpus size to a dict with ``"avg_ms"``
            (pre-formatted string).
        db_size: Mapping of corpus size to a dict with ``"total"`` (string)
            and ``"per_mem_bytes"`` (number).
        embed: Mapping of batch size to a dict with ``"total_ms"`` (string)
            and ``"per_text_ms"`` (number). The section is omitted when
            this mapping is empty.
        n_runs: Run count shown in the header. Defaults to the module-level
            ``N_RUNS``.
    """
    # Resolve the default at call time so the module constant is only
    # required when the caller does not pass a run count explicitly.
    runs = N_RUNS if n_runs is None else n_runs

    print("\n" + "=" * 70)
    print("SUMMARY TABLE (Mnemosyne v2.0 Benchmark Results)")
    print("=" * 70)
    print(f"Runs: {runs} | Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}")
    print("Embedding model: BAAI/bge-small-en-v1.5 (384-dim, int8 vectors)")
    print()

    # Store latency
    print("## Store Latency (beam.remember)")
    print()
    print("| Corpus Size | Total Time (ms) | Per-Memory (ms) |")
    print("|------------:|:----------------|:----------------|")
    for size, data in sorted(store.items()):
        print(f"| {size:>11} | {data['total_ms']} | {data['per_mem_ms']} |")
    print()

    # Recall latency
    print("## Recall Latency (beam.recall, top_k=5)")
    print()
    print("| Corpus Size | Avg Query Time (ms) |")
    print("|------------:|:--------------------|")
    for size, data in sorted(recall.items()):
        print(f"| {size:>11} | {data['avg_ms']} |")
    print()

    # DB size
    print("## Memory Footprint (SQLite DB)")
    print()
    print("| Memories | Total Size | Bytes/Memory |")
    print("|---------:|:----------|:------------:|")
    for size, data in sorted(db_size.items()):
        print(f"| {size:>8} | {data['total']} | {data['per_mem_bytes']:.0f} |")
    print()

    # Embedding overhead
    if embed:
        print("## Embedding Overhead (fastembed)")
        print()
        print("| Batch Size | Total Time (ms) | Per-Text (ms) |")
        print("|-----------:|:----------------|:-------------:|")
        for batch, data in sorted(embed.items()):
            print(f"| {batch:>10} | {data['total_ms']} | {data['per_text_ms']:.3f} |")
        print()
#357
#358
# ── Main ──────────────────────────────────────────────────────────────────────
def main():
    """Run all four benchmarks in order and print the summary tables."""
    # Pad the banner text to the border width so the box sides line up.
    width = 70
    print("╔" + "═" * width + "╗")
    print("║" + "Mnemosyne v2.0 Benchmark Suite".center(width) + "║")
    print("║" + "Store · Recall · Footprint · Embedding".center(width) + "║")
    print("╚" + "═" * width + "╝")
    print(f" Python: {sys.version.split()[0]}")
    print(f" Runs: {N_RUNS} per benchmark")
    print(f" Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    store_results = bench_store_latency()
    recall_results = bench_recall_latency()
    db_size_results = bench_db_size()
    embed_results = bench_embedding()

    print_summary(store_results, recall_results, db_size_results, embed_results)

    print("Benchmark complete.")


if __name__ == "__main__":
    main()
#377

# z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public