my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	#!/usr/bin/env python3
#2	"""NAI-2 Benchmark — Temporal Decay Impact (v2, direct SQL timestamps)"""
#3	import time, hashlib, random
#4	from datetime import datetime, timedelta
#5	from pathlib import Path
#6	from mnemosyne.core.beam import BeamMemory
#7
#8	now = datetime.now()
#9	random.seed(42)
#10
#11	SIGNALS = [
#12	(30, "user", "I use Python for backend development."),
#13	(28, "user", "Using MySQL as my primary database."),
#14	(25, "user", "API throughput is currently 500 requests per second."),
#15	(23, "user", "Switching backend from Python to Go language."),
#16	(20, "user", "Migrating database from MySQL to PostgreSQL."),
#17	(18, "user", "Throughput improved to 5000 rps after Go migration."),
#18	(15, "user", "Added Redis caching layer for better performance."),
#19	(12, "user", "Throughput now at 10000 rps with Redis."),
#20	(10, "user", "Deployed Go backend to production. Response time 250ms."),
#21	(8, "user", "Running security audit with nmap on all servers."),
#22	(6, "user", "All services now running as non-root for security."),
#23	(4, "user", "Going to San Francisco next month for a conference."),
#24	(2, "user", "Conference is in July. Need to finish deployment first."),
#25	(1, "user", "Added gin framework for the Go API."),
#26	(0, "user", "Final stack: Go + PostgreSQL + Redis + gin framework."),
#27	(29, "assistant", "Python and MySQL noted. Let me know about scaling needs."),
#28	(24, "assistant", "Go migration confirmed. gin or chi for web framework?"),
#29	(19, "assistant", "PostgreSQL migration looks good. Updating records."),
#30	(14, "assistant", "Redis caching is a smart move for throughput."),
#31	(7, "assistant", "250ms response time is solid for Go backend."),
#32	]
#33
# Pool of mundane activities used to synthesize distractor ("filler") log
# lines that surround the signal messages.
topics = [
    "checked the weather forecast", "read an article about AI",
    "had lunch at a new restaurant", "updated my laptop OS",
    "watched a tutorial video", "fixed a minor CSS bug",
    "cleaned up old log files", "responded to email thread",
    "updated npm dependencies", "ran a database backup",
    "tested the new API endpoint", "reviewed a pull request",
    "wrote unit tests for module", "configured CI pipeline",
    "debugged a race condition", "optimized a slow query",
    "added error handling", "refactored legacy code",
    "set up monitoring alerts", "documented API changes",
]

# For every day from 30 days ago down to today, emit between 4 and 8 system
# log lines. The RNG is consulted in the same order as a plain nested loop:
# one randint per day, followed by one choice per generated line.
FILLER = []
for day in range(30, -1, -1):
    n = random.randint(4, 8)
    FILLER.extend(
        (day, "system", f"Log: {random.choice(topics)} on day {day}.")
        for _ in range(n)
    )

# Merge signals and filler, oldest (largest days_ago) first. The sort is
# stable, so entries sharing a day keep their signals-then-filler order.
TIMELINE = SIGNALS + FILLER
TIMELINE.sort(key=lambda entry: entry[0], reverse=True)
#55
#56	QUESTIONS = [
#57	("What language does user use NOW?", ["Go"], False),
#58	("What database does user use NOW?", ["PostgreSQL"], False),
#59	("What is the current throughput?", ["10000", "10K"], False),
#60	("What caching is used?", ["Redis"], False),
#61	("What framework is used with Go?", ["gin"], False),
#62	("Where is the conference?", ["San Francisco"], False),
#63	("What was the ORIGINAL language?", ["Python"], True),
#64	("What was the ORIGINAL database?", ["MySQL"], True),
#65	("What was the ORIGINAL throughput?", ["500"], True),
#66	("What throughput after Go migration?", ["5000", "5K"], True),
#67	("What security tool was recommended?", ["nmap"], False),
#68	("What security practice implemented?", ["non-root"], False),
#69	("When is the conference?", ["July"], False),
#70	("What was the response time?", ["250ms"], False),
#71	("What changed: lang, db, or cache?", ["Go", "PostgreSQL", "Redis"], False),
#72	("What was added most recently?", ["gin"], False),
#73	("What did the user start with?", ["Python", "MySQL"], True),
#74	("What does the user use now?", ["Go", "PostgreSQL", "Redis"], False),
#75	("BEFORE prod deploy, what was throughput?", ["5000", "5K"], True),
#76	("What was the security audit result?", ["non-root", "nmap"], False),
#77	]
#78
#79
def score_answer(predicted, expected):
    """Return the fraction of expected keywords present in *predicted*.

    Matching is case-insensitive and whole-token: a keyword only counts when
    it is not embedded in a longer word or number. Plain substring matching
    would credit "Go" for "Going"/"good", "500" for "5000" and "gin" for
    "original", inflating the scores.

    Args:
        predicted: Text to search. ``None`` is treated as an empty string.
        expected: Sequence of keyword strings to look for.

    Returns:
        A float in [0.0, 1.0]; 0.0 when *expected* is empty.
    """
    import re

    if not expected:
        return 0.0

    haystack = (predicted or "").lower()
    hits = 0
    for kw in expected:
        # Lookarounds instead of \b so keywords that start or end with a
        # non-word character (e.g. "non-root") are still bounded correctly.
        pattern = r"(?<!\w)" + re.escape(kw.lower()) + r"(?!\w)"
        if re.search(pattern, haystack):
            hits += 1
    return hits / len(expected)
#84
#85
def run_bench(label, temporal_weight=0.0, temporal_halflife=168):
    """Run the recall benchmark once against a throwaway database.

    The timeline is inserted directly into the ``working_memory`` table so
    each row carries its intended back-dated timestamp, then every question
    is sent through ``beam.recall`` and scored against its keywords.

    Args:
        label: Short run name; used in the session id and in the row-id hash.
        temporal_weight: Forwarded unchanged to ``beam.recall``.
        temporal_halflife: Forwarded unchanged to ``beam.recall``.
            NOTE(review): presumably hours — confirm against BeamMemory.

    Returns:
        dict with keys ``label``, ``ingest_ms``, ``total`` (row count),
        ``avg_top5``, ``avg_current``, ``avg_historical`` (scores over the
        top five results), ``avg_first``, ``avg_first_curr``,
        ``avg_first_hist`` (scores over the first result only) and
        ``p50_ms`` (median recall latency in milliseconds).
    """
    import os
    import tempfile

    def _mean(values):
        # Empty buckets report 0 rather than raising ZeroDivisionError.
        return round(sum(values) / len(values), 3) if values else 0

    # Reserve a unique file path, then release the handle; only the path is
    # handed to BeamMemory.
    tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    db_path = Path(tmp.name)
    tmp.close()

    session_id = f"bench_nai2_{label}"
    beam = None
    try:
        beam = BeamMemory(session_id=session_id, db_path=db_path)

        # Direct SQL insertion with correct timestamps
        t0 = time.perf_counter()
        for i, (days_ago, role, content) in enumerate(TIMELINE):
            ts = (now - timedelta(days=days_ago)).isoformat()
            # Row id derived from run label, position and content.
            mid = hashlib.sha256(f"{label}{i}{content}".encode()).hexdigest()[:16]
            beam.conn.execute(
                """INSERT OR IGNORE INTO working_memory
                (id, content, source, timestamp, session_id, importance, scope)
                VALUES (?, ?, ?, ?, ?, ?, 'global')""",
                (mid, f"[{role}] {content}", role, ts, session_id, 0.5),
            )
        beam.conn.commit()
        ingest_ms = round((time.perf_counter() - t0) * 1000)

        total = beam.conn.execute("SELECT COUNT(*) FROM working_memory").fetchone()[0]

        all_scores, curr, hist = [], [], []
        first_scores, first_curr, first_hist = [], [], []
        latencies = []

        for question, expected, is_historical in QUESTIONS:
            t0 = time.perf_counter()
            results = beam.recall(
                question, top_k=10,
                temporal_weight=temporal_weight,
                temporal_halflife=temporal_halflife,
            )
            latencies.append(round((time.perf_counter() - t0) * 1000))

            # Top-5 score: keywords may appear anywhere in the first five
            # results (each truncated to 150 characters).
            top5 = " ".join(r.get("content", "")[:150] for r in results[:5])
            s = score_answer(top5, expected)
            all_scores.append(s)
            (hist if is_historical else curr).append(s)

            # First-answer score: only look at result #1
            first = results[0].get("content", "")[:150] if results else ""
            f = score_answer(first, expected)
            first_scores.append(f)
            (first_hist if is_historical else first_curr).append(f)
    finally:
        # Always release the connection and remove the temp database, even
        # when ingestion or recall raised.
        try:
            if beam is not None:
                beam.conn.close()
        finally:
            try:
                os.unlink(str(db_path))
            except FileNotFoundError:
                pass

    return {
        "label": label,
        "ingest_ms": ingest_ms,
        "total": total,
        "avg_top5": _mean(all_scores),
        "avg_current": _mean(curr),
        "avg_historical": _mean(hist),
        "avg_first": _mean(first_scores),
        "avg_first_curr": _mean(first_curr),
        "avg_first_hist": _mean(first_hist),
        "p50_ms": sorted(latencies)[len(latencies) // 2] if latencies else 0,
    }
#147
#148
if __name__ == "__main__":
    # Script entry point: run the benchmark under three temporal-decay
    # settings and print a per-run report plus a cross-run comparison.

    def _report(name, detail, res):
        # Print the two-line summary for one run_bench() result dict.
        print(f"--- {name} ({detail}, {res['total']} msgs) ---")
        print(
            f" Top-5: {res['avg_top5']:.3f} First: {res['avg_first']:.3f}"
            f" Cur: {res['avg_current']:.3f}/{res['avg_first_curr']:.3f}"
            f" Hist: {res['avg_historical']:.3f}"
            f" | {res['p50_ms']}ms ingest {res['ingest_ms']}ms"
        )

    # Derive the current/historical split from the data so the banner cannot
    # drift out of sync with QUESTIONS.
    n_hist = sum(1 for _q, _kw, is_hist in QUESTIONS if is_hist)
    n_curr = len(QUESTIONS) - n_hist

    print(f"Timeline: {len(TIMELINE)} msgs ({len(SIGNALS)} signals + {len(FILLER)} filler)")
    print(f"Questions: {len(QUESTIONS)} ({n_curr} current, {n_hist} historical)")
    print()

    bl = run_bench("baseline", temporal_weight=0.0)
    _report("BASELINE", "no temporal, k=10", bl)

    t1 = run_bench("temporal", temporal_weight=0.3, temporal_halflife=24)
    _report("TEMPORAL", "w=0.3, h=24h", t1)

    t2 = run_bench("strong", temporal_weight=0.6, temporal_halflife=12)
    _report("STRONG", "w=0.6, h=12h", t2)

    # NOTE(review): the "cur" delta compares first-result scores while the
    # "hist" delta compares top-5 scores — confirm the mix is intentional.
    dc = t1['avg_first_curr'] - bl['avg_first_curr']
    dh = t1['avg_historical'] - bl['avg_historical']
    print()
    print(f"CURRENT: {bl['avg_current']:.3f} → {t1['avg_current']:.3f} → {t2['avg_current']:.3f}")
    print(f"HISTORICAL: {bl['avg_historical']:.3f} → {t1['avg_historical']:.3f} → {t2['avg_historical']:.3f}")
    print(f"DELTA cur={dc:+.3f} hist={dh:+.3f}")
#172

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public