repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (15d ago)
| #1 | #!/usr/bin/env python3 |
| #2 | """NAI-2 Benchmark — Temporal Decay Impact (v2, direct SQL timestamps)""" |
| #3 | import time, hashlib, random |
| #4 | from datetime import datetime, timedelta |
| #5 | from pathlib import Path |
| #6 | from mnemosyne.core.beam import BeamMemory |
| #7 | |
# Reference instant for the whole run: every synthetic message timestamp is
# computed as an offset (in days) back from this single value.
now = datetime.now()

# Fixed seed so the randomly generated filler log lines are reproducible.
random.seed(42)
| #10 | |
# Meaningful conversation facts as (days_ago, role, content) tuples.
# User statements come first, followed by the assistant replies; the order
# inside each group is the order in which they are listed here.
SIGNALS = [
    (days_ago, "user", text)
    for days_ago, text in (
        (30, "I use Python for backend development."),
        (28, "Using MySQL as my primary database."),
        (25, "API throughput is currently 500 requests per second."),
        (23, "Switching backend from Python to Go language."),
        (20, "Migrating database from MySQL to PostgreSQL."),
        (18, "Throughput improved to 5000 rps after Go migration."),
        (15, "Added Redis caching layer for better performance."),
        (12, "Throughput now at 10000 rps with Redis."),
        (10, "Deployed Go backend to production. Response time 250ms."),
        (8, "Running security audit with nmap on all servers."),
        (6, "All services now running as non-root for security."),
        (4, "Going to San Francisco next month for a conference."),
        (2, "Conference is in July. Need to finish deployment first."),
        (1, "Added gin framework for the Go API."),
        (0, "Final stack: Go + PostgreSQL + Redis + gin framework."),
    )
] + [
    (days_ago, "assistant", text)
    for days_ago, text in (
        (29, "Python and MySQL noted. Let me know about scaling needs."),
        (24, "Go migration confirmed. gin or chi for web framework?"),
        (19, "PostgreSQL migration looks good. Updating records."),
        (14, "Redis caching is a smart move for throughput."),
        (7, "250ms response time is solid for Go backend."),
    )
]
| #33 | |
# Pool of mundane activities used to synthesise noise entries.
topics = [
    "checked the weather forecast",
    "read an article about AI",
    "had lunch at a new restaurant",
    "updated my laptop OS",
    "watched a tutorial video",
    "fixed a minor CSS bug",
    "cleaned up old log files",
    "responded to email thread",
    "updated npm dependencies",
    "ran a database backup",
    "tested the new API endpoint",
    "reviewed a pull request",
    "wrote unit tests for module",
    "configured CI pipeline",
    "debugged a race condition",
    "optimized a slow query",
    "added error handling",
    "refactored legacy code",
    "set up monitoring alerts",
    "documented API changes",
]

# Noise entries as (days_ago, "system", content): for each day from 30 down
# to 0, between 4 and 8 log lines with a randomly chosen topic. The per-day
# count is drawn first, then one topic per line, so the sequence of random
# calls is: randint, choice * n, randint, choice * n, ...
FILLER = [
    (day, "system", f"Log: {random.choice(topics)} on day {day}.")
    for day in range(30, -1, -1)
    for _ in range(random.randint(4, 8))
]
| #53 | |
# Full message stream, oldest first (largest days_ago first). The sort is
# stable, so entries sharing a day keep their relative order: signals before
# filler, each in definition order.
TIMELINE = [*SIGNALS, *FILLER]
TIMELINE.sort(key=lambda entry: entry[0], reverse=True)
| #55 | |
# Readable aliases for the third field of each question (is_historical).
_CURRENT, _HISTORICAL = False, True

# Evaluation set as (question, expected_keywords, is_historical) tuples.
# Historical questions ask about a superseded state; the rest ask about
# the latest state or about facts that never changed.
QUESTIONS = [
    ("What language does user use NOW?", ["Go"], _CURRENT),
    ("What database does user use NOW?", ["PostgreSQL"], _CURRENT),
    ("What is the current throughput?", ["10000", "10K"], _CURRENT),
    ("What caching is used?", ["Redis"], _CURRENT),
    ("What framework is used with Go?", ["gin"], _CURRENT),
    ("Where is the conference?", ["San Francisco"], _CURRENT),
    ("What was the ORIGINAL language?", ["Python"], _HISTORICAL),
    ("What was the ORIGINAL database?", ["MySQL"], _HISTORICAL),
    ("What was the ORIGINAL throughput?", ["500"], _HISTORICAL),
    ("What throughput after Go migration?", ["5000", "5K"], _HISTORICAL),
    ("What security tool was recommended?", ["nmap"], _CURRENT),
    ("What security practice implemented?", ["non-root"], _CURRENT),
    ("When is the conference?", ["July"], _CURRENT),
    ("What was the response time?", ["250ms"], _CURRENT),
    ("What changed: lang, db, or cache?", ["Go", "PostgreSQL", "Redis"], _CURRENT),
    ("What was added most recently?", ["gin"], _CURRENT),
    ("What did the user start with?", ["Python", "MySQL"], _HISTORICAL),
    ("What does the user use now?", ["Go", "PostgreSQL", "Redis"], _CURRENT),
    ("BEFORE prod deploy, what was throughput?", ["5000", "5K"], _HISTORICAL),
    ("What was the security audit result?", ["non-root", "nmap"], _CURRENT),
]
| #78 | |
| #79 | |
def score_answer(predicted, expected):
    """Return the fraction of expected keywords found in the predicted text.

    Matching is case-insensitive and requires the keyword to appear as a
    whole token: it must not be directly preceded or followed by a word
    character. Plain substring matching would count "500" as present in
    "5000 rps" and "Go" as present in "good" or "Going", inflating scores.

    Args:
        predicted: Text to search (e.g. concatenated recall results).
        expected: Keywords to look for.

    Returns:
        hits / len(expected) as a float in [0.0, 1.0]; 0.0 when ``expected``
        is empty.

    Note:
        Every keyword counts toward the denominator, so listing alternative
        spellings of one answer (e.g. "10000" and "10K") caps the achievable
        score below 1.0 unless all spellings occur in the text.
    """
    import re  # local import: `re` is not among this file's module imports

    if not expected:
        return 0.0

    haystack = predicted.lower()
    hits = sum(
        1
        for kw in expected
        if re.search(rf"(?<!\w){re.escape(kw.lower())}(?!\w)", haystack)
    )
    return hits / len(expected)
| #84 | |
| #85 | |
def run_bench(label, temporal_weight=0.0, temporal_halflife=168):
    """Run one benchmark configuration against a throwaway memory database.

    Creates a temporary ``.db`` file, opens a ``BeamMemory`` on it, inserts
    every ``TIMELINE`` entry straight into the ``working_memory`` table with
    a back-dated timestamp, then issues each entry of ``QUESTIONS`` through
    ``beam.recall`` and scores the returned contents with ``score_answer``.

    Args:
        label: Name of the configuration; used in the session id, in the
            row ids and echoed back in the result.
        temporal_weight: Forwarded unchanged to ``beam.recall``.
        temporal_halflife: Forwarded unchanged to ``beam.recall``.

    Returns:
        A dict with:
            label, ingest_ms, total (row count in ``working_memory``),
            avg_top5 / avg_current / avg_historical (scores over the first
            five results joined together),
            avg_first / avg_first_curr / avg_first_hist (scores over the
            first result only),
            p50_ms (upper-median recall latency in milliseconds).
        Averages are rounded to 3 decimals and are 0 when their group is
        empty.
    """
    import os
    import tempfile

    def mean3(values):
        # Rounded mean; 0 for an empty group instead of ZeroDivisionError.
        return round(sum(values) / len(values), 3) if values else 0

    # Only the path is needed: the memory store opens the file itself.
    tmp = tempfile.NamedTemporaryFile(suffix=".db", delete=False)
    db_path = Path(tmp.name)
    tmp.close()

    session_id = f"bench_nai2_{label}"
    beam = None
    try:
        beam = BeamMemory(session_id=session_id, db_path=db_path)

        # Direct SQL insertion so each row carries its historical timestamp.
        t0 = time.perf_counter()
        for i, (days_ago, role, content) in enumerate(TIMELINE):
            ts = (now - timedelta(days=days_ago)).isoformat()
            # Deterministic id; the index keeps identical contents distinct.
            mid = hashlib.sha256(f"{label}{i}{content}".encode()).hexdigest()[:16]
            beam.conn.execute(
                """INSERT OR IGNORE INTO working_memory
                (id, content, source, timestamp, session_id, importance, scope)
                VALUES (?, ?, ?, ?, ?, ?, 'global')""",
                (mid, f"[{role}] {content}", role, ts, session_id, 0.5),
            )
        beam.conn.commit()
        ingest_ms = round((time.perf_counter() - t0) * 1000)

        total = beam.conn.execute("SELECT COUNT(*) FROM working_memory").fetchone()[0]

        all_scores, curr, hist = [], [], []
        first_scores, first_curr, first_hist = [], [], []
        latencies = []

        for question, expected, is_historical in QUESTIONS:
            t0 = time.perf_counter()
            results = beam.recall(
                question,
                top_k=10,
                temporal_weight=temporal_weight,
                temporal_halflife=temporal_halflife,
            )
            latencies.append(round((time.perf_counter() - t0) * 1000))

            # Each result contributes at most its first 150 characters.
            snippets = [r.get("content", "")[:150] for r in results[:5]]

            # Top-5 score: keywords may appear in any of the first five hits.
            top5_score = score_answer(" ".join(snippets), expected)
            all_scores.append(top5_score)
            (hist if is_historical else curr).append(top5_score)

            # First-answer score: only look at result #1.
            first_score = score_answer(snippets[0] if snippets else "", expected)
            first_scores.append(first_score)
            (first_hist if is_historical else first_curr).append(first_score)
    finally:
        # Release the connection and the temp file even when a step fails.
        if beam is not None:
            beam.conn.close()
        try:
            os.unlink(str(db_path))
        except FileNotFoundError:
            pass

    return {
        "label": label,
        "ingest_ms": ingest_ms,
        "total": total,
        "avg_top5": mean3(all_scores),
        "avg_current": mean3(curr),
        "avg_historical": mean3(hist),
        "avg_first": mean3(first_scores),
        "avg_first_curr": mean3(first_curr),
        "avg_first_hist": mean3(first_hist),
        "p50_ms": sorted(latencies)[len(latencies) // 2] if latencies else 0,
    }
| #147 | |
| #148 | |
if __name__ == "__main__":
    # Derive the split from the data instead of hard-coding it, so the
    # banner cannot drift from the actual contents of QUESTIONS.
    n_hist = sum(1 for _, _, is_historical in QUESTIONS if is_historical)
    n_curr = len(QUESTIONS) - n_hist

    print(f"Timeline: {len(TIMELINE)} msgs ({len(SIGNALS)} signals + {len(FILLER)} filler)")
    print(f"Questions: {len(QUESTIONS)} ({n_curr} current, {n_hist} historical)")
    print()

    def _run_and_report(header_fmt, label, **recall_opts):
        # Run one configuration and print its header plus summary line.
        res = run_bench(label, **recall_opts)
        print(header_fmt.format(total=res['total']))
        print(f" Top-5: {res['avg_top5']:.3f} First: {res['avg_first']:.3f} Cur: {res['avg_current']:.3f}/{res['avg_first_curr']:.3f} Hist: {res['avg_historical']:.3f} | {res['p50_ms']}ms ingest {res['ingest_ms']}ms")
        return res

    bl = _run_and_report(
        "--- BASELINE (no temporal, k=10, {total} msgs) ---",
        "baseline",
        temporal_weight=0.0,
    )
    t1 = _run_and_report(
        "--- TEMPORAL (w=0.3, h=24h, {total} msgs) ---",
        "temporal",
        temporal_weight=0.3,
        temporal_halflife=24,
    )
    t2 = _run_and_report(
        "--- STRONG (w=0.6, h=12h, {total} msgs) ---",
        "strong",
        temporal_weight=0.6,
        temporal_halflife=12,
    )

    # NOTE(review): the "cur" delta is taken from the first-answer metric
    # while the "hist" delta (and the CURRENT row printed below) use the
    # top-5 metric. Kept as in the original; confirm this mix is intended.
    dc = t1['avg_first_curr'] - bl['avg_first_curr']
    dh = t1['avg_historical'] - bl['avg_historical']
    print()
    print(f"CURRENT: {bl['avg_current']:.3f} → {t1['avg_current']:.3f} → {t2['avg_current']:.3f}")
    print(f"HISTORICAL: {bl['avg_historical']:.3f} → {t1['avg_historical']:.3f} → {t2['avg_historical']:.3f}")
    print(f"DELTA cur={dc:+.3f} hist={dh:+.3f}")
| #172 |