repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 15d ago
| #1 | #!/usr/bin/env python3 |
| #2 | """ |
| #3 | Mnemosyne v2.0 Benchmark Suite |
| #4 | ================================= |
| #5 | Measures store latency, recall latency, DB size, and embedding overhead. |
| #6 | Uses a temp directory for the DB (no project pollution). |
| #7 | Reports mean ± std across 3 runs in milliseconds. |
| #8 | """ |
| #9 | |
| #10 | import os |
| #11 | import sys |
| #12 | import time |
| #13 | import json |
| #14 | import shutil |
| #15 | import tempfile |
| #16 | import statistics |
| #17 | from pathlib import Path |
| #18 | from datetime import datetime |
| #19 | |
| #20 | # Ensure project root is on path |
| #21 | PROJECT = Path(__file__).resolve().parent |
| #22 | sys.path.insert(0, str(PROJECT)) |
| #23 | |
| #24 | from mnemosyne.core.beam import BeamMemory, init_beam |
| #25 | from mnemosyne.core import embeddings |
| #26 | |
# ── Configuration ────────────────────────────────────────────────────────────
# Number of repetitions per measurement; mean ± std is computed across them.
N_RUNS = 3
# Number of memories written in each timed store run.
STORE_SIZES = [100, 1000, 10000]
# Corpus sizes populated before recall queries are timed.
RECALL_CORPUS_SIZES = [1000, 10000]
# Number of texts passed to a single timed embedding call.
EMBED_BATCHES = [1, 10, 100]
# Untimed writes issued before each timed store run.
WARMUP_MEMORIES = 5
| #33 | |
# Base sentences for synthetic memories. generate_text() cycles through this
# list by index and appends a reference tag to each sentence.
SAMPLE_TEXTS = [
    "The user prefers dark mode in all applications and finds light mode straining.",
    "Important meeting scheduled for next Tuesday at 3 PM with the engineering team.",
    "The project deadline has been moved to December 15th due to vendor delays.",
    "User's preferred programming language is Python for data analysis tasks.",
    "The database migration completed successfully with zero downtime last weekend.",
    "Customer reported a critical bug in the payment processing module yesterday.",
    "Team standup meetings are held every Monday, Wednesday, and Friday at 9 AM.",
    "The new API endpoint for user authentication has been deployed to production.",
    "Server monitoring shows CPU usage has been consistently above 80% this week.",
    "The machine learning model achieved 94.7% accuracy on the test dataset.",
    "Documentation for the REST API needs to be updated before the next release.",
    "The caching layer reduced average response times from 450ms to 32ms.",
    "User requested a feature to export data in CSV and JSON formats.",
    "The Kubernetes cluster was upgraded to version 1.28 without any issues.",
    "Memory optimization reduced the application's RAM usage by 40%.",
    "The integration tests now cover 87% of the critical code paths.",
    "A new team member will be joining the backend team starting next month.",
    "The CI/CD pipeline completes in approximately 12 minutes on average.",
    "Security audit identified three medium-severity vulnerabilities to patch.",
    "The GraphQL schema was refactored to support pagination on all queries.",
    "Load testing showed the system can handle 10,000 concurrent connections.",
    "The user's timezone is UTC-5 (Eastern Time) for scheduling purposes.",
    "Redis cache hit rate is currently at 96.3% across all services.",
    "The front-end bundle size was reduced from 2.1MB to 890KB after optimization.",
    "Automated backups run every 6 hours and are retained for 30 days.",
    "The user prefers concise summaries over detailed explanations.",
    "Network latency between the app and database servers averages 2.3ms.",
    "The staging environment mirrors production with 75% of real data volumes.",
    "Code coverage increased from 62% to 78% after the sprint testing push.",
    "The webhook integration with Slack sends alerts for all critical events.",
]
| #67 | |
# Query strings issued (one timed recall each) by the recall-latency benchmark.
QUERIES = [
    "user preferences for UI settings",
    "meeting schedule and calendar events",
    "database performance issues",
    "API endpoint configuration",
    "security vulnerabilities and patches",
    "machine learning model accuracy",
    "deployment and release timeline",
    "team member updates",
    "caching strategy optimization",
    "testing and code coverage",
]
| #80 | |
| #81 | |
def generate_text(idx: int) -> str:
    """Return the sample sentence for *idx* with a zero-padded reference tag.

    The sentence is chosen from ``SAMPLE_TEXTS`` by taking *idx* modulo the
    list length, and a ``[ref-NNNNNN]`` suffix built from *idx* is appended so
    that different indices produce different texts.
    """
    slot = idx % len(SAMPLE_TEXTS)
    return "{} [ref-{:06d}]".format(SAMPLE_TEXTS[slot], idx)
| #86 | |
| #87 | |
def fresh_db() -> tuple:
    """Create a fresh BeamMemory backed by a new temporary directory.

    A directory with the ``mnemosyne_bench_`` prefix is created via
    ``tempfile.mkdtemp``; the database file ``bench.db`` inside it is
    initialised with ``init_beam`` and then opened with ``BeamMemory``.

    Returns:
        A ``(beam, db_dir)`` pair: the ``BeamMemory`` instance and the path of
        the temporary directory. The caller is responsible for passing both
        to ``cleanup()`` when done.

    Raises:
        Whatever ``init_beam`` or ``BeamMemory`` raise. In that case the
        temporary directory is removed before the exception propagates, so a
        failed setup does not leave stray directories behind.
    """
    db_dir = tempfile.mkdtemp(prefix="mnemosyne_bench_")
    try:
        db_path = Path(db_dir) / "bench.db"
        init_beam(db_path)
        beam = BeamMemory(db_path=str(db_path))
    except BaseException:
        # The caller never receives db_dir on failure, so nobody else could
        # clean it up; remove it here and let the original error propagate.
        shutil.rmtree(db_dir, ignore_errors=True)
        raise
    return beam, db_dir
| #95 | |
| #96 | |
def cleanup(beam, db_dir):
    """Close the benchmark DB connection and delete its temporary directory.

    This is best-effort teardown and never raises because of a failed close:
    it is called from ``finally`` blocks and must not mask an error raised by
    the benchmark itself. A failed close is reported on stderr instead of
    being silently discarded, because it may explain leftover temp files.

    Args:
        beam: Object exposing the connection as ``beam.conn`` (with a
            ``close()`` method).
        db_dir: Path of the temporary directory to remove recursively.
    """
    try:
        beam.conn.close()
    except Exception as exc:
        print(f" warning: could not close benchmark DB connection: {exc!r}",
              file=sys.stderr)
    # Errors while deleting are ignored: the directory lives under the system
    # temp location and is disposable.
    shutil.rmtree(db_dir, ignore_errors=True)
| #104 | |
| #105 | |
def fmt_ms(mean_val, std_val, unit="ms"):
    """Render a mean and a standard deviation as ``"<mean> ± <std>"``.

    Both numbers are shown with two decimal places. ``unit`` is accepted but
    is not included in the returned text; callers put the unit in their own
    column headers.
    """
    return "{:.2f} ± {:.2f}".format(mean_val, std_val)
| #109 | |
| #110 | |
def fmt_size(size_bytes):
    """Return *size_bytes* as a short human-readable string.

    Values below 1024 are shown as ``"<n> B"``, values below 1024 * 1024 as
    KB with one decimal place, and everything else as MB with two decimals.
    """
    kib = 1024
    mib = kib * kib
    if size_bytes < kib:
        return "{} B".format(size_bytes)
    if size_bytes < mib:
        return "{:.1f} KB".format(size_bytes / kib)
    return "{:.2f} MB".format(size_bytes / mib)
| #119 | |
| #120 | |
# ── Benchmark 1: Store Latency ───────────────────────────────────────────────
def bench_store_latency():
    """Time bulk ``beam.remember()`` writes for every size in ``STORE_SIZES``.

    For each size, ``N_RUNS`` runs are made, each against its own fresh
    temporary database: ``WARMUP_MEMORIES`` untimed writes first, then
    ``size`` timed writes. Per-run and aggregate timings are printed.

    Returns:
        dict mapping each size to ``{"total_ms": ..., "per_mem_ms": ...}``,
        where both values are ``fmt_ms`` strings (mean ± std across runs).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("BENCHMARK 1: Store Latency (beam.remember)")
    print(rule)

    results = {}
    for size in STORE_SIZES:
        durations = []
        for run_no in range(1, N_RUNS + 1):
            beam, db_dir = fresh_db()
            try:
                # Untimed warmup writes.
                for w in range(WARMUP_MEMORIES):
                    beam.remember(f"warmup {w}", source="bench_warmup")

                # Timed section: `size` sequential writes.
                t0 = time.perf_counter()
                for n in range(size):
                    beam.remember(
                        generate_text(n),
                        source="bench_store",
                        importance=0.5 + (n % 5) * 0.1,
                    )
                took = time.perf_counter() - t0
                durations.append(took)
                print(f" Store {size:>5} memories, run {run_no}: {took*1000:.1f} ms "
                      f"({took/size*1000:.3f} ms/mem)")
            finally:
                cleanup(beam, db_dir)

        # Aggregate in seconds, then convert to milliseconds for reporting.
        total_mean = statistics.mean(durations)
        total_std = statistics.stdev(durations) if len(durations) > 1 else 0.0
        each_mean = total_mean / size * 1000
        each_std = total_std / size * 1000
        results[size] = {
            "total_ms": fmt_ms(total_mean * 1000, total_std * 1000),
            "per_mem_ms": fmt_ms(each_mean, each_std),
        }
        print(f" → Store {size:>5}: total {total_mean*1000:.1f} ± {total_std*1000:.1f} ms, "
              f"per-mem {each_mean:.3f} ± {each_std:.3f} ms")

    return results
| #165 | |
| #166 | |
# ── Benchmark 2: Recall Latency ──────────────────────────────────────────────
def bench_recall_latency():
    """Time ``beam.recall()`` against corpora of ``RECALL_CORPUS_SIZES``.

    For each corpus size, ``N_RUNS`` runs are made. Every run fills a fresh
    temporary database with that many generated memories, issues one untimed
    warmup recall, then times one ``recall(query, top_k=5)`` per entry in
    ``QUERIES`` and averages those timings.

    Returns:
        dict mapping each corpus size to ``{"avg_ms": ...}``, where the value
        is a ``fmt_ms`` string (mean ± std of the per-run averages).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("BENCHMARK 2: Recall Latency (beam.recall)")
    print(rule)

    results = {}
    for corpus_size in RECALL_CORPUS_SIZES:
        per_run_avgs = []
        for run_no in range(1, N_RUNS + 1):
            # Each run gets its own database, so the corpus is rebuilt here.
            beam, db_dir = fresh_db()
            try:
                print(f" Populating {corpus_size} memories for run {run_no}...", end=" ", flush=True)
                fill_t0 = time.perf_counter()
                for n in range(corpus_size):
                    beam.remember(
                        generate_text(n),
                        source="bench_recall_corpus",
                        importance=0.3 + (n % 7) * 0.1,
                    )
                fill_secs = time.perf_counter() - fill_t0
                print(f"done ({fill_secs:.1f}s)")

                # One untimed recall before measuring.
                beam.recall("warmup query", top_k=5)

                latencies = []
                for query in QUERIES:
                    t0 = time.perf_counter()
                    beam.recall(query, top_k=5)
                    latencies.append(time.perf_counter() - t0)

                run_avg = statistics.mean(latencies)
                per_run_avgs.append(run_avg)
                print(f" Recall over {corpus_size:>5} corpus, run {run_no}: "
                      f"avg {run_avg*1000:.2f} ms across {len(QUERIES)} queries")

            finally:
                cleanup(beam, db_dir)

        overall_mean = statistics.mean(per_run_avgs)
        overall_std = statistics.stdev(per_run_avgs) if len(per_run_avgs) > 1 else 0.0
        results[corpus_size] = {
            "avg_ms": fmt_ms(overall_mean * 1000, overall_std * 1000),
        }
        print(f" → Recall @ {corpus_size:>5} corpus: {overall_mean*1000:.2f} ± {overall_std*1000:.2f} ms")

    return results
| #220 | |
| #221 | |
# ── Benchmark 3: Memory Footprint ────────────────────────────────────────────
def bench_db_size(sizes=(1000, 10000)):
    """Measure the on-disk DB file size after storing a given number of memories.

    For each corpus size a fresh temporary database is filled with that many
    generated memories, the sizes of the ``-wal`` / ``-shm`` side files are
    sampled, ``PRAGMA wal_checkpoint(TRUNCATE)`` is issued on ``beam.conn``,
    and the size of the main ``bench.db`` file is then read.

    Args:
        sizes: Corpus sizes to measure. Defaults to ``(1000, 10000)``, the
            sizes this benchmark has always used.

    Returns:
        dict mapping each size to ``{"total": <fmt_size string>,
        "total_bytes": <int>, "per_mem_bytes": <float>}``.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK 3: Memory Footprint (DB file size)")
    print("=" * 70)

    results = {}
    for size in sizes:
        beam, db_dir = fresh_db()
        try:
            for i in range(size):
                beam.remember(
                    generate_text(i),
                    source="bench_size",
                    importance=0.5,
                )

            db_path = Path(db_dir) / "bench.db"

            # Side-file sizes are read before the checkpoint below so the
            # report shows what was sitting outside the main DB file.
            wal_path = Path(db_dir) / "bench.db-wal"
            shm_path = Path(db_dir) / "bench.db-shm"
            wal_size = os.path.getsize(wal_path) if wal_path.exists() else 0
            shm_size = os.path.getsize(shm_path) if shm_path.exists() else 0

            # Force checkpoint to get realistic main DB size
            beam.conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
            db_size_after = os.path.getsize(db_path)

            # Guard against a zero-size entry in a caller-supplied `sizes`.
            per_mem = db_size_after / size if size else 0.0
            results[size] = {
                "total": fmt_size(db_size_after),
                "total_bytes": db_size_after,
                "per_mem_bytes": per_mem,
            }
            print(f" DB @ {size:>5} memories: {fmt_size(db_size_after)} "
                  f"({per_mem:.0f} bytes/mem)")
            if wal_size:
                print(f" WAL: {fmt_size(wal_size)}, SHM: {fmt_size(shm_size)}")

        finally:
            cleanup(beam, db_dir)

    return results
| #268 | |
| #269 | |
# ── Benchmark 4: Embedding Overhead ──────────────────────────────────────────
def bench_embedding():
    """Time ``embeddings.embed()`` for each batch size in ``EMBED_BATCHES``.

    The benchmark is skipped (returning ``{}``) when
    ``embeddings.available()`` is false. Otherwise one untimed warmup call is
    made, then each batch size is embedded ``N_RUNS`` times and timed.

    Returns:
        dict mapping each batch size to ``{"total_ms": <fmt_ms string>,
        "per_text_ms": <float>}``; empty when embeddings are unavailable.
    """
    print("\n" + "=" * 70)
    print("BENCHMARK 4: Embedding Overhead")
    print("=" * 70)

    if not embeddings.available():
        print(" ⚠ fastembed not available — skipping embedding benchmark")
        return {}

    # Warm up the model
    print(" Warming up embedding model...", end=" ", flush=True)
    embeddings.embed(["warmup text for model loading"])
    print("done")

    results = {}
    for batch_size in EMBED_BATCHES:
        # The input batch is the same for every run, so build it once.
        texts = [f"This is benchmark embedding test sentence number {i}." for i in range(batch_size)]
        run_times = []
        for run in range(N_RUNS):
            start = time.perf_counter()
            # NOTE(review): the result is not consumed here; if
            # embeddings.embed() returns a lazy iterator this would time only
            # its creation — confirm it returns materialised vectors.
            embeddings.embed(texts)
            elapsed = time.perf_counter() - start
            run_times.append(elapsed)
            print(f" Embed batch={batch_size:>3}, run {run+1}: {elapsed*1000:.2f} ms "
                  f"({elapsed/batch_size*1000:.3f} ms/text)")

        mean_val = statistics.mean(run_times)
        std_val = statistics.stdev(run_times) if len(run_times) > 1 else 0.0
        per_text = mean_val / batch_size * 1000
        results[batch_size] = {
            "total_ms": fmt_ms(mean_val * 1000, std_val * 1000),
            "per_text_ms": per_text,
        }
        print(f" → Embed batch {batch_size:>3}: {mean_val*1000:.2f} ± {std_val*1000:.2f} ms "
              f"({per_text:.3f} ms/text)")

    return results
| #309 | |
| #310 | |
# ── Summary Table ─────────────────────────────────────────────────────────────
def print_summary(store, recall, db_size, embed):
    """Print the collected benchmark results as markdown tables.

    Args:
        store: Mapping of size to a dict with ``total_ms`` and ``per_mem_ms``.
        recall: Mapping of corpus size to a dict with ``avg_ms``.
        db_size: Mapping of size to a dict with ``total`` and
            ``per_mem_bytes``.
        embed: Mapping of batch size to a dict with ``total_ms`` and
            ``per_text_ms``; the embedding table is omitted when this is
            empty.
    """

    def emit_table(title, header, divider, mapping, render_row):
        # One section: heading, blank line, table header, rows in key order,
        # trailing blank line.
        print(title)
        print()
        print(header)
        print(divider)
        for key, data in sorted(mapping.items()):
            print(render_row(key, data))
        print()

    rule = "=" * 70
    print("\n" + rule)
    print("SUMMARY TABLE (Mnemosyne v2.0 Benchmark Results)")
    print(rule)
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    print(f"Runs: {N_RUNS} | Date: {stamp}")
    print("Embedding model: BAAI/bge-small-en-v1.5 (384-dim, int8 vectors)")
    print()

    emit_table(
        "## Store Latency (beam.remember)",
        "| Corpus Size | Total Time (ms) | Per-Memory (ms) |",
        "|------------:|:----------------|:----------------|",
        store,
        lambda size, data: f"| {size:>11} | {data['total_ms']} | {data['per_mem_ms']} |",
    )

    emit_table(
        "## Recall Latency (beam.recall, top_k=5)",
        "| Corpus Size | Avg Query Time (ms) |",
        "|------------:|:--------------------|",
        recall,
        lambda size, data: f"| {size:>11} | {data['avg_ms']} |",
    )

    emit_table(
        "## Memory Footprint (SQLite DB)",
        "| Memories | Total Size | Bytes/Memory |",
        "|---------:|:----------|:------------:|",
        db_size,
        lambda size, data: f"| {size:>8} | {data['total']} | {data['per_mem_bytes']:.0f} |",
    )

    # The embedding benchmark returns an empty dict when it was skipped.
    if embed:
        emit_table(
            "## Embedding Overhead (fastembed)",
            "| Batch Size | Total Time (ms) | Per-Text (ms) |",
            "|-----------:|:----------------|:-------------:|",
            embed,
            lambda batch, data: f"| {batch:>10} | {data['total_ms']} | {data['per_text_ms']:.3f} |",
        )
| #357 | |
| #358 | |
# ── Main ──────────────────────────────────────────────────────────────────────
def banner_lines(captions, width=70):
    """Return the rows of a box-drawn banner with each caption centred.

    Every returned row is ``width + 2`` characters long, so the side borders
    line up with the top and bottom borders regardless of caption length
    (captions longer than ``width`` are not truncated and would overflow).
    """
    rows = ["╔" + "═" * width + "╗"]
    rows.extend("║" + caption.center(width) + "║" for caption in captions)
    rows.append("╚" + "═" * width + "╝")
    return rows


def main():
    """Run the four benchmarks in order and print the summary table."""
    for row in banner_lines([
        "Mnemosyne v2.0 Benchmark Suite",
        "Store · Recall · Footprint · Embedding",
    ]):
        print(row)
    print(f" Python: {sys.version.split()[0]}")
    print(f" Runs: {N_RUNS} per benchmark")
    print(f" Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    store_results = bench_store_latency()
    recall_results = bench_recall_latency()
    db_size_results = bench_db_size()
    embed_results = bench_embedding()

    print_summary(store_results, recall_results, db_size_results, embed_results)

    print("Benchmark complete.")


if __name__ == "__main__":
    main()
| #377 |