repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | """ |
| #2 | Hindsight memory provider importer. |
| #3 | |
| #4 | Hindsight is a Hermes memory backend that stores consolidated memories with |
| #5 | historical timestamps and fact types. Unlike regular interactive writes, a |
| #6 | migration must preserve that history, so this importer writes to Mnemosyne's |
| #7 | ``episodic_memory`` table directly instead of routing every item through |
| #8 | ``remember()`` (which would assign the current timestamp and working-memory |
| #9 | session). |
| #10 | |
| #11 | Supported inputs: |
| #12 | - JSON export files containing a list of memories |
| #13 | - JSON objects containing ``items``, ``memories``, ``results``, or ``data`` |
| #14 | - A running Hindsight API at ``/v1/default/banks/{bank}/memories/list`` |
| #15 | """ |
| #16 | |
| #17 | from __future__ import annotations |
| #18 | |
| #19 | import hashlib |
| #20 | import json |
| #21 | import urllib.parse |
| #22 | import urllib.request |
| #23 | from datetime import datetime, timezone |
| #24 | from pathlib import Path |
| #25 | from typing import Any, Dict, List, Optional |
| #26 | |
| #27 | from mnemosyne.core.importers.base import BaseImporter, ImporterResult |
| #28 | |
| #29 | |
class HindsightImporter(BaseImporter):
    """Import Hindsight memories into Mnemosyne episodic memory.

    Items are read either from a JSON export file or from a Hindsight HTTP
    API, normalized into row dicts, and inserted directly into the
    ``episodic_memory`` table so the timestamps carried by each item are
    stored as-is.

    Examples:
        importer = HindsightImporter(file_path="hindsight-export.json")
        result = importer.run(mnemosyne)

        importer = HindsightImporter(base_url="http://127.0.0.1:8888", bank="hermes")
        result = importer.run(mnemosyne)
    """

    provider_name = "hindsight"

    # Default importance per Hindsight fact type when an item carries no
    # usable explicit importance/score. Unknown types fall back to 0.5.
    _BASE_IMPORTANCE = {"world": 0.75, "experience": 0.65, "observation": 0.55}

    def __init__(self, file_path: Optional[str] = None, base_url: Optional[str] = None,
                 bank: str = "hermes", page_size: int = 500,
                 max_items: Optional[int] = None, namespace: Optional[str] = None,
                 **kwargs):
        """Configure the importer.

        Args:
            file_path: Path to a JSON export. Takes precedence over ``base_url``.
            base_url: Root URL of a Hindsight API; a trailing ``/`` is stripped.
            bank: Hindsight bank name, used in the API path and in metadata.
            page_size: Page size for API requests, clamped to ``1..1000``.
            max_items: Optional cap on the number of extracted items.
                ``None``, ``0`` and negative values mean "no cap".
            namespace: Prefix mixed into generated ids; defaults to ``bank``.
            **kwargs: Forwarded to ``BaseImporter.__init__``.
        """
        super().__init__(**kwargs)
        self.file_path = file_path
        self.base_url = base_url.rstrip("/") if base_url else None
        self.bank = bank
        self.page_size = min(max(int(page_size), 1), 1000)
        self.max_items = max_items
        self.namespace = namespace or bank

    # ------------------------------------------------------------------
    # Extract
    # ------------------------------------------------------------------

    def extract(self) -> List[Dict]:
        """Extract memories from a file or Hindsight HTTP API.

        The ``max_items`` cap is applied to both sources.

        Raises:
            RuntimeError: If neither ``file_path`` nor ``base_url`` is set.
        """
        if self.file_path:
            items = self._extract_from_file(Path(self.file_path))
        elif self.base_url:
            items = self._extract_from_api()
        else:
            raise RuntimeError("Provide either file_path or base_url for Hindsight import")
        limit = self._item_limit()
        return items if limit is None else items[:limit]

    def _item_limit(self) -> Optional[int]:
        """Return ``max_items`` as a positive int, or ``None`` when uncapped."""
        if not self.max_items:
            return None
        limit = int(self.max_items)
        # A negative cap would turn ``items[:limit]`` into "drop the tail".
        return limit if limit > 0 else None

    def _extract_from_file(self, path: Path) -> List[Dict]:
        """Load a UTF-8 JSON export from ``path`` and unwrap its items."""
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        return self._unwrap_items(data)

    def _extract_from_api(self) -> List[Dict]:
        """Page through ``/v1/default/banks/{bank}/memories/list``.

        Paging stops on an empty page, once ``max_items`` items have been
        collected, or once the offset reaches the ``total`` reported by the
        response (when the response is an object that has one).
        """
        limit = self._item_limit()
        # Quote the bank so names containing "/", "?", "#" or spaces stay a
        # single, valid path segment.
        bank = urllib.parse.quote(str(self.bank), safe="")
        endpoint = f"{self.base_url}/v1/default/banks/{bank}/memories/list"

        items: List[Dict] = []
        offset = 0
        while True:
            query = urllib.parse.urlencode({"limit": self.page_size, "offset": offset})
            with urllib.request.urlopen(f"{endpoint}?{query}", timeout=60) as resp:
                data = json.loads(resp.read().decode("utf-8"))
            page = self._unwrap_items(data)
            if not page:
                break
            items.extend(page)
            offset += len(page)
            if limit is not None and len(items) >= limit:
                return items[:limit]
            total = data.get("total") if isinstance(data, dict) else None
            if total is not None and offset >= int(total):
                break
        return items

    @staticmethod
    def _unwrap_items(data: Any) -> List[Dict]:
        """Return the list of memory dicts contained in a decoded JSON payload.

        Accepts a bare list, an object holding a list under ``items``,
        ``memories``, ``results`` or ``data``, or a single memory object.
        Non-dict entries are dropped; anything else yields ``[]``.
        """
        if isinstance(data, list):
            return [x for x in data if isinstance(x, dict)]
        if isinstance(data, dict):
            for key in ("items", "memories", "results", "data"):
                value = data.get(key)
                if isinstance(value, list):
                    return [x for x in value if isinstance(x, dict)]
            # Accept a single memory object as a one-item export.
            if any(k in data for k in ("text", "content", "memory")):
                return [data]
        return []

    # ------------------------------------------------------------------
    # Transform helpers
    # ------------------------------------------------------------------

    def transform(self, raw_data: List[Dict]) -> List[Dict]:
        """Normalize Hindsight items into episodic import rows.

        Items without any text content are skipped.
        """
        memories = []
        for item in raw_data:
            content = self._content_for(item)
            if not content:
                continue
            fact_type = item.get("fact_type") or item.get("type") or "memory"
            timestamp = self._timestamp_for(item)
            memories.append({
                "id": self._stable_id(item),
                "content": content,
                "source": f"hindsight:{fact_type}",
                "timestamp": timestamp,
                "session_id": self._session_id_for(item),
                "importance": self._importance_for(item),
                "metadata": self._metadata_for(item),
                "valid_until": item.get("valid_until"),
                "scope": "global",
                "channel_id": "hindsight",
                "author_id": None,
                "author_type": None,
                "veracity": "imported",
                "created_at": timestamp,
            })
        return memories

    @staticmethod
    def _content_for(item: Dict) -> str:
        """Return the stripped text of ``item`` (``text``/``content``/``memory``)."""
        content = item.get("text") or item.get("content") or item.get("memory") or ""
        return str(content).strip()

    def _stable_id(self, item: Dict) -> str:
        """Derive a deterministic ``hs_``-prefixed id for ``item``.

        The id hashes the namespace together with the item's ``id``, else
        ``uuid``, else its content, else its full JSON dump, so the same
        input always maps to the same id.
        """
        raw = (
            item.get("id")
            or item.get("uuid")
            or self._content_for(item)
            or json.dumps(item, sort_keys=True, default=str)
        )
        digest = hashlib.sha256(f"{self.namespace}:{raw}".encode("utf-8")).hexdigest()[:24]
        return f"hs_{digest}"

    @staticmethod
    def _timestamp_for(item: Dict) -> str:
        """Return the item's timestamp as a string, or the current UTC time."""
        # mentioned_at is Hindsight's memory timestamp. date/occurred_* may be
        # event dates and can be in the future, so only use them as fallbacks.
        for key in ("mentioned_at", "timestamp", "created_at", "date", "occurred_start"):
            value = item.get(key)
            if value:
                return str(value)
        return datetime.now(timezone.utc).isoformat()

    def _session_id_for(self, item: Dict) -> str:
        """Pick a session id from a ``session:`` tag, the chunk id, or the date."""
        tags = item.get("tags") or []
        if isinstance(tags, list):
            for tag in tags:
                if isinstance(tag, str) and tag.startswith("session:"):
                    # "session:abc" -> "session_abc"
                    return tag.replace(":", "_", 1)
        chunk_id = item.get("chunk_id")
        if chunk_id:
            digest = hashlib.sha256(str(chunk_id).encode("utf-8")).hexdigest()[:16]
            return f"chunk_{digest}"
        # Fall back to the leading 10 characters of the timestamp
        # (the YYYY-MM-DD part of an ISO string) with dashes removed.
        timestamp = self._timestamp_for(item)[:10] or "unknown-date"
        return "hindsight_" + timestamp.replace("-", "")

    @staticmethod
    def _importance_for(item: Dict) -> float:
        """Return an importance in ``[0.0, 1.0]`` for ``item``.

        An explicit ``importance`` (then ``score``) wins when it parses as a
        number, including an explicit ``0``. Otherwise the value is a
        per-fact-type base plus 0.03 per proof (at most 5 proofs counted).
        """
        for key in ("importance", "score"):
            explicit = item.get(key)
            # Compare against None rather than truthiness so that a
            # deliberate importance of 0 is not mistaken for "missing".
            if explicit is None:
                continue
            try:
                return max(0.0, min(float(explicit), 1.0))
            except (TypeError, ValueError):
                continue

        fact_type = item.get("fact_type") or item.get("type")
        try:
            proof_bonus = min(float(item.get("proof_count") or 0), 5.0) * 0.03
        except (TypeError, ValueError):
            proof_bonus = 0.0
        # Only strings can match the table; guarding also avoids a TypeError
        # from an unhashable fact_type such as a list.
        if isinstance(fact_type, str):
            base = HindsightImporter._BASE_IMPORTANCE.get(fact_type, 0.5)
        else:
            base = 0.5
        # Clamp both ends: a negative proof_count must not go below 0.
        return max(0.0, min(1.0, base + proof_bonus))

    def _metadata_for(self, item: Dict) -> Dict:
        """Merge the item's own metadata with preserved Hindsight fields.

        String metadata is decoded as JSON when possible. Anything that is
        not (or does not decode to) a dict is kept under ``raw_metadata``.
        The ``hindsight_*`` keys override same-named keys in the metadata.
        """
        metadata = item.get("metadata") or {}
        if isinstance(metadata, str):
            try:
                metadata = json.loads(metadata)
            except (json.JSONDecodeError, TypeError):
                metadata = {"raw_metadata": metadata}
        # Also covers JSON strings that decode to a list/number/etc., which
        # would otherwise break the dict merge below.
        if not isinstance(metadata, dict):
            metadata = {"raw_metadata": metadata}

        preserved = {
            "migration_source": "hindsight",
            "hindsight_bank": self.bank,
            "hindsight_id": item.get("id"),
            "hindsight_fact_type": item.get("fact_type") or item.get("type"),
            "hindsight_context": item.get("context"),
            "hindsight_date": item.get("date"),
            "hindsight_mentioned_at": item.get("mentioned_at"),
            "hindsight_occurred_start": item.get("occurred_start"),
            "hindsight_occurred_end": item.get("occurred_end"),
            "hindsight_entities": item.get("entities"),
            "hindsight_chunk_id": item.get("chunk_id"),
            "hindsight_proof_count": item.get("proof_count"),
            "hindsight_tags": item.get("tags") or [],
            "hindsight_consolidated_at": item.get("consolidated_at"),
            "hindsight_consolidation_failed_at": item.get("consolidation_failed_at"),
        }
        return {**metadata, **preserved}

    # ------------------------------------------------------------------
    # Import
    # ------------------------------------------------------------------

    def run(self, mnemosyne, dry_run: bool = False,
            session_id: Optional[str] = None,
            channel_id: Optional[str] = None) -> ImporterResult:
        """Run import, preserving historical fields in episodic memory.

        Args:
            mnemosyne: Object exposing the database connection as
                ``mnemosyne.beam.conn``.
            dry_run: When true, extract and transform only; nothing is written
                and ``imported`` reports how many rows would be attempted.
            session_id: If given, overrides the derived session id of every row.
            channel_id: If given, overrides the default ``"hindsight"`` channel.

        Returns:
            An ``ImporterResult`` with counts, imported ids and error messages.
            Failures are recorded in ``errors`` rather than raised.
        """
        result = ImporterResult(
            provider=self.provider_name,
            started_at=datetime.now().isoformat(),
        )
        try:
            raw_data = self.extract()
            result.total = len(raw_data)
            if result.total == 0:
                result.errors.append("No memories found to import from Hindsight")
                result.finished_at = datetime.now().isoformat()
                return result
            if not self.validate(raw_data):
                result.errors.append("Validation failed")
                result.finished_at = datetime.now().isoformat()
                return result

            memories = self.transform(raw_data)
            # Items dropped by transform() (no content) count as skipped.
            result.skipped = result.total - len(memories)
            if dry_run:
                result.imported = len(memories)
                result.finished_at = datetime.now().isoformat()
                return result

            conn = mnemosyne.beam.conn
            for mem in memories:
                try:
                    if session_id:
                        mem["session_id"] = session_id
                    if channel_id:
                        mem["channel_id"] = channel_id
                    if self._insert_episodic(conn, mem):
                        result.imported += 1
                        result.memory_ids.append(mem["id"])
                    else:
                        result.skipped += 1
                except Exception as e:
                    # One bad row must not abort the rest of the import.
                    result.failed += 1
                    result.errors.append(
                        f"Failed to import '{mem.get('content', '')[:80]}': {e}"
                    )
            conn.commit()
        except Exception as e:
            result.errors.append(f"Hindsight import failed: {e}")
        result.finished_at = datetime.now().isoformat()
        return result

    @staticmethod
    def _insert_episodic(conn, mem: Dict) -> bool:
        """Insert one normalized row into ``episodic_memory``.

        Uses ``INSERT OR IGNORE``; returns ``True`` when a row was written and
        ``False`` when the statement affected no rows (presumably because a
        row with the same id already exists - confirm against the schema).
        """
        metadata_json = json.dumps(
            mem.get("metadata", {}), ensure_ascii=False, sort_keys=True, default=str
        )
        cur = conn.execute("""
            INSERT OR IGNORE INTO episodic_memory
            (id, content, source, timestamp, session_id, importance, metadata_json,
            summary_of, veracity, created_at, degraded_at, valid_until,
            channel_id, author_id, scope, superseded_by, author_type)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            mem["id"],
            mem["content"],
            mem.get("source", "hindsight:memory"),
            mem.get("timestamp"),
            mem.get("session_id", "hindsight"),
            mem.get("importance", 0.5),
            metadata_json,
            "",  # summary_of
            mem.get("veracity", "imported"),
            mem.get("created_at") or mem.get("timestamp"),
            None,  # degraded_at
            mem.get("valid_until"),
            mem.get("channel_id"),
            mem.get("author_id"),
            mem.get("scope", "global"),
            None,  # superseded_by
            mem.get("author_type"),
        ))
        return cur.rowcount > 0
| #299 | |
| #300 | |
def import_from_hindsight(mnemosyne, file_path: Optional[str] = None,
                          base_url: Optional[str] = None,
                          bank: str = "hermes", dry_run: bool = False,
                          session_id: Optional[str] = None,
                          channel_id: Optional[str] = None,
                          max_items: Optional[int] = None,
                          page_size: int = 500,
                          namespace: Optional[str] = None) -> ImporterResult:
    """Convenience wrapper for importing Hindsight memories.

    Builds a ``HindsightImporter`` from the source options and runs it once.

    Args:
        mnemosyne: Target instance handed to ``HindsightImporter.run``.
        file_path: Path to a JSON export file.
        base_url: Root URL of a Hindsight API.
        bank: Hindsight bank name.
        dry_run: Passed to ``run``; when true nothing is written.
        session_id: Optional session id override passed to ``run``.
        channel_id: Optional channel id override passed to ``run``.
        max_items: Optional cap on the number of items to extract.
        page_size: Page size for API requests (importer default is 500).
        namespace: Optional id namespace; the importer falls back to ``bank``.

    Returns:
        The ``ImporterResult`` produced by the importer.
    """
    importer = HindsightImporter(
        file_path=file_path,
        base_url=base_url,
        bank=bank,
        page_size=page_size,
        max_items=max_items,
        namespace=namespace,
    )
    return importer.run(
        mnemosyne,
        dry_run=dry_run,
        session_id=session_id,
        channel_id=channel_id,
    )
| #318 |