my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""
#2	Hindsight memory provider importer.
#3
#4	Hindsight is a Hermes memory backend that stores consolidated memories with
#5	historical timestamps and fact types. Unlike regular interactive writes, a
#6	migration must preserve that history, so this importer writes to Mnemosyne's
#7	``episodic_memory`` table directly instead of routing every item through
#8	``remember()`` (which would assign the current timestamp and working-memory
#9	session).
#10
#11	Supported inputs:
#12	- JSON export files containing a list of memories
#13	- JSON objects containing ``items``, ``memories``, ``results``, or ``data``
#14	- A running Hindsight API at ``/v1/default/banks/{bank}/memories/list``
#15	"""
#16
#17	from __future__ import annotations
#18
#19	import hashlib
#20	import json
#21	import urllib.parse
#22	import urllib.request
#23	from datetime import datetime, timezone
#24	from pathlib import Path
#25	from typing import Any, Dict, List, Optional
#26
#27	from mnemosyne.core.importers.base import BaseImporter, ImporterResult
#28
#29
class HindsightImporter(BaseImporter):
    """Import Hindsight memories into Mnemosyne episodic memory.

    Rows are written straight into the ``episodic_memory`` table (see
    ``_insert_episodic``) so that the timestamps, session ids and metadata
    carried by the export are stored as-is.

    Examples:
        importer = HindsightImporter(file_path="hindsight-export.json")
        result = importer.run(mnemosyne)

        importer = HindsightImporter(base_url="http://127.0.0.1:8888", bank="hermes")
        result = importer.run(mnemosyne)
    """

    provider_name = "hindsight"

    def __init__(self, file_path: Optional[str] = None, base_url: Optional[str] = None,
                 bank: str = "hermes", page_size: int = 500,
                 max_items: Optional[int] = None, namespace: Optional[str] = None,
                 **kwargs):
        """Configure the importer.

        Args:
            file_path: Path to a JSON export. Takes precedence over ``base_url``.
            base_url: Root URL of a Hindsight API; trailing slashes are stripped.
            bank: Hindsight bank name used in the API path and in metadata.
            page_size: Page size for API requests, clamped to ``1..1000``.
            max_items: Optional cap on the number of extracted items; a falsy
                value means "no cap".
            namespace: Prefix mixed into generated ids; defaults to ``bank``.
            **kwargs: Forwarded unchanged to ``BaseImporter.__init__``.
        """
        super().__init__(**kwargs)
        self.file_path = file_path
        self.base_url = base_url.rstrip("/") if base_url else None
        self.bank = bank
        self.page_size = min(max(int(page_size), 1), 1000)
        self.max_items = max_items
        self.namespace = namespace or bank

    # ------------------------------------------------------------------
    # Extract
    # ------------------------------------------------------------------

    def extract(self) -> List[Dict]:
        """Extract memories from a file or Hindsight HTTP API.

        Returns:
            The raw memory dicts, truncated to ``max_items`` when that is set.

        Raises:
            RuntimeError: If neither ``file_path`` nor ``base_url`` was given.
        """
        if self.file_path:
            items = self._extract_from_file(Path(self.file_path))
            # Honour the same cap the API path applies.
            if self.max_items and len(items) > self.max_items:
                return items[:self.max_items]
            return items
        if self.base_url:
            return self._extract_from_api()
        raise RuntimeError("Provide either file_path or base_url for Hindsight import")

    def _extract_from_file(self, path: Path) -> List[Dict]:
        """Load a UTF-8 JSON export from ``path`` and unwrap its memory list."""
        with path.open("r", encoding="utf-8") as f:
            data = json.load(f)
        return self._unwrap_items(data)

    def _extract_from_api(self) -> List[Dict]:
        """Page through the Hindsight ``memories/list`` endpoint.

        Paging stops on an empty page, when ``max_items`` is reached, or when
        the response's ``total`` field (if present) has been covered.
        """
        items: List[Dict] = []
        offset = 0
        # Quote the bank so names containing "/", "?" or spaces cannot alter
        # the request path.
        bank = urllib.parse.quote(str(self.bank), safe="")
        endpoint = f"{self.base_url}/v1/default/banks/{bank}/memories/list"
        while True:
            query = urllib.parse.urlencode({"limit": self.page_size, "offset": offset})
            with urllib.request.urlopen(f"{endpoint}?{query}", timeout=60) as resp:
                data = json.loads(resp.read().decode("utf-8"))
            page = self._unwrap_items(data)
            if not page:
                break
            items.extend(page)
            offset += len(page)
            if self.max_items and len(items) >= self.max_items:
                return items[:self.max_items]
            total = data.get("total") if isinstance(data, dict) else None
            if total is not None and offset >= int(total):
                break
        return items

    @staticmethod
    def _unwrap_items(data: Any) -> List[Dict]:
        """Return the list of memory dicts contained in a decoded JSON payload.

        Accepts a bare list, an object wrapping a list under ``items``,
        ``memories``, ``results`` or ``data``, or a single memory object.
        Non-dict list entries are dropped; anything else yields ``[]``.
        """
        if isinstance(data, list):
            return [x for x in data if isinstance(x, dict)]
        if isinstance(data, dict):
            for key in ("items", "memories", "results", "data"):
                value = data.get(key)
                if isinstance(value, list):
                    return [x for x in value if isinstance(x, dict)]
            # Accept a single memory object as a one-item export.
            if any(k in data for k in ("text", "content", "memory")):
                return [data]
        return []

    # ------------------------------------------------------------------
    # Transform helpers
    # ------------------------------------------------------------------

    def transform(self, raw_data: List[Dict]) -> List[Dict]:
        """Normalize Hindsight items into episodic import rows.

        Items without any text content are skipped, so the result may be
        shorter than ``raw_data``.
        """
        memories = []
        for item in raw_data:
            content = self._content_for(item)
            if not content:
                continue
            fact_type = item.get("fact_type") or item.get("type") or "memory"
            timestamp = self._timestamp_for(item)
            memories.append({
                "id": self._stable_id(item),
                "content": content,
                "source": f"hindsight:{fact_type}",
                "timestamp": timestamp,
                "session_id": self._session_id_for(item),
                "importance": self._importance_for(item),
                "metadata": self._metadata_for(item),
                "valid_until": item.get("valid_until"),
                "scope": "global",
                "channel_id": "hindsight",
                "author_id": None,
                "author_type": None,
                "veracity": "imported",
                "created_at": timestamp,
            })
        return memories

    @staticmethod
    def _content_for(item: Dict) -> str:
        """Return the stripped memory text from ``text``/``content``/``memory``, or ``""``."""
        content = item.get("text") or item.get("content") or item.get("memory") or ""
        return str(content).strip()

    def _stable_id(self, item: Dict) -> str:
        """Derive a deterministic ``hs_``-prefixed id for ``item``.

        The id hashes the namespace together with the item's ``id``/``uuid``,
        falling back to its content and finally to its full JSON form, so the
        same input produces the same id on every run.
        """
        raw = (
            item.get("id")
            or item.get("uuid")
            or self._content_for(item)
            or json.dumps(item, sort_keys=True, default=str)
        )
        digest = hashlib.sha256(f"{self.namespace}:{raw}".encode("utf-8")).hexdigest()[:24]
        return f"hs_{digest}"

    @staticmethod
    def _timestamp_for(item: Dict) -> str:
        """Return the first available timestamp field as a string, else the current UTC time."""
        # mentioned_at is Hindsight's memory timestamp. date/occurred_* may be
        # event dates and can be in the future, so only use them as fallbacks.
        for key in ("mentioned_at", "timestamp", "created_at", "date", "occurred_start"):
            value = item.get(key)
            if value:
                return str(value)
        return datetime.now(timezone.utc).isoformat()

    def _session_id_for(self, item: Dict) -> str:
        """Pick a session id for ``item``.

        Preference order: a ``session:<x>`` tag (returned as ``session_<x>``),
        a hash of ``chunk_id``, then a per-day bucket built from the first ten
        characters of the item's timestamp.
        """
        tags = item.get("tags") or []
        if isinstance(tags, list):
            for tag in tags:
                if isinstance(tag, str) and tag.startswith("session:"):
                    return tag.replace(":", "_", 1)
        chunk_id = item.get("chunk_id")
        if chunk_id:
            digest = hashlib.sha256(str(chunk_id).encode("utf-8")).hexdigest()[:16]
            return f"chunk_{digest}"
        timestamp = self._timestamp_for(item)[:10] or "unknown-date"
        return "hindsight_" + timestamp.replace("-", "")

    @staticmethod
    def _importance_for(item: Dict) -> float:
        """Return an importance score in ``[0.0, 1.0]`` for ``item``.

        An explicit ``importance`` (or, failing that, ``score``) is clamped and
        used directly. Otherwise the score is a per-fact-type base plus a small
        bonus of 0.03 per proof, counting at most five proofs.
        """
        # Test against None rather than truthiness so an explicit 0 is honoured
        # instead of silently falling through to the heuristic.
        for key in ("importance", "score"):
            explicit = item.get(key)
            if explicit is None:
                continue
            try:
                return max(0.0, min(float(explicit), 1.0))
            except (TypeError, ValueError):
                continue
        fact_type = item.get("fact_type") or item.get("type")
        proof_count = item.get("proof_count") or 0
        try:
            # Clamp at zero so a bogus negative count cannot push the result
            # below the documented range.
            proof_bonus = max(0.0, min(float(proof_count), 5.0)) * 0.03
        except (TypeError, ValueError):
            proof_bonus = 0.0
        base_by_type = {"world": 0.75, "experience": 0.65, "observation": 0.55}
        # Only strings can match; this also avoids a TypeError on unhashable values.
        base = base_by_type.get(fact_type, 0.5) if isinstance(fact_type, str) else 0.5
        return min(1.0, base + proof_bonus)

    def _metadata_for(self, item: Dict) -> Dict:
        """Build the metadata dict stored alongside an imported memory.

        The item's own ``metadata`` (a dict, or a JSON string decoding to one)
        is merged with a fixed set of ``hindsight_*`` provenance fields; the
        provenance fields win on key collisions. Metadata of any other shape is
        kept under ``raw_metadata``.
        """
        metadata = item.get("metadata") or {}
        if isinstance(metadata, str):
            try:
                metadata = json.loads(metadata)
            except (json.JSONDecodeError, TypeError):
                metadata = {"raw_metadata": metadata}
        # Covers non-dict inputs and JSON strings that decode to a list, number
        # or null, none of which can be merged as a mapping.
        if not isinstance(metadata, dict):
            metadata = {"raw_metadata": metadata}

        preserved = {
            "migration_source": "hindsight",
            "hindsight_bank": self.bank,
            "hindsight_id": item.get("id"),
            "hindsight_fact_type": item.get("fact_type") or item.get("type"),
            "hindsight_context": item.get("context"),
            "hindsight_date": item.get("date"),
            "hindsight_mentioned_at": item.get("mentioned_at"),
            "hindsight_occurred_start": item.get("occurred_start"),
            "hindsight_occurred_end": item.get("occurred_end"),
            "hindsight_entities": item.get("entities"),
            "hindsight_chunk_id": item.get("chunk_id"),
            "hindsight_proof_count": item.get("proof_count"),
            "hindsight_tags": item.get("tags") or [],
            "hindsight_consolidated_at": item.get("consolidated_at"),
            "hindsight_consolidation_failed_at": item.get("consolidation_failed_at"),
        }
        # Merge the two mappings; a bare ``{metadata, preserved}`` would be a
        # set display and fail because dicts are unhashable.
        return {**metadata, **preserved}

    # ------------------------------------------------------------------
    # Import
    # ------------------------------------------------------------------

    def run(self, mnemosyne, dry_run: bool = False,
            session_id: Optional[str] = None,
            channel_id: Optional[str] = None) -> ImporterResult:
        """Run import, preserving historical fields in episodic memory.

        Args:
            mnemosyne: Object exposing the target connection as ``beam.conn``.
            dry_run: When true, count what would be imported without writing.
            session_id: If given, overrides the derived session id of every row.
            channel_id: If given, overrides the default channel id of every row.

        Returns:
            An ``ImporterResult`` with counts, imported ids and error messages.
            Failures are reported through ``result.errors`` rather than raised.
        """
        result = ImporterResult(
            provider=self.provider_name,
            started_at=datetime.now().isoformat(),
        )
        try:
            raw_data = self.extract()
            result.total = len(raw_data)
            if result.total == 0:
                result.errors.append("No memories found to import from Hindsight")
                return result
            if not self.validate(raw_data):
                result.errors.append("Validation failed")
                return result

            memories = self.transform(raw_data)
            result.skipped = result.total - len(memories)
            if dry_run:
                result.imported = len(memories)
                return result

            conn = mnemosyne.beam.conn
            for mem in memories:
                try:
                    if session_id:
                        mem["session_id"] = session_id
                    if channel_id:
                        mem["channel_id"] = channel_id
                    inserted = self._insert_episodic(conn, mem)
                    if inserted:
                        result.imported += 1
                        result.memory_ids.append(mem["id"])
                    else:
                        # Row id already present: counted as skipped, not failed.
                        result.skipped += 1
                except Exception as e:
                    # One bad row must not abort the rest of the migration.
                    result.failed += 1
                    result.errors.append(
                        f"Failed to import '{mem.get('content', '')[:80]}': {e}"
                    )
            conn.commit()
        except Exception as e:
            result.errors.append(f"Hindsight import failed: {e}")
        finally:
            # Single exit point for the completion timestamp, covering the
            # early returns above as well as the error path.
            result.finished_at = datetime.now().isoformat()
        return result

    @staticmethod
    def _insert_episodic(conn, mem: Dict) -> bool:
        """Insert one normalized row into ``episodic_memory``.

        Uses ``INSERT OR IGNORE``, so a row whose id already exists is left
        untouched. ``conn`` is presumably a ``sqlite3`` connection (the SQL
        dialect suggests so) -- TODO confirm against the caller.

        Returns:
            True when a row was actually inserted, False when it was ignored.
        """
        metadata_json = json.dumps(
            mem.get("metadata", {}), ensure_ascii=False, sort_keys=True, default=str
        )
        cur = conn.execute("""
            INSERT OR IGNORE INTO episodic_memory
            (id, content, source, timestamp, session_id, importance, metadata_json,
            summary_of, veracity, created_at, degraded_at, valid_until,
            channel_id, author_id, scope, superseded_by, author_type)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            mem["id"],
            mem["content"],
            mem.get("source", "hindsight:memory"),
            mem.get("timestamp"),
            mem.get("session_id", "hindsight"),
            mem.get("importance", 0.5),
            metadata_json,
            "",  # summary_of
            mem.get("veracity", "imported"),
            mem.get("created_at") or mem.get("timestamp"),
            None,  # degraded_at
            mem.get("valid_until"),
            mem.get("channel_id"),
            mem.get("author_id"),
            mem.get("scope", "global"),
            None,  # superseded_by
            mem.get("author_type"),
        ))
        return cur.rowcount > 0
#299
#300
def import_from_hindsight(mnemosyne, file_path: Optional[str] = None,
                          base_url: Optional[str] = None,
                          bank: str = "hermes", dry_run: bool = False,
                          session_id: Optional[str] = None,
                          channel_id: Optional[str] = None,
                          max_items: Optional[int] = None,
                          page_size: int = 500,
                          namespace: Optional[str] = None) -> ImporterResult:
    """Convenience wrapper for importing Hindsight memories.

    Builds a ``HindsightImporter`` from the source options and runs it once.

    Args:
        mnemosyne: Target instance handed to ``HindsightImporter.run``.
        file_path: Path to a JSON export file.
        base_url: Root URL of a running Hindsight API.
        bank: Hindsight bank name.
        dry_run: Forwarded to ``run``; when true nothing is written.
        session_id: Forwarded to ``run`` as the session id override.
        channel_id: Forwarded to ``run`` as the channel id override.
        max_items: Optional cap on the number of extracted items.
        page_size: Page size for API requests (constructor default is 500).
        namespace: Optional id namespace; the importer falls back to ``bank``.

    Returns:
        The ``ImporterResult`` produced by ``HindsightImporter.run``.
    """
    importer = HindsightImporter(
        file_path=file_path,
        base_url=base_url,
        bank=bank,
        page_size=page_size,
        max_items=max_items,
        namespace=namespace,
    )
    return importer.run(
        mnemosyne,
        dry_run=dry_run,
        session_id=session_id,
        channel_id=channel_id,
    )
#318

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public