my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""
#2	Mem0 memory provider importer.
#3
#4	Extracts all memories from a Mem0 instance (cloud or self-hosted)
#5	and imports them into Mnemosyne.
#6
#7	Extraction methods (tried in order):
#8	1. Mem0 Python SDK — `client.get_all()` with pagination
#9	2. Mem0 OSS REST API — `GET /memories` endpoint
#10	3. Mem0 Platform API — `create_memory_export()` structured export
#11
#12	Field mapping:
#13	memory → content
#14	user_id / agent_id → author_id
#15	metadata → metadata
#16	categories → metadata tags
#17	created_at → timestamp
#18	app_id → channel_id
#19	run_id → preserved in metadata
#20	"""
#21
#22	import json
#23	from datetime import datetime
#24	from typing import List, Dict, Optional, Any
#25
#26	from mnemosyne.core.importers.base import BaseImporter, ImporterResult
#27
#28
class Mem0Importer(BaseImporter):
    """Import memories from Mem0 into Mnemosyne.

    Extraction strategies are attempted in this order, and the first one
    that succeeds wins: Python SDK, REST endpoint, structured export.

    Usage:
        importer = Mem0Importer(
            api_key="sk-xxx",
            user_id="alice",   # optional: filter by user
            agent_id=None,     # optional: filter by agent
            base_url=None,     # for self-hosted Mem0 OSS
            page_size=200,     # items per paginated request
        )
        result = importer.run(mnemosyne_instance)
    """

    provider_name = "mem0"

    # Keys probed, in order, when a response wraps the memory list in a dict.
    _ENVELOPE_KEYS = ("results", "memories", "data")

    def __init__(self, api_key: Optional[str] = None,
                 user_id: Optional[str] = None,
                 agent_id: Optional[str] = None,
                 app_id: Optional[str] = None,
                 base_url: Optional[str] = None,
                 page_size: int = 200,
                 **kwargs):
        """Store connection settings and entity filters.

        Args:
            api_key: Mem0 API key; sent to the SDK client and, when set, as a
                bearer token on REST requests.
            user_id: Optional user filter.
            agent_id: Optional agent filter.
            app_id: Optional app filter (only applied on the SDK path).
            base_url: Host for a self-hosted instance; the REST path falls
                back to ``http://localhost:8000`` when this is unset.
            page_size: Items per SDK page, clamped to the range 1..200.
            **kwargs: Forwarded unchanged to ``BaseImporter.__init__``.
        """
        super().__init__(**kwargs)
        self.api_key = api_key
        self.user_id = user_id
        self.agent_id = agent_id
        self.app_id = app_id
        self.base_url = base_url
        # Upper bound: Mem0 max is 200. Lower bound keeps a zero or negative
        # value from being sent as a page size.
        self.page_size = max(1, min(page_size, 200))

    # ------------------------------------------------------------------
    # Extraction
    # ------------------------------------------------------------------

    def extract(self) -> List[Dict]:
        """Extract all memories from Mem0.

        Tries the SDK first, then the REST API, then the structured export.
        A failure of any kind in one strategy moves on to the next one, so a
        self-hosted instance is still reachable over REST when the SDK is
        installed but cannot talk to it.

        Returns:
            The raw memory records from the first strategy that succeeds.

        Raises:
            RuntimeError: If every strategy fails. The message lists the
                failure reason of each attempt.
        """
        strategies = (
            ("SDK", self._extract_via_sdk),
            ("REST API", self._extract_via_rest),
            ("structured export", self._extract_via_export),
        )

        failures = []
        for label, strategy in strategies:
            try:
                return strategy()
            except Exception as exc:
                # Keep the reason so the final error is diagnosable instead
                # of silently discarding it.
                failures.append(f"{label}: {type(exc).__name__}: {exc}")

        raise RuntimeError(
            "Could not extract memories from Mem0. "
            "Install the SDK: pip install mem0ai\n"
            "Or provide a base_url for self-hosted REST API.\n"
            "Attempts: " + "; ".join(failures)
        )

    def _entity_filters(self, include_app: bool = False) -> Dict[str, str]:
        """Build the user/agent (and optionally app) filter mapping."""
        filters = {}
        if self.user_id:
            filters["user_id"] = self.user_id
        if self.agent_id:
            filters["agent_id"] = self.agent_id
        if include_app and self.app_id:
            filters["app_id"] = self.app_id
        return filters

    @classmethod
    def _unwrap_memory_list(cls, payload: Any) -> List[Dict]:
        """Return the memory list from a bare list or a dict envelope.

        A dict is probed for ``results``, ``memories`` and ``data`` in that
        order; the first key holding a list wins. Anything else yields ``[]``.
        """
        if isinstance(payload, list):
            return payload
        if isinstance(payload, dict):
            for key in cls._ENVELOPE_KEYS:
                candidate = payload.get(key)
                if isinstance(candidate, list):
                    return candidate
        return []

    def _extract_via_sdk(self) -> List[Dict]:
        """Extract using the Mem0 Python SDK, following pagination.

        Raises:
            ImportError: If the ``mem0`` package is not installed.
        """
        from mem0 import MemoryClient

        client = MemoryClient(
            api_key=self.api_key,
            host=self.base_url,
        )

        filters = self._entity_filters(include_app=True)

        # If no entity filter, try to get all by using wildcard user_id
        if not filters:
            filters["user_id"] = "*"

        all_memories: List[Dict] = []
        page = 1
        while True:
            resp = client.get_all(
                filters=filters,
                page=page,
                page_size=self.page_size,
            )

            # NOTE(review): some SDK versions presumably return a bare list
            # with no paging envelope — confirm. Treat it as the full result.
            if isinstance(resp, list):
                all_memories.extend(resp)
                break

            results = resp.get("results") or []
            all_memories.extend(results)

            # Stop when there is no next page, or when a page comes back
            # empty (guards against looping forever).
            if resp.get("next") is None or not results:
                break
            page += 1

        return all_memories

    def _extract_via_rest(self) -> List[Dict]:
        """Extract using Mem0 OSS REST API (``GET <base_url>/memories``)."""
        import urllib.parse
        import urllib.request

        # Strip trailing slashes so "http://host/" does not become
        # "http://host//memories".
        base = (self.base_url or "http://localhost:8000").rstrip("/")
        url = f"{base}/memories"

        params = self._entity_filters()
        if params:
            url += "?" + urllib.parse.urlencode(params)

        req = urllib.request.Request(url)
        if self.api_key:
            req.add_header("Authorization", f"Bearer {self.api_key}")

        with urllib.request.urlopen(req, timeout=30) as resp:
            data = json.loads(resp.read().decode())

        return self._unwrap_memory_list(data)

    def _extract_via_export(self) -> List[Dict]:
        """Extract using Mem0 Platform structured export.

        Raises:
            ImportError: If the ``mem0`` package is not installed.
            RuntimeError: If the export job response carries no id.
        """
        from mem0 import MemoryClient

        client = MemoryClient(
            api_key=self.api_key,
            host=self.base_url,
        )

        filters = self._entity_filters()

        # The export API requires a schema — use a catch-all that wraps every
        # memory in a top-level "memories" array, which is one of the
        # envelope keys _unwrap_memory_list() understands.
        # NOTE(review): schema shape follows the Mem0 export docs — confirm
        # against the SDK version in use.
        export_schema = {
            "title": "Mem0MemoryExport",
            "type": "object",
            "properties": {
                "memories": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "content": {"type": "string"},
                            "user_id": {"type": "string"},
                            "created_at": {"type": "string"},
                            "metadata": {"type": "object"},
                        },
                        "required": ["content"],
                    },
                },
            },
            "required": ["memories"],
        }

        export_job = client.create_memory_export(
            schema=export_schema,
            filters=filters,
            export_instructions=(
                "Return ALL memories in the 'memories' array. "
                "Include every memory without filtering or summarizing. "
                "Format of each entry: {'content': '...', 'user_id': '...', "
                "'created_at': '...', 'metadata': {...}}"
            ),
        )

        export_id = export_job.get("id") or export_job.get("export_id")
        if not export_id:
            raise RuntimeError("Export job created but no export_id returned")

        result = client.get_memory_export(memory_export_id=export_id)
        return self._unwrap_memory_list(result)

    # ------------------------------------------------------------------
    # Transformation
    # ------------------------------------------------------------------

    @staticmethod
    def _coerce_metadata(raw: Any) -> Dict[str, Any]:
        """Return a fresh metadata dict built from whatever Mem0 supplied.

        Falsy input yields ``{}``. A JSON string is parsed. Anything that is
        not (or does not parse to) a dict is kept under a ``raw`` key. A dict
        is shallow-copied so the caller's record is never mutated.
        """
        if not raw:
            return {}
        if isinstance(raw, str):
            try:
                parsed = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                return {"raw": raw}
            return dict(parsed) if isinstance(parsed, dict) else {"raw": raw}
        if isinstance(raw, dict):
            return dict(raw)
        return {"raw": str(raw)}

    def transform(self, raw_data: List[Dict]) -> List[Dict]:
        """Transform Mem0 memories to Mnemosyne-compatible format.

        Records without text content, and entries that are not dicts, are
        skipped. The input records are not modified.

        Args:
            raw_data: Records as returned by ``extract()``.

        Returns:
            One dict per kept record with ``content``, ``source``,
            ``importance``, ``metadata``, ``valid_until``, ``scope`` and the
            underscore-prefixed identity fields consumed by ``run()``.
        """
        memories = []

        for item in raw_data or []:
            if not isinstance(item, dict):
                continue

            # Resolve content field — Mem0 uses 'memory', others use
            # 'content'. Fall through when 'memory' is present but empty.
            content = item.get("memory") or item.get("content") or ""
            if not content:
                continue

            # Author identity: prefer user_id, fall back to agent_id
            mem0_user = item.get("user_id")
            mem0_agent = item.get("agent_id")
            author_id = None
            author_type = "human"
            if mem0_user:
                author_id = f"mem0_user:{mem0_user}"
            elif mem0_agent:
                author_id = f"mem0_agent:{mem0_agent}"
                author_type = "agent"

            # Timestamp: original creation time, else the time of import
            timestamp = (
                item.get("created_at")
                or item.get("timestamp")
                or datetime.now().isoformat()
            )

            # Build metadata preserving Mem0-specific fields
            metadata = self._coerce_metadata(item.get("metadata"))
            metadata["_mem0_id"] = item.get("id", "")
            metadata["_mem0_hash"] = item.get("hash", "")
            metadata["_mem0_run_id"] = item.get("run_id", "")
            if item.get("categories"):
                metadata["_mem0_categories"] = item.get("categories")

            # Importance: lifted out of metadata when present, else 0.5.
            # The key is removed even when its value is not numeric.
            importance = 0.5
            if "importance" in metadata:
                try:
                    importance = float(metadata.pop("importance"))
                except (ValueError, TypeError):
                    pass

            # Record the update time only when it differs from creation
            updated_at = item.get("updated_at")
            if updated_at and updated_at != timestamp:
                metadata["_updated_at"] = updated_at

            memories.append({
                "content": content,
                "source": "mem0_import",
                "importance": importance,
                "metadata": metadata,
                "valid_until": None,
                "scope": "session",
                # Custom fields for Mnemosyne identity layer
                "_author_id": author_id,
                "_author_type": author_type,
                "_channel_id": item.get("app_id"),
                "_timestamp": timestamp,
            })

        return memories

    # ------------------------------------------------------------------
    # Import
    # ------------------------------------------------------------------

    @staticmethod
    def _store_identity(mnemosyne, memory_id, author_id, author_type,
                        channel_id) -> None:
        """Best-effort write of identity columns for one stored memory.

        Existing non-NULL column values are kept (COALESCE). Any failure is
        deliberately ignored so a missing column or table never fails the
        import of the memory itself.
        """
        try:
            mnemosyne.beam.conn.execute("""
                UPDATE working_memory
                SET author_id = COALESCE(author_id, ?),
                    author_type = COALESCE(author_type, ?),
                    channel_id = COALESCE(channel_id, ?)
                WHERE id = ?
            """, (author_id, author_type, channel_id, memory_id))
            mnemosyne.beam.conn.commit()
        except Exception:
            pass  # Identity update is best-effort

    def run(self, mnemosyne, dry_run: bool = False,
            session_id: Optional[str] = None,
            channel_id: Optional[str] = None) -> ImporterResult:
        """Override run() to handle identity-aware import.

        Args:
            mnemosyne: Target instance; must provide ``remember()`` and, for
                identity columns, ``beam.conn``.
            dry_run: If True, extract/validate/transform only and report the
                number of memories that would be imported.
            session_id: Accepted for interface compatibility.
                NOTE(review): this value is not forwarded to ``remember()``
                (the original code computed it and never used it) — confirm
                how a session override should be applied.
            channel_id: Fallback channel for records without an ``app_id``.

        Returns:
            ImporterResult with counts, memory ids and error messages.
            ``finished_at`` is set on every exit path, including early ones.
        """
        result = ImporterResult(
            provider=self.provider_name,
            started_at=datetime.now().isoformat(),
        )

        try:
            raw_data = self.extract()
            result.total = len(raw_data)

            if result.total == 0:
                result.errors.append("No memories found to import from Mem0")
                return result

            if not self.validate(raw_data):
                result.errors.append("Validation failed")
                return result

            memories = self.transform(raw_data)

            if dry_run:
                result.imported = len(memories)
                return result

            for mem_dict in memories:
                try:
                    # Extract identity fields before passing to remember()
                    author_id = mem_dict.pop("_author_id", None)
                    author_type = mem_dict.pop("_author_type", None)
                    chan = mem_dict.pop("_channel_id", None) or channel_id
                    ts = mem_dict.pop("_timestamp", None)

                    # Keep the original Mem0 timestamp alongside the memory
                    meta = mem_dict.get("metadata", {})
                    if ts:
                        meta["imported_at_original"] = ts

                    mid = mnemosyne.remember(
                        content=mem_dict["content"],
                        source=mem_dict.get("source", self.provider_name),
                        importance=mem_dict.get("importance", 0.5),
                        metadata=meta,
                        valid_until=mem_dict.get("valid_until"),
                        scope=mem_dict.get("scope", "session"),
                    )

                    # Identity columns are written directly through the beam
                    if author_id or author_type or chan:
                        self._store_identity(
                            mnemosyne, mid, author_id, author_type, chan
                        )

                    result.memory_ids.append(mid)
                    result.imported += 1

                except Exception as e:
                    result.failed += 1
                    result.errors.append(
                        f"Failed to import '{mem_dict.get('content', '')[:80]}': {e}"
                    )

        except Exception as e:
            result.errors.append(f"Mem0 import failed: {e}")
        finally:
            # Runs for the early returns above as well, so the result always
            # carries a finish time.
            result.finished_at = datetime.now().isoformat()

        return result
#345
#346
# Convenience function
def import_from_mem0(api_key: Optional[str], mnemosyne,
                     user_id: Optional[str] = None,
                     agent_id: Optional[str] = None,
                     base_url: Optional[str] = None,
                     dry_run: bool = False,
                     session_id: Optional[str] = None,
                     channel_id: Optional[str] = None,
                     app_id: Optional[str] = None,
                     page_size: int = 200) -> ImporterResult:
    """Import all memories from a Mem0 instance into Mnemosyne.

    Builds a ``Mem0Importer`` from the connection arguments and calls its
    ``run()`` with the remaining ones.

    Args:
        api_key: Mem0 API key. May be None; presumably the SDK then falls
            back to the MEM0_API_KEY environment variable — confirm against
            the SDK.
        mnemosyne: Mnemosyne instance to import into.
        user_id: Optional Mem0 user_id to filter by.
        agent_id: Optional Mem0 agent_id to filter by.
        base_url: Optional base URL for self-hosted Mem0.
        dry_run: If True, validate but don't write.
        session_id: Passed through to ``Mem0Importer.run()``.
        channel_id: Channel for imported memories.
        app_id: Optional Mem0 app_id to filter by.
        page_size: Items per paginated request, forwarded to the importer.

    Returns:
        ImporterResult with counts and errors.
    """
    importer = Mem0Importer(
        api_key=api_key,
        user_id=user_id,
        agent_id=agent_id,
        app_id=app_id,
        base_url=base_url,
        page_size=page_size,
    )
    return importer.run(
        mnemosyne,
        dry_run=dry_run,
        session_id=session_id,
        channel_id=channel_id,
    )
#379

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public