my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

my-project-public — gitlawb

#1	"""
#2	Base classes for memory provider importers.
#3
#4	Each importer implements extract() and transform(), and may override validate():
#5	- extract(): Pull all memories from the source provider
#6	- transform(): Convert provider-specific format to Mnemosyne-compatible dicts
#7	- validate(): Check extracted data before import (optional; a default is provided)
#8	"""
#9
import hashlib
import json
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, List, Optional
#16
#17
@dataclass
class ImporterResult:
    """Result of an import operation.

    Holds the counters, error messages, stored memory ids and start/finish
    timestamps (strings, empty until set) collected during one import run.
    """

    provider: str  # name of the provider the data came from
    total: int = 0  # number of raw records extracted
    imported: int = 0  # records successfully stored
    skipped: int = 0  # records neither stored nor failed
    failed: int = 0  # records whose store attempt raised
    errors: List[str] = field(default_factory=list)
    memory_ids: List[str] = field(default_factory=list)
    started_at: str = ""
    finished_at: str = ""

    def to_dict(self, max_errors: Optional[int] = 20,
                max_memory_ids: Optional[int] = 50) -> Dict:
        """Return a plain-dict summary suitable for JSON serialization.

        Args:
            max_errors: Maximum number of error messages to include
                (non-negative). ``None`` includes all of them.
            max_memory_ids: Maximum number of memory ids to include
                (non-negative). ``None`` includes all of them.

        Returns:
            A dict with the counters, timestamps, and (possibly truncated)
            copies of ``errors`` and ``memory_ids``.
        """
        return {
            "provider": self.provider,
            "total": self.total,
            "imported": self.imported,
            "skipped": self.skipped,
            "failed": self.failed,
            # Slicing copies the list, and a None bound means "no cap".
            "errors": self.errors[:max_errors],
            "memory_ids": self.memory_ids[:max_memory_ids],
            "started_at": self.started_at,
            "finished_at": self.finished_at,
        }

    def to_json(self) -> str:
        """Return the default (capped) summary as pretty-printed JSON."""
        return json.dumps(self.to_dict(), indent=2)
#46
#47
class BaseImporter(ABC):
    """Abstract base class for memory provider importers.

    Subclass this for each provider. Implement extract() and transform(),
    and optionally override validate().

    Usage:
        importer = Mem0Importer(api_key="sk-xxx", user_id="alice")
        result = importer.run(mnemosyne_instance)
    """

    provider_name: str = "unknown"

    def __init__(self, **kwargs):
        """Store provider-specific settings verbatim in ``self.config``."""
        self.config = kwargs

    def run(self, mnemosyne, dry_run: bool = False,
            session_id: Optional[str] = None,
            channel_id: Optional[str] = None) -> "ImporterResult":
        """Full import pipeline: extract → validate → transform → import.

        Args:
            mnemosyne: Object exposing ``remember(...)``; each transformed
                memory is stored through it.
            dry_run: If True, validate and transform but don't write.
            session_id: If given, written into each memory dict as
                ``"session_id"`` before it is stored.
            channel_id: If given, written into each memory dict as
                ``"channel_id"`` before it is stored.

        Returns:
            ImporterResult with counts and errors. Exceptions are not
            propagated: per-record failures are counted in ``failed`` and any
            other error is recorded as an ``"Import failed: ..."`` message.
            ``finished_at`` is set on every exit path.
        """
        result = ImporterResult(
            provider=self.provider_name,
            started_at=datetime.now().isoformat(),
        )

        try:
            # Phase 1: Extract raw data from source
            raw_data = self.extract()
            result.total = len(raw_data)

            if result.total == 0:
                result.errors.append("No memories found to import")
                return result

            # Phase 2: Validate
            if not self.validate(raw_data):
                result.errors.append("Validation failed")
                return result

            # Phase 3: Transform to Mnemosyne format
            memories = self.transform(raw_data)

            if dry_run:
                # Report what a real run would attempt; records dropped by
                # transform() count as skipped.
                result.imported = len(memories)
                result.skipped = max(0, result.total - result.imported)
                return result

            # Phase 4: Import into Mnemosyne
            for mem_dict in memories:
                try:
                    # Allow session/channel overrides from CLI.
                    # NOTE(review): these keys are set on the dict but are not
                    # forwarded to remember() below — confirm whether
                    # remember() is expected to receive them.
                    if session_id:
                        mem_dict["session_id"] = session_id
                    if channel_id:
                        mem_dict["channel_id"] = channel_id

                    mid = mnemosyne.remember(
                        content=mem_dict["content"],
                        source=mem_dict.get("source", self.provider_name),
                        importance=mem_dict.get("importance", 0.5),
                        metadata=mem_dict.get("metadata", {}),
                        valid_until=mem_dict.get("valid_until"),
                        scope=mem_dict.get("scope", "session"),
                    )
                    result.memory_ids.append(mid)
                    result.imported += 1
                except Exception as e:
                    # One bad record must not stop the remaining ones.
                    result.failed += 1
                    result.errors.append(
                        f"Failed to import '{self._preview(mem_dict)}': {e}"
                    )

            # Whatever was extracted but neither stored nor failed was
            # dropped along the way (e.g. by transform()). Clamp at zero in
            # case transform() produced more items than were extracted.
            result.skipped = max(
                0, result.total - result.imported - result.failed
            )

        except Exception as e:
            result.errors.append(f"Import failed: {e}")
        finally:
            # Runs for the early returns above as well, so every result
            # carries a finish timestamp.
            result.finished_at = datetime.now().isoformat()

        return result

    @abstractmethod
    def extract(self) -> List[Dict]:
        """Extract all memories from the source provider.

        Returns a list of provider-specific raw dicts.
        Each dict should contain at minimum: 'content' (str).
        Common fields: 'timestamp', 'metadata', 'user_id', 'tags'.
        """

    @abstractmethod
    def transform(self, raw_data: List[Dict]) -> List[Dict]:
        """Transform provider-specific dicts to Mnemosyne-compatible dicts.

        Each returned dict should have:
            content: str (required)
            source: str (default: provider_name)
            importance: float (default: 0.5)
            metadata: dict (default: {})
            valid_until: str | None (default: None)
            scope: str (default: "session")

        Return a list of dicts ready for mnemosyne.remember().
        """

    def validate(self, raw_data: List[Dict]) -> bool:
        """Check that extracted data looks valid. Override for provider-specific checks.

        The default accepts any non-empty list whose items are all dicts.
        The presence of 'content' (or the aliases 'memory' / 'text') is
        deliberately not enforced here; transform() handles normalization.
        """
        if not raw_data or not isinstance(raw_data, list):
            return False
        return all(isinstance(item, dict) for item in raw_data)

    @staticmethod
    def _preview(mem_dict: Any) -> str:
        """Return up to 80 characters of a memory's content for error messages.

        Never raises for non-dict items or non-string content, so it is safe
        to call from inside an exception handler.
        """
        content = mem_dict.get("content", "") if isinstance(mem_dict, dict) else mem_dict
        return str(content)[:80]

    @staticmethod
    def _content_hash(content: str) -> str:
        """Generate a deterministic hash for deduplication.

        Returns the first 16 hex characters of the SHA-256 digest of the
        UTF-8 encoded content.
        """
        return hashlib.sha256(content.encode("utf-8")).hexdigest()[:16]
#182
#183
def import_from_file(filepath: str, mnemosyne, dry_run: bool = False,
                     session_id: Optional[str] = None,
                     channel_id: Optional[str] = None) -> "ImporterResult":
    """Import memories from a JSON export file.

    The file should contain an array of memory objects, each with at minimum a
    'content' key ('memory' and 'text' are accepted as aliases). A single
    object is also accepted, as is a wrapper object whose 'results',
    'memories' or 'data' key holds the array.

    Args:
        filepath: Path to JSON export file (read as UTF-8).
        mnemosyne: Mnemosyne instance to import into.
        dry_run: If True, validate but don't write.
        session_id: Override session for imported memories.
        channel_id: Channel to assign imported memories to.

    Returns:
        ImporterResult
    """
    # BaseImporter and ImporterResult are defined earlier in this module, so
    # no import is needed here (a relative self-import would also fail when
    # the module is loaded outside its package).

    class FileImporter(BaseImporter):
        """Importer that reads memories from a local JSON file."""

        provider_name = "file"

        def __init__(self, filepath, **kwargs):
            super().__init__(**kwargs)
            self.filepath = filepath

        def extract(self):
            """Load the file and unwrap it to a list of raw records."""
            # JSON interchange is UTF-8; don't depend on the locale default.
            with open(self.filepath, encoding="utf-8") as f:
                data = json.load(f)
            # Handle single object vs array
            if isinstance(data, dict):
                data = [data]
            # Handle wrapped formats: {"results": [...]}, {"memories": [...]},
            # {"data": [...]} — the first key (in this order) holding a list wins.
            if isinstance(data, list) and len(data) == 1 and isinstance(data[0], dict):
                inner = data[0]
                for key in ("results", "memories", "data"):
                    if isinstance(inner.get(key), list):
                        return inner[key]
            return data

        def transform(self, raw_data):
            """Normalize raw records; records without content are dropped."""
            memories = []
            for item in raw_data:
                content = item.get("content") or item.get("memory") or item.get("text", "")
                if not content:
                    continue
                try:
                    importance = float(item.get("importance", 0.5))
                except (TypeError, ValueError):
                    # A null or non-numeric importance in one record should
                    # not abort the whole file; fall back to the default.
                    importance = 0.5
                memories.append({
                    "content": content,
                    "source": item.get("source", "file_import"),
                    "importance": importance,
                    # A JSON null for metadata is treated like a missing key.
                    "metadata": item.get("metadata") or {},
                    "valid_until": item.get("valid_until"),
                    "scope": item.get("scope", "session"),
                })
            return memories

    importer = FileImporter(filepath)
    return importer.run(mnemosyne, dry_run=dry_run,
                        session_id=session_id, channel_id=channel_id)
#246

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public