repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | """ |
| #2 | Base classes for memory provider importers. |
| #3 | |
| #4 | Each importer provides three methods (extract() and transform() are required; validate() has a default implementation): |
| #5 | - extract(): Pull all memories from the source provider |
| #6 | - transform(): Convert provider-specific format to Mnemosyne-compatible dicts |
| #7 | - validate(): Check extracted data before import |
| #8 | """ |
| #9 | |
| #10 | import json |
| #11 | import hashlib |
| #12 | from abc import ABC, abstractmethod |
| #13 | from dataclasses import dataclass, field |
| #14 | from datetime import datetime |
| #15 | from typing import List, Dict, Optional, Any |
| #16 | |
| #17 | |
@dataclass
class ImporterResult:
    """Summary of one importer run: counters, error messages, and timestamps."""

    # Name of the provider the run pulled from.
    provider: str
    # Counters filled in by the import pipeline.
    total: int = 0
    imported: int = 0
    skipped: int = 0
    failed: int = 0
    # Human-readable error messages collected during the run.
    errors: List[str] = field(default_factory=list)
    # Identifiers returned for each successfully stored memory.
    memory_ids: List[str] = field(default_factory=list)
    # Timestamps as strings; empty until the pipeline sets them.
    started_at: str = ""
    finished_at: str = ""

    def to_dict(self) -> Dict:
        """Return a plain-dict view of the result.

        The two list fields are truncated so the output stays small:
        at most 20 errors and at most 50 memory ids are included.
        """
        payload = {
            name: getattr(self, name)
            for name in ("provider", "total", "imported", "skipped", "failed")
        }
        payload["errors"] = self.errors[:20]  # cap at 20
        payload["memory_ids"] = self.memory_ids[:50]  # cap at 50
        payload["started_at"] = self.started_at
        payload["finished_at"] = self.finished_at
        return payload

    def to_json(self) -> str:
        """Serialize the (truncated) dict view as 2-space indented JSON."""
        serializable = self.to_dict()
        return json.dumps(serializable, indent=2)
| #46 | |
| #47 | |
class BaseImporter(ABC):
    """Abstract base class for memory provider importers.

    Subclass this for each provider. Implement extract() and transform();
    override validate() when provider-specific checks are needed.

    Usage:
        importer = Mem0Importer(api_key="sk-xxx", user_id="alice")
        result = importer.run(mnemosyne_instance)
    """

    provider_name: str = "unknown"

    def __init__(self, **kwargs):
        # Provider-specific settings are stored verbatim for subclasses to read.
        self.config = kwargs

    def run(self, mnemosyne, dry_run: bool = False,
            session_id: Optional[str] = None,
            channel_id: Optional[str] = None) -> ImporterResult:
        """Full import pipeline: extract → validate → transform → import.

        Args:
            mnemosyne: Object exposing remember(...); called once per
                transformed memory with keyword arguments.
            dry_run: If True, validate and transform but don't write.
            session_id: Override session_id for imported memories.
            channel_id: Channel to assign imported memories to.

        Returns:
            ImporterResult with counts and errors. ``finished_at`` is set on
            every exit path, including early exits and unexpected failures.
            ``skipped`` counts extracted records that transform() dropped.
        """
        result = ImporterResult(
            provider=self.provider_name,
            started_at=datetime.now().isoformat(),
        )

        try:
            # Phase 1: Extract raw data from source
            raw_data = self.extract()
            result.total = len(raw_data)

            if result.total == 0:
                result.errors.append("No memories found to import")
                return result

            # Phase 2: Validate
            if not self.validate(raw_data):
                result.errors.append("Validation failed")
                return result

            # Phase 3: Transform to Mnemosyne format
            memories = self.transform(raw_data)

            if dry_run:
                # Report what a real run would do without writing anything.
                result.imported = len(memories)
                result.skipped = max(0, result.total - len(memories))
                result.failed = 0
                return result

            # Phase 4: Import into Mnemosyne
            for mem_dict in memories:
                try:
                    # Allow session/channel overrides from CLI.
                    # NOTE(review): these keys are written into mem_dict but
                    # are not forwarded to remember() below — confirm whether
                    # remember() should receive them.
                    if session_id:
                        mem_dict["session_id"] = session_id
                    if channel_id:
                        mem_dict["channel_id"] = channel_id

                    mid = mnemosyne.remember(
                        content=mem_dict["content"],
                        source=mem_dict.get("source", self.provider_name),
                        importance=mem_dict.get("importance", 0.5),
                        metadata=mem_dict.get("metadata", {}),
                        valid_until=mem_dict.get("valid_until"),
                        scope=mem_dict.get("scope", "session"),
                    )
                    result.memory_ids.append(mid)
                    result.imported += 1
                except Exception as e:
                    result.failed += 1
                    # Build the preview defensively: a malformed entry must
                    # not raise inside this handler and abort the whole run.
                    if isinstance(mem_dict, dict):
                        preview = str(mem_dict.get("content", ""))[:80]
                    else:
                        preview = repr(mem_dict)[:80]
                    result.errors.append(f"Failed to import '{preview}': {e}")

            # Anything extracted but neither imported nor failed was dropped
            # by transform(); clamp in case transform() expanded the input.
            result.skipped = max(
                0, result.total - result.imported - result.failed
            )

        except Exception as e:
            # Top-level boundary: report the failure instead of raising.
            result.errors.append(f"Import failed: {e}")
        finally:
            # Runs for early returns too, so the timestamp is always set.
            result.finished_at = datetime.now().isoformat()

        return result

    @abstractmethod
    def extract(self) -> List[Dict]:
        """Extract all memories from the source provider.

        Returns a list of provider-specific raw dicts.
        Each dict should contain at minimum: 'content' (str).
        Common fields: 'timestamp', 'metadata', 'user_id', 'tags'.
        """

    @abstractmethod
    def transform(self, raw_data: List[Dict]) -> List[Dict]:
        """Transform provider-specific dicts to Mnemosyne-compatible dicts.

        Each returned dict should have:
            content: str (required)
            source: str (default: provider_name)
            importance: float (default: 0.5)
            metadata: dict (default: {})
            valid_until: str | None (default: None)
            scope: str (default: "session")

        Return a list of dicts ready for mnemosyne.remember().
        """

    def validate(self, raw_data: List[Dict]) -> bool:
        """Check that extracted data looks valid. Override for provider-specific checks.

        Returns True only for a non-empty list whose items are all dicts.
        """
        if not raw_data or not isinstance(raw_data, list):
            return False
        # Key presence is deliberately not enforced here: items may use the
        # 'content', 'memory' or 'text' aliases, which transform() normalizes.
        return all(isinstance(item, dict) for item in raw_data)

    @staticmethod
    def _content_hash(content: str) -> str:
        """Generate a deterministic hash for deduplication.

        Returns the first 16 hex characters of the SHA-256 digest of content.
        """
        return hashlib.sha256(content.encode()).hexdigest()[:16]
| #182 | |
| #183 | |
def import_from_file(filepath: str, mnemosyne, dry_run: bool = False,
                     session_id: Optional[str] = None,
                     channel_id: Optional[str] = None) -> ImporterResult:
    """Import memories from a JSON export file.

    The file should contain an array of memory objects, each with at minimum a
    'content' key ('memory' and 'text' are accepted as aliases). A single
    object is also accepted, as is a wrapper object holding the array under
    'results', 'memories' or 'data'.

    Args:
        filepath: Path to JSON export file (read as UTF-8).
        mnemosyne: Mnemosyne instance to import into.
        dry_run: If True, validate but don't write.
        session_id: Override session for imported memories.
        channel_id: Channel to assign imported memories to.

    Returns:
        ImporterResult
    """
    # BaseImporter and ImporterResult are defined at module level in this
    # file, so no function-scope relative import is needed; this also keeps
    # the function usable when the module is loaded outside a package.

    class FileImporter(BaseImporter):
        provider_name = "file"

        def __init__(self, filepath, **kwargs):
            super().__init__(**kwargs)
            self.filepath = filepath

        def extract(self):
            # Decode explicitly as UTF-8 instead of the locale default.
            with open(self.filepath, encoding="utf-8") as f:
                data = json.load(f)
            # Handle single object vs array
            if isinstance(data, dict):
                data = [data]
            # Handle wrapped formats: {"results": [...], "memories": [...]}
            if isinstance(data, list) and len(data) == 1 and isinstance(data[0], dict):
                wrapper = data[0]
                # First wrapper key (in this order) that holds a list wins.
                for key in ("results", "memories", "data"):
                    if isinstance(wrapper.get(key), list):
                        data = wrapper[key]
                        break
            return data

        def transform(self, raw_data):
            memories = []
            for item in raw_data:
                content = item.get("content") or item.get("memory") or item.get("text", "")
                if not content:
                    # Entries without usable text are dropped.
                    continue
                # A null or non-numeric importance must not abort the whole
                # import; fall back to the documented default instead.
                try:
                    importance = float(item.get("importance", 0.5))
                except (TypeError, ValueError):
                    importance = 0.5
                memories.append({
                    "content": content,
                    "source": item.get("source", "file_import"),
                    "importance": importance,
                    # Treat an explicit null the same as a missing key.
                    "metadata": item.get("metadata") or {},
                    "valid_until": item.get("valid_until"),
                    "scope": item.get("scope", "session"),
                })
            return memories

    importer = FileImporter(filepath)
    return importer.run(mnemosyne, dry_run=dry_run,
                        session_id=session_id, channel_id=channel_id)
| #246 |