my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

"""
Honcho memory provider importer.

Honcho (by Plastic Labs) uses an entity-centric model:
Workspaces → Peers → Sessions → Messages.

Extraction requires:
1. List peers via SDK
2. For each peer: list sessions → get context() with summaries
3. For each session: list messages

Honcho uses PostgreSQL with pgvector. No bulk export.
"""
#14
import json
import logging
from datetime import datetime
from typing import List, Dict, Optional, Any

from mnemosyne.core.importers.base import BaseImporter, ImporterResult
#20
#21
class HonchoImporter(BaseImporter):
    """Import memories from Honcho into Mnemosyne.

    Extraction is attempted through the Honcho Python SDK first; when the
    SDK is not installed, a REST fallback against ``rest_base_url`` is used.

    Usage:
        importer = HonchoImporter(
            api_key="sk-xxx",        # Honcho API key (optional)
            workspace_id="my-app",   # falls back to "default" when omitted
            max_peers=50,            # limit peers to extract
        )
        result = importer.run(mnemosyne_instance)
    """

    provider_name = "honcho"

    # Base URL used by the REST fallback. Override on a subclass or instance
    # when the Honcho server is not running locally.
    rest_base_url = "http://localhost:8000"

    # Per-request timeout, in seconds, for the REST fallback.
    rest_timeout = 10

    _log = logging.getLogger(__name__)

    def __init__(self, api_key: Optional[str] = None,
                 workspace_id: Optional[str] = None,
                 max_peers: Optional[int] = None, **kwargs):
        """Store connection settings; remaining kwargs go to ``BaseImporter``.

        Args:
            api_key: Honcho API key, or ``None`` to rely on SDK defaults.
            workspace_id: Honcho workspace; ``"default"`` when falsy.
            max_peers: Maximum number of peers to extract; falsy means no
                limit.
        """
        super().__init__(**kwargs)
        self.api_key = api_key
        self.workspace_id = workspace_id or "default"
        self.max_peers = max_peers

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _records(resp: Any, key: str) -> List[Dict]:
        """Normalise a listing response to a list of dict records.

        Accepts either a bare list or a dict wrapping the list under *key*
        (falling back to ``"items"``). Entries that are not dicts are
        dropped so callers can safely use ``.get`` on every record.
        """
        if isinstance(resp, dict):
            resp = resp.get(key, resp.get("items", []))
        if not isinstance(resp, (list, tuple)):
            return []
        return [entry for entry in resp if isinstance(entry, dict)]

    def _limit_peers(self, peers: List[Dict]) -> List[Dict]:
        """Apply ``max_peers`` (falsy = unlimited, negative = none)."""
        if not self.max_peers:
            return peers
        return peers[:max(self.max_peers, 0)]

    # ------------------------------------------------------------------
    # Extraction
    # ------------------------------------------------------------------

    def extract(self) -> List[Dict]:
        """Extract raw items from Honcho.

        Returns:
            A list of raw item dicts (summaries and messages).

        Raises:
            RuntimeError: if the SDK is not installed and the REST fallback
                raised. Errors raised by the SDK path other than
                ``ImportError`` propagate unchanged.
        """
        try:
            return self._extract_via_sdk()
        except ImportError:
            self._log.debug("Honcho SDK not installed; trying REST fallback")
        try:
            return self._extract_via_rest()
        except Exception as exc:
            raise RuntimeError(
                "Could not extract from Honcho. Install: pip install honcho-ai"
            ) from exc

    def _extract_via_sdk(self) -> List[Dict]:
        """Extract using the Honcho Python SDK.

        Listing failures are logged and treated as "nothing to import" so a
        single failing peer or session does not abort the whole extraction.
        """
        from honcho import Honcho

        # Pass the key to the constructor: assigning ``api_key`` on an
        # already-built client presumably does not reconfigure it.
        client_kwargs = {"workspace_id": self.workspace_id}
        if self.api_key:
            client_kwargs["api_key"] = self.api_key
        honcho = Honcho(**client_kwargs)

        # Step 1: list peers
        try:
            peers = self._records(honcho.list_peers(), "peers")
        except Exception as exc:
            self._log.warning("Honcho: listing peers failed: %s", exc)
            peers = []

        collected = []
        for peer in self._limit_peers(peers):
            peer_id = peer.get("peer_id", peer.get("id", ""))
            peer_name = peer.get("name", peer_id)

            # Step 2: list sessions for this peer
            try:
                sessions = self._records(
                    honcho.list_sessions(peer_id=peer_id), "sessions"
                )
            except Exception as exc:
                self._log.warning(
                    "Honcho: listing sessions for peer %r failed: %s",
                    peer_id, exc,
                )
                sessions = []

            for session in sessions:
                sid = session.get("session_id", session.get("id", ""))
                if not sid:
                    continue
                collected.extend(
                    self._sdk_session_items(honcho, sid, peer_id, peer_name)
                )

        return collected

    def _sdk_session_items(self, honcho, sid, peer_id, peer_name) -> List[Dict]:
        """Collect the summary item and message items of one SDK session."""
        items = []

        # Step 3: context (includes summary)
        try:
            ctx = honcho.session(sid).context(summary=True)
        except Exception as exc:
            self._log.debug(
                "Honcho: context for session %r failed: %s", sid, exc
            )
            ctx = None
        if isinstance(ctx, dict):
            summary = ctx.get("summary", ctx.get("context", ""))
            if summary:
                items.append({
                    "content": str(summary),
                    "source": "honcho_summary",
                    "peer_id": peer_id,
                    "peer_name": peer_name,
                    "session_id": sid,
                    "role": "system",
                })

        # Step 4: messages
        try:
            messages = self._records(
                honcho.session(sid).list_messages(), "messages"
            )
        except Exception as exc:
            self._log.debug(
                "Honcho: listing messages for session %r failed: %s", sid, exc
            )
            messages = []
        for msg in messages:
            content = msg.get("content", msg.get("text", ""))
            if not content:
                continue
            items.append({
                "content": content,
                "source": "honcho_message",
                "peer_id": msg.get("peer_id", peer_id),
                "peer_name": peer_name,
                "session_id": sid,
                "role": "user",
                "timestamp": msg.get("created_at"),
                "metadata": msg.get("metadata", {}),
            })

        return items

    def _rest_get(self, path: str) -> Any:
        """GET ``rest_base_url + path`` and return the decoded JSON body."""
        import urllib.request

        request = urllib.request.Request(f"{self.rest_base_url}{path}")
        if self.api_key:
            request.add_header("Authorization", f"Bearer {self.api_key}")
        with urllib.request.urlopen(request, timeout=self.rest_timeout) as resp:
            return json.loads(resp.read().decode())

    def _extract_via_rest(self) -> List[Dict]:
        """Extract using the Honcho REST API.

        Honcho is primarily SDK-based; REST extraction mirrors the SDK path
        but depends on server configuration. Request failures are logged and
        treated as empty listings.
        """
        from urllib.parse import quote

        try:
            peers = self._records(self._rest_get("/peers"), "peers")
        except Exception as exc:
            self._log.warning("Honcho REST: listing peers failed: %s", exc)
            peers = []

        collected = []
        for peer in self._limit_peers(peers):
            peer_id = peer.get("peer_id", peer.get("id", ""))
            if not peer_id:
                # An empty id would produce a malformed URL.
                continue
            peer_name = peer.get("name", peer_id)
            peer_path = quote(str(peer_id), safe="")

            try:
                sessions = self._records(
                    self._rest_get(f"/peers/{peer_path}/sessions"), "sessions"
                )
            except Exception as exc:
                self._log.warning(
                    "Honcho REST: listing sessions for peer %r failed: %s",
                    peer_id, exc,
                )
                sessions = []

            for session in sessions:
                sid = session.get("session_id", session.get("id", ""))
                if not sid:
                    continue
                session_path = quote(str(sid), safe="")
                try:
                    messages = self._records(
                        self._rest_get(f"/sessions/{session_path}/messages"),
                        "messages",
                    )
                except Exception as exc:
                    self._log.debug(
                        "Honcho REST: messages for session %r failed: %s",
                        sid, exc,
                    )
                    messages = []

                for msg in messages:
                    content = msg.get("content", "")
                    if not content:
                        continue
                    collected.append({
                        "content": content,
                        "source": "honcho_message",
                        "peer_id": peer_id,
                        "peer_name": peer_name,
                        "session_id": sid,
                        "role": "user",
                        "timestamp": msg.get("created_at"),
                    })

        return collected

    # ------------------------------------------------------------------
    # Transformation
    # ------------------------------------------------------------------

    def transform(self, raw_data: List[Dict]) -> List[Dict]:
        """Transform raw Honcho items to Mnemosyne memory dicts.

        Items without content are skipped. Keys prefixed with ``_`` at the
        top level (``_author_id``, ``_author_type``, ``_channel_id``,
        ``_timestamp``) are consumed by ``run`` rather than passed to
        ``remember``. The input items are not modified.
        """
        memories = []
        for item in raw_data:
            content = item.get("content", "")
            if not content:
                continue

            peer_id = item.get("peer_id", "")
            peer_name = item.get("peer_name", peer_id)
            source = item.get("source", "honcho_import")

            # Importance heuristic: summaries outrank individual messages.
            importance = 0.7 if source == "honcho_summary" else 0.4

            # Copy so the caller's raw item is never mutated.
            raw_meta = item.get("metadata")
            meta = dict(raw_meta) if isinstance(raw_meta, dict) else {}
            meta["_honcho_session_id"] = item.get("session_id", "")
            meta["_honcho_peer_name"] = peer_name

            ts = item.get("timestamp")
            if ts:
                meta["_timestamp"] = ts

            memories.append({
                "content": content,
                "source": source,
                "importance": importance,
                "metadata": meta,
                "valid_until": None,
                "scope": "session",
                "_author_id": f"honcho_peer:{peer_id}" if peer_id else None,
                "_author_type": "human",
                "_channel_id": self.workspace_id,
                "_timestamp": ts,
            })

        return memories

    # ------------------------------------------------------------------
    # Import driver
    # ------------------------------------------------------------------

    def run(self, mnemosyne, dry_run=False, session_id=None, channel_id=None):
        """Override run to handle identity-aware import.

        Args:
            mnemosyne: Target instance; must provide ``remember(...)`` and
                ``beam.conn`` (used for the author/channel UPDATE).
            dry_run: When true, only count what would be imported.
            session_id: Accepted for interface compatibility; unused here.
            channel_id: Used only if a memory carries no ``_channel_id``.
                NOTE(review): ``transform`` always sets ``_channel_id`` to
                the (never empty) workspace id, so this fallback is
                effectively never taken — confirm intended precedence.

        Returns:
            The populated ``ImporterResult``; ``finished_at`` is set on
            every exit path.
        """
        result = ImporterResult(provider=self.provider_name,
                                started_at=datetime.now().isoformat())
        try:
            raw_data = self.extract()
            result.total = len(raw_data)
            if result.total == 0:
                result.errors.append("No memories found in Honcho")
                return result
            if not self.validate(raw_data):
                result.errors.append("Validation failed")
                return result

            memories = self.transform(raw_data)
            if dry_run:
                result.imported = len(memories)
                return result

            for mem_dict in memories:
                try:
                    mid = self._store_memory(mnemosyne, mem_dict, channel_id)
                    result.memory_ids.append(mid)
                    result.imported += 1
                except Exception as e:
                    result.failed += 1
                    result.errors.append(f"Failed: {str(e)[:100]}")
        except Exception as e:
            result.errors.append(f"Honcho import failed: {e}")
        finally:
            # Runs for the early returns above as well.
            result.finished_at = datetime.now().isoformat()
        return result

    def _store_memory(self, mnemosyne, mem_dict: Dict, channel_id=None):
        """Persist one transformed memory and tag its author/channel.

        Returns the id returned by ``mnemosyne.remember``. A failure of the
        follow-up UPDATE is logged but does not fail the import.
        """
        author_id = mem_dict.pop("_author_id", None)
        author_type = mem_dict.pop("_author_type", None)
        chan = mem_dict.pop("_channel_id", None) or channel_id
        ts = mem_dict.pop("_timestamp", None)
        meta = mem_dict.get("metadata", {})
        if ts:
            meta["imported_at_original"] = ts

        mid = mnemosyne.remember(
            content=mem_dict["content"],
            source=mem_dict.get("source", self.provider_name),
            importance=mem_dict.get("importance", 0.5),
            metadata=meta,
            valid_until=mem_dict.get("valid_until"),
            scope=mem_dict.get("scope", "session"),
        )

        if author_id or author_type or chan:
            try:
                # COALESCE keeps any value already stored on the row.
                mnemosyne.beam.conn.execute("""
                    UPDATE working_memory
                    SET author_id = COALESCE(author_id, ?),
                        author_type = COALESCE(author_type, ?),
                        channel_id = COALESCE(channel_id, ?)
                    WHERE id = ?
                """, (author_id, author_type, chan, mid))
                mnemosyne.beam.conn.commit()
            except Exception as exc:
                self._log.warning(
                    "Honcho: could not tag author/channel on memory %r: %s",
                    mid, exc,
                )
        return mid
#284

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public