my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

my-project-public — gitlawb

#1	import json
#2	import os
#3	import threading
#4	import time
#5	from concurrent.futures import ThreadPoolExecutor
#6
#7	from dotenv import load_dotenv
#8	from tqdm import tqdm
#9
#10	from mem0 import MemoryClient
#11
#12	load_dotenv()
#13
#14
#15	# Update custom instructions
#16	custom_instructions = """
#17	Generate personal memories that follow these guidelines:
#18
#19	1. Each memory should be self-contained with complete context, including:
#20	- The person's name, do not use "user" while creating memories
#21	- Personal details (career aspirations, hobbies, life circumstances)
#22	- Emotional states and reactions
#23	- Ongoing journeys or future plans
#24	- Specific dates when events occurred
#25
#26	2. Include meaningful personal narratives focusing on:
#27	- Identity and self-acceptance journeys
#28	- Family planning and parenting
#29	- Creative outlets and hobbies
#30	- Mental health and self-care activities
#31	- Career aspirations and education goals
#32	- Important life events and milestones
#33
#34	3. Make each memory rich with specific details rather than general statements
#35	- Include timeframes (exact dates when possible)
#36	- Name specific activities (e.g., "charity race for mental health" rather than just "exercise")
#37	- Include emotional context and personal growth elements
#38
#39	4. Extract memories only from user messages, not incorporating assistant responses
#40
#41	5. Format each memory as a paragraph with a clear narrative structure that captures the person's experience, challenges, and aspirations
#42	"""
#43
#44
class MemoryADD:
    """Ingest conversation transcripts into Mem0 as per-speaker memories.

    Each dataset item is expected to hold a ``"conversation"`` mapping with the
    two speaker names (``speaker_a`` / ``speaker_b``), one list of chat turns
    per session key, and a matching ``<session key>_date_time`` entry.  Every
    session is stored twice: under speaker A's user id (A as ``user``, B as
    ``assistant``) and under speaker B's user id with the roles swapped.
    """

    def __init__(self, data_path=None, batch_size: int = 2, is_graph: bool = False):
        """Create the Mem0 client, push the custom instructions, and load data.

        Args:
            data_path: Optional path to the JSON dataset; loaded immediately
                when given.
            batch_size: Number of messages sent per ``add`` call.
            is_graph: Forwarded to the client as ``enable_graph``.
        """
        self.mem0_client = MemoryClient(
            api_key=os.getenv("MEM0_API_KEY"),
            org_id=os.getenv("MEM0_ORGANIZATION_ID"),
            project_id=os.getenv("MEM0_PROJECT_ID"),
        )

        self.mem0_client.update_project(custom_instructions=custom_instructions)
        self.batch_size = batch_size
        self.data_path = data_path
        self.data = None
        self.is_graph = is_graph
        if data_path:
            self.load_data()

    def load_data(self):
        """Read the JSON dataset at ``self.data_path`` into ``self.data`` and return it."""
        # Decode explicitly as UTF-8 so the result does not depend on the
        # platform's default locale encoding.
        with open(self.data_path, "r", encoding="utf-8") as f:
            self.data = json.load(f)
        return self.data

    def add_memory(self, user_id, message, metadata, retries: int = 3) -> None:
        """Send one batch of messages to Mem0, retrying on failure.

        Args:
            user_id: Mem0 user id the memories are stored under.
            message: Batch of message dicts passed to the client's ``add``.
            metadata: Metadata attached to the stored memories.
            retries: Total number of attempts.  Values below 1 are treated as
                1 so the call never silently skips the upload.

        Raises:
            Exception: Whatever the client raised on the final attempt.
        """
        attempts = max(1, retries)
        for attempt in range(attempts):
            try:
                self.mem0_client.add(
                    message,
                    user_id=user_id,
                    version="v2",
                    metadata=metadata,
                    enable_graph=self.is_graph,
                )
            except Exception:
                # Retry boundary: any client error is retried, and the last
                # one is re-raised with its original traceback.
                if attempt == attempts - 1:
                    raise
                time.sleep(1)  # Wait before retrying
            else:
                return

    def add_memories_for_speaker(self, speaker, messages, timestamp, desc) -> None:
        """Upload ``messages`` for ``speaker`` in chunks of ``self.batch_size``.

        Args:
            speaker: Mem0 user id to store the memories under.
            messages: Full list of message dicts for one session.
            timestamp: Session timestamp stored as ``metadata["timestamp"]``.
            desc: Label shown on the tqdm progress bar.
        """
        for i in tqdm(range(0, len(messages), self.batch_size), desc=desc):
            batch_messages = messages[i : i + self.batch_size]
            self.add_memory(speaker, batch_messages, metadata={"timestamp": timestamp})

    @staticmethod
    def _build_role_views(chats, speaker_a, speaker_b):
        """Return one session's turns from speaker A's and speaker B's point of view.

        In the first list speaker A is ``user`` and speaker B ``assistant``;
        the second list has the roles swapped.  Raises ``ValueError`` for a
        turn spoken by anyone else.
        """
        messages = []
        messages_reverse = []
        for chat in chats:
            if chat["speaker"] == speaker_a:
                name, role, reverse_role = speaker_a, "user", "assistant"
            elif chat["speaker"] == speaker_b:
                name, role, reverse_role = speaker_b, "assistant", "user"
            else:
                raise ValueError(f"Unknown speaker: {chat['speaker']}")
            content = f"{name}: {chat['text']}"
            messages.append({"role": role, "content": content})
            messages_reverse.append({"role": reverse_role, "content": content})
        return messages, messages_reverse

    def process_conversation(self, item, idx) -> None:
        """Replace the stored memories of both speakers with this conversation.

        Existing memories for ``<speaker>_<idx>`` are deleted first; then every
        session is uploaded for both speakers concurrently.

        Args:
            item: Dataset entry containing the ``"conversation"`` mapping.
            idx: Index of the entry; appended to the speaker names to form
                the user ids.

        Raises:
            ValueError: If a chat turn belongs to neither speaker.
            KeyError: If a session has no ``<session key>_date_time`` entry.
            Exception: Any upload error left after ``add_memory``'s retries.
        """
        conversation = item["conversation"]
        speaker_a = conversation["speaker_a"]
        speaker_b = conversation["speaker_b"]

        speaker_a_user_id = f"{speaker_a}_{idx}"
        speaker_b_user_id = f"{speaker_b}_{idx}"

        # delete all memories for the two users
        self.mem0_client.delete_all(user_id=speaker_a_user_id)
        self.mem0_client.delete_all(user_id=speaker_b_user_id)

        for key, chats in conversation.items():
            # Only session keys hold chat turns; skip speaker names and the
            # per-session date/time entries.
            if key in ("speaker_a", "speaker_b") or "date" in key or "timestamp" in key:
                continue

            timestamp = conversation[key + "_date_time"]
            messages, messages_reverse = self._build_role_views(chats, speaker_a, speaker_b)

            # Upload for the two users on different threads.  Futures are used
            # instead of bare threads so a failed upload is re-raised here
            # rather than being lost inside the worker thread.
            with ThreadPoolExecutor(max_workers=2) as pool:
                uploads = [
                    pool.submit(
                        self.add_memories_for_speaker,
                        speaker_a_user_id,
                        messages,
                        timestamp,
                        "Adding Memories for Speaker A",
                    ),
                    pool.submit(
                        self.add_memories_for_speaker,
                        speaker_b_user_id,
                        messages_reverse,
                        timestamp,
                        "Adding Memories for Speaker B",
                    ),
                ]
                for upload in uploads:
                    upload.result()

        print("Messages added successfully")

    def process_all_conversations(self, max_workers: int = 10) -> None:
        """Process every loaded conversation on a thread pool.

        Args:
            max_workers: Maximum number of conversations processed at once.

        Raises:
            ValueError: If no data has been loaded.
            Exception: The first error raised by any ``process_conversation``.
        """
        if not self.data:
            raise ValueError("No data loaded. Please set data_path and call load_data() first.")
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [executor.submit(self.process_conversation, item, idx) for idx, item in enumerate(self.data)]

            # result() re-raises any exception from the worker.
            for future in futures:
                future.result()
#142

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public