my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

"""
Personal Voice Assistant with Memory (Whisper + CrewAI + Mem0 + ElevenLabs)

This script creates a personalized AI assistant that can:
- Understand voice commands using Whisper (OpenAI speech-to-text)
- Respond intelligently using a CrewAI Agent and LLMs
- Remember user preferences and facts using Mem0 memory
- Speak responses back using ElevenLabs text-to-speech

Initial user memory is bootstrapped from predefined preferences, and the
assistant can remember new context dynamically over time.

To run this file, you need to set the following environment variables:

export OPENAI_API_KEY="your_openai_api_key"
export MEM0_API_KEY="your_mem0_api_key"
export ELEVENLABS_API_KEY="your_elevenlabs_api_key"

You must also have:
- A working microphone setup (pyaudio)
- A valid ElevenLabs voice ID
- Python packages: openai, elevenlabs, crewai, mem0ai, pyaudio
"""
#21
import os
import tempfile
import wave

import pyaudio
from crewai import Agent, Crew, Process, Task
from elevenlabs import play
from elevenlabs.client import ElevenLabs
from openai import OpenAI

from mem0 import MemoryClient
#32
# ------------------ SETUP ------------------
# Identifier under which this user's memories are stored and looked up.
USER_ID = "Alex"

# Service clients, all built with default arguments. The module docstring
# lists the API-key environment variables that must be set beforehand
# (presumably each client reads its own key from the environment - confirm).
openai_client = OpenAI()  # used for Whisper transcription
tts_client = ElevenLabs()  # used for text-to-speech
memory_client = MemoryClient()  # used to store conversations in Mem0
#38
#39
# Persist a conversation in memory so its preferences can be recalled later
def store_user_preferences(user_id: str, conversation: list):
    """Save a conversation to the memory store under the given user.

    Args:
        user_id: Identifier the conversation is filed under.
        conversation: Messages passed unchanged to ``memory_client.add``.
    """
    memory_client.add(conversation, user_id=user_id)
#44
#45
# Seed the memory store with a baseline set of preferences
def initialize_memory():
    """Store a predefined onboarding conversation as the user's initial memory.

    Builds an alternating user/assistant message list, stores it under
    ``USER_ID`` through ``store_user_preferences`` and prints a confirmation.
    """
    # Each entry is (what the user said, how the assistant acknowledged it).
    exchanges = [
        (
            "Hi, my name is Alex Thompson. I'm 32 years old and work as a software engineer at TechCorp.",
            "Hello Alex Thompson! Nice to meet you. I've noted that you're 32 and work as a software engineer at TechCorp. How can I help you today?",
        ),
        (
            "I prefer brief and concise responses without unnecessary explanations. I get frustrated when assistants are too wordy or repeat information I already know.",
            "Got it. I'll keep my responses short, direct, and without redundancy.",
        ),
        (
            "I like to listen to jazz music when I'm working, especially artists like Miles Davis and John Coltrane. I find it helps me focus and be more productive.",
            "I'll remember your preference for jazz while working, particularly Miles Davis and John Coltrane. It's great for focus.",
        ),
        (
            "I usually wake up at 7 AM and prefer reminders for meetings 30 minutes in advance. My most productive hours are between 9 AM and noon, so I try to schedule important tasks during that time.",
            "Noted. You wake up at 7 AM, need meeting reminders 30 minutes ahead, and are most productive between 9 AM and noon for important tasks.",
        ),
        (
            "My favorite color is navy blue, and I prefer dark mode in all my apps. I'm allergic to peanuts, so please remind me to check ingredients when I ask about recipes or restaurants.",
            "I've noted that you prefer navy blue and dark mode interfaces. I'll also help you remember to check for peanuts in food recommendations due to your allergy.",
        ),
        (
            "My partner's name is Jamie, and we have a golden retriever named Max who is 3 years old. My parents live in Chicago, and I try to visit them once every two months.",
            "I'll remember that your partner is Jamie, your dog Max is a 3-year-old golden retriever, and your parents live in Chicago whom you visit bimonthly.",
        ),
    ]

    # Flatten the pairs into the role/content message format, user turn first.
    messages = []
    for user_text, assistant_text in exchanges:
        messages.append({"role": "user", "content": user_text})
        messages.append({"role": "assistant", "content": assistant_text})

    # Store the initial preferences
    store_user_preferences(USER_ID, messages)
    print("✅ Memory initialized with user preferences")
#103
#104
# The single agent used for every request. Memory is switched on and
# configured with "mem0" as the provider, keyed by USER_ID.
voice_agent = Agent(
    role="Memory-based Voice Assistant",
    goal="Help the user with day-to-day tasks and remember their preferences over time.",
    backstory="You are a voice assistant who understands the user well and converse with them.",
    verbose=True,
    memory=True,
    memory_config={"provider": "mem0", "config": {"user_id": USER_ID}},
)
#116
#117
# ------------------ AUDIO RECORDING ------------------
def record_audio(filename="input.wav", record_seconds=5, *, rate=44100, channels=1, chunk=1024):
    """Record from the default input device and save the audio as a WAV file.

    Args:
        filename: Path of the WAV file to write.
        record_seconds: Length of the recording in seconds.
        rate: Sampling rate in Hz.
        channels: Number of input channels.
        chunk: Number of frames read from the stream per call.

    Raises:
        Whatever ``pyaudio`` or ``wave`` raise on device or file errors. The
        audio stream and the PyAudio instance are released before the error
        propagates.
    """
    print("🎙️ Recording (speak now)...")
    fmt = pyaudio.paInt16
    frames = []

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive,
        # rather than after terminate().
        sample_width = p.get_sample_size(fmt)
        stream = p.open(format=fmt, channels=channels, rate=rate, input=True, frames_per_buffer=chunk)
        try:
            for _ in range(int(rate / chunk * record_seconds)):
                frames.append(stream.read(chunk))
        finally:
            # Release the input stream even if a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b"".join(frames))
#143
#144
# ------------------ STT USING WHISPER ------------------
def transcribe_whisper(audio_path):
    """Transcribe an audio file with the ``whisper-1`` model.

    Args:
        audio_path: Path of the audio file to upload.

    Returns:
        The transcribed text, or an empty string if anything goes wrong
        (the error is printed, not raised).
    """
    print("🔎 Transcribing with Whisper...")
    try:
        with open(audio_path, "rb") as recording:
            result = openai_client.audio.transcriptions.create(model="whisper-1", file=recording)
        spoken_text = result.text
        print(f"🗣️ You said: {spoken_text}")
    except Exception as err:
        # Best-effort: report the problem and let the caller treat it as silence.
        print(f"Error during transcription: {err}")
        return ""
    else:
        return spoken_text
#156
#157
# ------------------ AGENT RESPONSE ------------------
def _extract_response_text(result):
    """Pull the reply text out of whatever ``crew.kickoff()`` returned."""
    # Object-style result exposing the reply as ``.raw``.
    raw = getattr(result, "raw", None)
    if raw is not None:
        return raw

    if isinstance(result, dict):
        if "raw" in result:
            return result["raw"]
        outputs = result.get("tasks_output")
        if isinstance(outputs, list) and outputs:
            first = outputs[0]
            # Task outputs may be dicts or objects; support both instead of
            # assuming ``.get`` exists.
            if isinstance(first, dict):
                return first.get("raw", str(result))
            return getattr(first, "raw", str(result))

    # Fallback to string representation if we can't extract the raw response
    return str(result)


def get_agent_response(user_input):
    """Ask the voice agent for a reply to the user's transcribed request.

    Args:
        user_input: Transcribed user text. ``None``, empty or whitespace-only
            input is treated as "nothing heard".

    Returns:
        The agent's reply, a request to repeat when nothing was heard, or an
        apology when the agent call fails (the error is printed, not raised).
    """
    # Whitespace-only transcripts carry no request; don't spend an agent call on them.
    if not user_input or not user_input.strip():
        return "I didn't catch that. Could you please repeat?"

    try:
        task = Task(
            description=f"Respond to: {user_input}",
            expected_output="A short and relevant reply.",
            agent=voice_agent,
        )
        crew = Crew(
            agents=[voice_agent],
            tasks=[task],
            process=Process.sequential,
            verbose=True,
            memory=True,
            memory_config={"provider": "mem0", "config": {"user_id": USER_ID}},
        )
        return _extract_response_text(crew.kickoff())
    except Exception as e:
        print(f"Error getting agent response: {e}")
        return "I'm having trouble processing that request. Can we try again?"
#193
#194
# ------------------ SPEAK WITH ELEVENLABS ------------------
def speak_response(
    text,
    *,
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
    output_format="mp3_44100_128",
):
    """Print the agent's reply and play it back as synthesized speech.

    Args:
        text: Reply to speak. Empty or whitespace-only text is printed but
            not sent for synthesis.
        voice_id: ElevenLabs voice to use.
        model_id: ElevenLabs model to use.
        output_format: Audio format requested from the service.
    """
    print(f"🤖 Agent: {text}")
    # Nothing to synthesize: skip the service call instead of sending blank text.
    if not text or not text.strip():
        return

    audio = tts_client.text_to_speech.convert(
        text=text,
        voice_id=voice_id,
        model_id=model_id,
        output_format=output_format,
    )
    play(audio)
#202
#203
# ------------------ MAIN LOOP ------------------
def _is_exit_command(text):
    """Return True if the transcript is just an exit word, ignoring case, padding and punctuation."""
    # Transcripts may come back capitalized and punctuated (e.g. "Exit."),
    # so normalize before comparing.
    return text.strip(" \t\r\n.,!?").lower() in {"exit", "quit", "stop"}


def run_voice_agent():
    """Run the record -> transcribe -> respond -> speak loop until the user says an exit word.

    Recordings go into a temporary directory that is removed when the loop
    ends, including when it is interrupted. Errors while transcribing,
    answering or speaking are printed and the loop continues; errors while
    recording propagate to the caller.
    """
    print("🧠 Voice agent (Whisper + Mem0 + ElevenLabs) is ready! Say something.")
    # One scratch directory for the whole session: each recording overwrites
    # the previous one, so no temp files accumulate.
    with tempfile.TemporaryDirectory() as scratch_dir:
        audio_path = os.path.join(scratch_dir, "input.wav")
        while True:
            record_audio(audio_path)
            try:
                user_text = transcribe_whisper(audio_path)
                if _is_exit_command(user_text):
                    print("👋 Exiting.")
                    break
                response = get_agent_response(user_text)
                speak_response(response)
            except Exception as e:
                print(f"❌ Error: {e}")
#219
#220
if __name__ == "__main__":
    try:
        # Seed the memory store first (this only needs to happen once),
        # then hand control to the interactive voice loop.
        initialize_memory()
        run_voice_agent()
    except KeyboardInterrupt:
        print("\n👋 Program interrupted. Exiting.")
    except Exception as fatal_error:
        print(f"❌ Fatal error: {fatal_error}")
#232

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public