repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
| #1 | """ |
| #2 | Personal Voice Assistant with Memory (Whisper + CrewAI + Mem0 + ElevenLabs) |
| #3 | This script creates a personalized AI assistant that can: |
| #4 | - Understand voice commands using Whisper (OpenAI STT) |
| #5 | - Respond intelligently using CrewAI Agent and LLMs |
| #6 | - Remember user preferences and facts using Mem0 memory |
| #7 | - Speak responses back using ElevenLabs text-to-speech |
| #8 | Initial user memory is bootstrapped from predefined preferences, and the assistant can remember new context dynamically over time. |
| #9 | |
| #10 | To run this file, you need to set the following environment variables: |
| #11 | |
| #12 | export OPENAI_API_KEY="your_openai_api_key" |
| #13 | export MEM0_API_KEY="your_mem0_api_key" |
| #14 | export ELEVENLABS_API_KEY="your_elevenlabs_api_key" |
| #15 | |
| #16 | You must also have: |
| #17 | - A working microphone setup (pyaudio) |
| #18 | - A valid ElevenLabs voice ID |
| #19 | - Python packages: openai, elevenlabs, crewai, mem0ai, pyaudio |
| #20 | """ |
| #21 | |
| #22 | import tempfile |
| #23 | import wave |
| #24 | |
| #25 | import pyaudio |
| #26 | from crewai import Agent, Crew, Process, Task |
| #27 | from elevenlabs import play |
| #28 | from elevenlabs.client import ElevenLabs |
| #29 | from openai import OpenAI |
| #30 | |
| #31 | from mem0 import MemoryClient |
| #32 | |
| #33 | # ------------------ SETUP ------------------ |
# Identifier passed to Mem0 as ``user_id`` whenever memories are stored or looked up.
USER_ID = "Alex"

# Service clients are constructed without explicit arguments; the module
# docstring lists the environment variables that must be set before running.
openai_client = OpenAI()  # speech-to-text (Whisper)
tts_client = ElevenLabs()  # text-to-speech
memory_client = MemoryClient()  # Mem0 memory store
| #38 | |
| #39 | |
| #40 | # Function to store user preferences in memory |
def store_user_preferences(user_id: str, conversation: list):
    """Persist a conversation in Mem0 under the given user.

    Args:
        user_id: Mem0 user identifier the memories are attached to.
        conversation: Chat messages to store; callers in this file pass a
            list of ``{"role": ..., "content": ...}`` dicts.
    """
    memory_client.add(conversation, user_id=user_id)
| #44 | |
| #45 | |
| #46 | # Initialize memory with some basic preferences |
def initialize_memory():
    """Seed Mem0 with a scripted conversation describing the user's preferences.

    The conversation is stored for ``USER_ID`` via ``store_user_preferences``
    and a confirmation line is printed afterwards.
    """
    # Each entry is one exchange: (what the user said, how the assistant acknowledged it).
    seed_exchanges = [
        (
            "Hi, my name is Alex Thompson. I'm 32 years old and work as a software engineer at TechCorp.",
            "Hello Alex Thompson! Nice to meet you. I've noted that you're 32 and work as a software engineer at TechCorp. How can I help you today?",
        ),
        (
            "I prefer brief and concise responses without unnecessary explanations. I get frustrated when assistants are too wordy or repeat information I already know.",
            "Got it. I'll keep my responses short, direct, and without redundancy.",
        ),
        (
            "I like to listen to jazz music when I'm working, especially artists like Miles Davis and John Coltrane. I find it helps me focus and be more productive.",
            "I'll remember your preference for jazz while working, particularly Miles Davis and John Coltrane. It's great for focus.",
        ),
        (
            "I usually wake up at 7 AM and prefer reminders for meetings 30 minutes in advance. My most productive hours are between 9 AM and noon, so I try to schedule important tasks during that time.",
            "Noted. You wake up at 7 AM, need meeting reminders 30 minutes ahead, and are most productive between 9 AM and noon for important tasks.",
        ),
        (
            "My favorite color is navy blue, and I prefer dark mode in all my apps. I'm allergic to peanuts, so please remind me to check ingredients when I ask about recipes or restaurants.",
            "I've noted that you prefer navy blue and dark mode interfaces. I'll also help you remember to check for peanuts in food recommendations due to your allergy.",
        ),
        (
            "My partner's name is Jamie, and we have a golden retriever named Max who is 3 years old. My parents live in Chicago, and I try to visit them once every two months.",
            "I'll remember that your partner is Jamie, your dog Max is a 3-year-old golden retriever, and your parents live in Chicago whom you visit bimonthly.",
        ),
    ]

    # Flatten the exchanges into the alternating user/assistant message list.
    messages = []
    for user_line, assistant_line in seed_exchanges:
        messages.append({"role": "user", "content": user_line})
        messages.append({"role": "assistant", "content": assistant_line})

    store_user_preferences(USER_ID, messages)
    print("✅ Memory initialized with user preferences")
| #103 | |
| #104 | |
# Memory settings for the agent: Mem0 as provider, scoped to USER_ID.
_voice_agent_memory = {
    "provider": "mem0",
    "config": {"user_id": USER_ID},
}

# The single CrewAI agent that answers every transcribed request.
voice_agent = Agent(
    role="Memory-based Voice Assistant",
    goal="Help the user with day-to-day tasks and remember their preferences over time.",
    backstory="You are a voice assistant who understands the user well and converse with them.",
    verbose=True,
    memory=True,
    memory_config=_voice_agent_memory,
)
| #116 | |
| #117 | |
| #118 | # ------------------ AUDIO RECORDING ------------------ |
def record_audio(filename="input.wav", record_seconds=5):
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        filename: Path of the WAV file to write.
        record_seconds: Length of the recording in seconds. The number of
            chunks read is ``int(rate / chunk * record_seconds)``, so the
            actual duration is truncated to a whole number of chunks.

    Raises:
        Whatever PyAudio or ``wave`` raises. The audio stream and the PyAudio
        instance are released even when recording fails part-way through.
    """
    print("🎙️ Recording (speak now)...")
    chunk = 1024
    fmt = pyaudio.paInt16
    channels = 1
    rate = 44100

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still live.
        sample_width = p.get_sample_size(fmt)
        stream = p.open(format=fmt, channels=channels, rate=rate, input=True, frames_per_buffer=chunk)
        try:
            frames = [stream.read(chunk) for _ in range(int(rate / chunk * record_seconds))]
        finally:
            # Always release the device, even if a read fails.
            try:
                stream.stop_stream()
            finally:
                stream.close()
    finally:
        p.terminate()

    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b"".join(frames))
| #143 | |
| #144 | |
| #145 | # ------------------ STT USING WHISPER ------------------ |
def transcribe_whisper(audio_path):
    """Transcribe an audio file with the ``whisper-1`` model via ``openai_client``.

    Args:
        audio_path: Path of the audio file to send for transcription.

    Returns:
        The transcript text, or an empty string if opening the file or the
        transcription call fails (the error is printed, not raised).
    """
    print("🔎 Transcribing with Whisper...")
    try:
        with open(audio_path, "rb") as recording:
            result = openai_client.audio.transcriptions.create(model="whisper-1", file=recording)
        spoken_text = result.text
        print(f"🗣️ You said: {spoken_text}")
        return spoken_text
    except Exception as err:
        print(f"Error during transcription: {err}")
        return ""
| #156 | |
| #157 | |
| #158 | # ------------------ AGENT RESPONSE ------------------ |
def get_agent_response(user_input):
    """Run the voice agent on one user utterance and return its reply as text.

    Args:
        user_input: Transcribed user text. A falsy value (empty string or
            ``None``) short-circuits with a "please repeat" message.

    Returns:
        The reply text. If anything raises while building or running the
        crew, the error is printed and a generic apology string is returned.
    """
    if not user_input:
        return "I didn't catch that. Could you please repeat?"

    try:
        reply_task = Task(
            description=f"Respond to: {user_input}",
            expected_output="A short and relevant reply.",
            agent=voice_agent,
        )
        mem0_settings = {"provider": "mem0", "config": {"user_id": USER_ID}}
        outcome = Crew(
            agents=[voice_agent],
            tasks=[reply_task],
            process=Process.sequential,
            verbose=True,
            memory=True,
            memory_config=mem0_settings,
        ).kickoff()

        # Pull the plain-text reply out of whichever result shape came back.
        if hasattr(outcome, "raw"):
            return outcome.raw
        if isinstance(outcome, dict):
            if "raw" in outcome:
                return outcome["raw"]
            if "tasks_output" in outcome:
                task_outputs = outcome["tasks_output"]
                if task_outputs and isinstance(task_outputs, list):
                    return task_outputs[0].get("raw", str(outcome))

        # Nothing recognisable to extract: fall back to the string form.
        return str(outcome)

    except Exception as err:
        print(f"Error getting agent response: {err}")
        return "I'm having trouble processing that request. Can we try again?"
| #193 | |
| #194 | |
| #195 | # ------------------ SPEAK WITH ELEVENLABS ------------------ |
def speak_response(
    text,
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
    output_format="mp3_44100_128",
):
    """Print the agent's reply and speak it through ElevenLabs text-to-speech.

    Args:
        text: Reply to speak. Empty or whitespace-only text is printed but
            not sent to the TTS service, since there is nothing to synthesize.
        voice_id: ElevenLabs voice ID to use. Defaults to the voice that was
            previously hard-coded.
        model_id: ElevenLabs model ID. Defaults to the previous hard-coded value.
        output_format: Audio format requested from ElevenLabs. Defaults to
            the previous hard-coded value.

    Returns:
        None.
    """
    print(f"🤖 Agent: {text}")
    # Skip the network round trip when there is nothing audible to say.
    if not text or not str(text).strip():
        return
    audio = tts_client.text_to_speech.convert(
        text=text, voice_id=voice_id, model_id=model_id, output_format=output_format
    )
    play(audio)
| #202 | |
| #203 | |
| #204 | # ------------------ MAIN LOOP ------------------ |
def run_voice_agent():
    """Run the record -> transcribe -> respond -> speak loop until told to exit.

    Each iteration records into a fresh temporary WAV file, transcribes it,
    and either exits (when the user says "exit", "quit" or "stop") or speaks
    the agent's reply. Errors from transcription, the agent or speech output
    are printed and the loop continues; errors from recording propagate to
    the caller. The temporary file is removed at the end of every iteration.
    """
    import os  # local import: only needed here, to delete the temporary recording

    exit_words = {"exit", "quit", "stop"}
    print("🧠 Voice agent (Whisper + Mem0 + ElevenLabs) is ready! Say something.")
    while True:
        # Reserve a unique path, then close the handle immediately so that
        # record_audio() can reopen the file by name on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio:
            audio_path = tmp_audio.name
        try:
            record_audio(audio_path)
            try:
                user_text = transcribe_whisper(audio_path)
                # Transcripts may carry surrounding whitespace or punctuation
                # (e.g. "Exit."), so normalise before matching.
                if user_text.strip(" \t\r\n.,!?").lower() in exit_words:
                    print("👋 Exiting.")
                    break
                response = get_agent_response(user_text)
                speak_response(response)
            except Exception as e:
                print(f"❌ Error: {e}")
        finally:
            # delete=False means nothing else cleans this file up.
            try:
                os.remove(audio_path)
            except OSError:
                pass
| #219 | |
| #220 | |
if __name__ == "__main__":
    try:
        # Seed Mem0 with the scripted preferences, then hand control to the
        # interactive voice loop. The seeding step only needs to happen once.
        initialize_memory()
        run_voice_agent()
    except KeyboardInterrupt:
        print("\n👋 Program interrupted. Exiting.")
    except Exception as exc:
        print(f"❌ Fatal error: {exc}")
| #232 |