repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | """ |
| #2 | Example of using vLLM with mem0 for high-performance memory operations. |
| #3 | |
| #4 | SETUP INSTRUCTIONS: |
| #5 | 1. Install vLLM: |
| #6 | pip install vllm |
| #7 | |
| #8 | 2. Start vLLM server (in a separate terminal): |
| #9 | vllm serve microsoft/DialoGPT-small --port 8000 |
| #10 | |
| #11 | Wait for the message: "Uvicorn running on http://0.0.0.0:8000" |
| #12 | (Small model: ~500MB download, much faster!) |
| #13 | |
| #14 | 3. Verify server is running: |
| #15 | curl http://localhost:8000/health |
| #16 | |
| #17 | 4. Run this example: |
| #18 | python examples/misc/vllm_example.py |
| #19 | |
| #20 | Optional environment variables: |
| #21 | export VLLM_BASE_URL="http://localhost:8000/v1" |
| #22 | export VLLM_API_KEY="vllm-api-key" |
| #23 | """ |
| #24 | |
import os

from mem0 import Memory
| #26 | |
# Configuration for vLLM integration.
#
# The model name must be the one the vLLM server was started with (the setup
# instructions use `vllm serve microsoft/DialoGPT-small`); a request for a
# model the server is not serving is expected to be rejected.
# The base URL and API key honor the optional VLLM_BASE_URL / VLLM_API_KEY
# environment variables and fall back to the local defaults when unset.
# NOTE(review): the OpenAI embedder presumably needs OPENAI_API_KEY, and the
# vector store expects Qdrant to be reachable at localhost:6333 - confirm.
config = {
    "llm": {
        "provider": "vllm",
        "config": {
            "model": "microsoft/DialoGPT-small",
            "vllm_base_url": os.environ.get("VLLM_BASE_URL", "http://localhost:8000/v1"),
            "api_key": os.environ.get("VLLM_API_KEY", "vllm-api-key"),
            "temperature": 0.7,
            "max_tokens": 100,
        },
    },
    "embedder": {"provider": "openai", "config": {"model": "text-embedding-3-small"}},
    "vector_store": {
        "provider": "qdrant",
        "config": {"collection_name": "vllm_memories", "host": "localhost", "port": 6333},
    },
}
| #45 | |
| #46 | |
def _extract_results(response):
    """Return the list of memory records contained in a mem0 response.

    Accepts both response shapes: a dict that wraps the records under the
    "results" key, or a plain list of records. A missing/empty response
    yields an empty list.
    """
    if isinstance(response, dict):
        return response.get("results", [])
    return response or []


def main():
    """
    Demonstrate vLLM integration with mem0.

    Builds a Memory instance from the module-level ``config``, stores three
    sample conversations for ``user_123``, runs a few searches against them,
    and finally lists every stored memory for that user. All output goes to
    stdout; any exception raised by mem0 propagates to the caller.
    """
    print("--> Initializing mem0 with vLLM...")

    # Initialize memory with vLLM
    memory = Memory.from_config(config)

    print("--> Memory initialized successfully!")

    # Example conversations to store
    conversations = [
        {
            "messages": [
                {"role": "user", "content": "I love playing chess on weekends"},
                {
                    "role": "assistant",
                    "content": "That's great! Chess is an excellent strategic game that helps improve critical thinking.",
                },
            ],
            "user_id": "user_123",
        },
        {
            "messages": [
                {"role": "user", "content": "I'm learning Python programming"},
                {
                    "role": "assistant",
                    "content": "Python is a fantastic language for beginners! What specific areas are you focusing on?",
                },
            ],
            "user_id": "user_123",
        },
        {
            "messages": [
                {"role": "user", "content": "I prefer working late at night, I'm more productive then"},
                {
                    "role": "assistant",
                    "content": "Many people find they're more creative and focused during nighttime hours. It's important to maintain a consistent schedule that works for you.",
                },
            ],
            "user_id": "user_123",
        },
    ]

    print("\n--> Adding memories using vLLM...")

    # Add memories - now powered by vLLM's high-performance inference
    for i, conversation in enumerate(conversations, 1):
        result = memory.add(messages=conversation["messages"], user_id=conversation["user_id"])
        print(f"Memory {i} added: {result}")

    print("\n🔍 Searching memories...")

    # Search memories - vLLM will process the search and memory operations
    search_queries = [
        "What does the user like to do on weekends?",
        "What is the user learning?",
        "When is the user most productive?",
    ]

    for query in search_queries:
        print(f"\nQuery: {query}")
        memories = memory.search(query=query, user_id="user_123")

        # Normalize first: iterating a {"results": [...]} dict directly would
        # yield its keys, not the memory records.
        for memory_item in _extract_results(memories):
            print(f" - {memory_item['memory']}")

    print("\n--> Getting all memories for user...")
    all_memories = _extract_results(memory.get_all(user_id="user_123"))
    print(f"Total memories stored: {len(all_memories)}")

    for memory_item in all_memories:
        print(f" - {memory_item['memory']}")

    print("\n--> vLLM integration demo completed successfully!")
    print("\nBenefits of using vLLM:")
    print(" -> 2.7x higher throughput compared to standard implementations")
    print(" -> 5x faster time-per-output-token")
    print(" -> Efficient memory usage with PagedAttention")
    print(" -> Simple configuration, same as other providers")
| #128 | |
| #129 | |
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary of the demo script: show a short message plus
        # setup hints instead of a raw traceback.
        print(f"=> Error: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure vLLM server is running: vllm serve microsoft/DialoGPT-small --port 8000")
        print("2. Check if the model is downloaded and accessible")
        print("3. Verify the base URL and port configuration")
        print("4. Ensure you have the required dependencies installed")
        # Exit non-zero so shells and CI can tell that the demo failed;
        # without this the script would report success (status 0).
        raise SystemExit(1)
| #140 |