my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

"""
Example of using vLLM with mem0 for high-performance memory operations.

SETUP INSTRUCTIONS:
1. Install vLLM:
   pip install vllm

2. Start vLLM server (in a separate terminal):
   vllm serve microsoft/DialoGPT-small --port 8000

   Wait for the message: "Uvicorn running on http://0.0.0.0:8000"
   (Small model: ~500MB download, much faster!)

   NOTE: The model you serve must be the same model named under "model" in
   the config below. Either serve that model instead, or change the config
   to the model you are serving; otherwise requests are rejected by the server.

3. Verify server is running:
   curl http://localhost:8000/health

4. Run this example:
   python examples/misc/vllm_example.py

Optional environment variables:
   export VLLM_BASE_URL="http://localhost:8000/v1"
   export VLLM_API_KEY="vllm-api-key"
"""
#24
import os

from mem0 import Memory
#26
# Configuration for vLLM integration.
#
# The LLM endpoint and API key honour the VLLM_BASE_URL / VLLM_API_KEY
# environment variables listed in the module docstring; when a variable is
# unset or empty, the local-server default is used instead.
config = {
    "llm": {
        "provider": "vllm",
        "config": {
            # Must be the same model name the vLLM server is serving.
            "model": "Qwen/Qwen2.5-32B-Instruct",
            "vllm_base_url": os.environ.get("VLLM_BASE_URL") or "http://localhost:8000/v1",
            "api_key": os.environ.get("VLLM_API_KEY") or "vllm-api-key",
            "temperature": 0.7,
            "max_tokens": 100,
        },
    },
    "embedder": {"provider": "openai", "config": {"model": "text-embedding-3-small"}},
    "vector_store": {
        "provider": "qdrant",
        "config": {"collection_name": "vllm_memories", "host": "localhost", "port": 6333},
    },
}
#45
#46
def _unwrap_results(response):
    """Return the list of memory records contained in a mem0 response.

    Depending on the mem0 version, ``Memory.search`` and ``Memory.get_all``
    return either a bare list of records or a dict that wraps the list under
    the ``"results"`` key. Iterating the dict form directly would yield its
    keys rather than the records, so normalise both shapes to a list here.
    """
    if isinstance(response, dict):
        return response.get("results", [])
    return response


def main():
    """
    Demonstrate vLLM integration with mem0.

    Builds a ``Memory`` from the module-level ``config``, stores three short
    sample conversations for ``user_123``, runs three search queries against
    them, and finally lists every memory stored for that user. All output is
    printed; nothing is returned.
    """
    print("--> Initializing mem0 with vLLM...")

    # Initialize memory with vLLM
    memory = Memory.from_config(config)

    print("--> Memory initialized successfully!")

    # Example conversations to store
    conversations = [
        {
            "messages": [
                {"role": "user", "content": "I love playing chess on weekends"},
                {
                    "role": "assistant",
                    "content": "That's great! Chess is an excellent strategic game that helps improve critical thinking.",
                },
            ],
            "user_id": "user_123",
        },
        {
            "messages": [
                {"role": "user", "content": "I'm learning Python programming"},
                {
                    "role": "assistant",
                    "content": "Python is a fantastic language for beginners! What specific areas are you focusing on?",
                },
            ],
            "user_id": "user_123",
        },
        {
            "messages": [
                {"role": "user", "content": "I prefer working late at night, I'm more productive then"},
                {
                    "role": "assistant",
                    "content": "Many people find they're more creative and focused during nighttime hours. It's important to maintain a consistent schedule that works for you.",
                },
            ],
            "user_id": "user_123",
        },
    ]

    print("\n--> Adding memories using vLLM...")

    # Add memories - now powered by vLLM's high-performance inference
    for i, conversation in enumerate(conversations, 1):
        result = memory.add(messages=conversation["messages"], user_id=conversation["user_id"])
        print(f"Memory {i} added: {result}")

    print("\n🔍 Searching memories...")

    # Search memories - vLLM will process the search and memory operations
    search_queries = [
        "What does the user like to do on weekends?",
        "What is the user learning?",
        "When is the user most productive?",
    ]

    for query in search_queries:
        print(f"\nQuery: {query}")
        # Unwrap so we iterate over records, not over the response dict's keys.
        memories = _unwrap_results(memory.search(query=query, user_id="user_123"))

        for memory_item in memories:
            print(f" - {memory_item['memory']}")

    print("\n--> Getting all memories for user...")
    # Unwrap first so len() counts stored memories rather than dict keys.
    all_memories = _unwrap_results(memory.get_all(user_id="user_123"))
    print(f"Total memories stored: {len(all_memories)}")

    for memory_item in all_memories:
        print(f" - {memory_item['memory']}")

    print("\n--> vLLM integration demo completed successfully!")
    print("\nBenefits of using vLLM:")
    print(" -> 2.7x higher throughput compared to standard implementations")
    print(" -> 5x faster time-per-output-token")
    print(" -> Efficient memory usage with PagedAttention")
    print(" -> Simple configuration, same as other providers")
#128
#129
if __name__ == "__main__":
    # Script entry point: run the demo and, on any failure, print
    # troubleshooting hints and exit with a non-zero status so shells and CI
    # can detect that the example did not complete.
    try:
        main()
    except Exception as e:
        # Report the model that is actually configured, so the suggested
        # command serves the model this script will request.
        served_model = config["llm"]["config"]["model"]
        print(f"=> Error: {e}")
        print("\nTroubleshooting:")
        print(f"1. Make sure vLLM server is running: vllm serve {served_model} --port 8000")
        print("2. Check if the model is downloaded and accessible")
        print("3. Verify the base URL and port configuration")
        print("4. Ensure you have the required dependencies installed")
        raise SystemExit(1) from e
#140

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public