repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import logging |
| #2 | from importlib.metadata import version |
| #3 | from typing import Any, Dict, List, Optional |
| #4 | |
| #5 | from pydantic import BaseModel |
| #6 | |
| #7 | try: |
| #8 | from pymongo import MongoClient |
| #9 | from pymongo.driver_info import DriverInfo |
| #10 | from pymongo.errors import PyMongoError |
| #11 | from pymongo.operations import SearchIndexModel |
| #12 | except ImportError: |
| #13 | raise ImportError("The 'pymongo' library is required. Please install it using 'pip install pymongo'.") |
| #14 | |
| #15 | from mem0.vector_stores.base import VectorStoreBase |
| #16 | |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): this configures the root logger at import time; libraries usually leave
# logging configuration to the application — confirm this is intended.
logging.basicConfig(level=logging.INFO)

# Driver metadata handed to MongoClient by the MongoDB class; the version string is read
# from the installed "mem0ai" distribution.
_DRIVER_METADATA = DriverInfo(name="Mem0", version=version("mem0ai"))
| #21 | |
class OutputData(BaseModel):
    """Result record produced by the MongoDB vector store's search, get and list methods."""

    # Document identifier; the store fills this with str(doc["_id"]).
    id: Optional[str]
    # Similarity score; set by search(), passed as None by get() and list().
    score: Optional[float]
    # The "payload" field stored alongside the embedding, if any.
    payload: Optional[dict]
| #26 | |
| #27 | |
class MongoDB(VectorStoreBase):
    """
    Vector store backed by a MongoDB collection and a search index on the "embedding" field.

    Each stored document has the shape {"_id": ..., "embedding": [...], "payload": {...}}.
    Most methods log PyMongo errors and return an empty value instead of raising.
    """

    VECTOR_TYPE = "knnVector"
    SIMILARITY_METRIC = "cosine"

    def __init__(self, db_name: str, collection_name: str, embedding_model_dims: int, mongo_uri: str):
        """
        Initialize the MongoDB vector store with vector search capabilities.

        Args:
            db_name (str): Database name
            collection_name (str): Collection name
            embedding_model_dims (int): Dimension of the embedding vector
            mongo_uri (str): MongoDB connection URI
        """
        self.collection_name = collection_name
        self.embedding_model_dims = embedding_model_dims
        self.db_name = db_name

        self.client = MongoClient(mongo_uri, driver=_DRIVER_METADATA)
        self.db = self.client[db_name]
        self.collection = self.create_col()

    def create_col(self):
        """
        Ensure the collection and its vector search index exist.

        Returns:
            The collection handle, or None when a PyMongo error occurred (the error is logged).
        """
        # Assigned before any server call so the attribute exists even when the setup below fails;
        # search() reads it unconditionally.
        self.index_name = f"{self.collection_name}_vector_index"
        try:
            database = self.client[self.db_name]
            collection = database[self.collection_name]
            if self.collection_name not in database.list_collection_names():
                logger.info(f"Collection '{self.collection_name}' does not exist. Creating it now.")
                # Insert and remove a placeholder document to create the collection
                collection.insert_one({"_id": 0, "placeholder": True})
                collection.delete_one({"_id": 0})
                logger.info(f"Collection '{self.collection_name}' created successfully.")

            if list(collection.list_search_indexes(name=self.index_name)):
                logger.info(f"Search index '{self.index_name}' already exists in collection '{self.collection_name}'.")
            else:
                search_index_model = SearchIndexModel(
                    name=self.index_name,
                    definition={
                        "mappings": {
                            "dynamic": False,
                            "fields": {
                                "embedding": {
                                    "type": self.VECTOR_TYPE,
                                    "dimensions": self.embedding_model_dims,
                                    "similarity": self.SIMILARITY_METRIC,
                                }
                            },
                        }
                    },
                )
                collection.create_search_index(search_index_model)
                logger.info(
                    f"Search index '{self.index_name}' created successfully for collection '{self.collection_name}'."
                )
            return collection
        except PyMongoError as e:
            logger.error(f"Error creating collection and search index: {e}")
            return None

    def insert(
        self, vectors: List[List[float]], payloads: Optional[List[Dict]] = None, ids: Optional[List[str]] = None
    ) -> None:
        """
        Insert vectors into the collection.

        Args:
            vectors (List[List[float]]): List of vectors to insert.
            payloads (List[Dict], optional): List of payloads corresponding to vectors.
                Defaults to an empty payload per vector.
            ids (List[str], optional): List of IDs corresponding to vectors. When omitted
                (or when an individual ID is None) no "_id" is set, so MongoDB assigns one.
        """
        if not vectors:
            # insert_many rejects an empty document list, so there is nothing to send.
            logger.info(f"No vectors provided; nothing inserted into '{self.collection_name}'.")
            return

        logger.info(f"Inserting {len(vectors)} vectors into collection '{self.collection_name}'.")

        doc_payloads = payloads or [{} for _ in vectors]
        doc_ids = ids or [None] * len(vectors)

        data = []
        for vector, payload, _id in zip(vectors, doc_payloads, doc_ids):
            document = {"embedding": vector, "payload": payload}
            if _id is not None:
                # Only set "_id" when the caller supplied one; a shared None value would make
                # every document after the first collide on the same key.
                document = {"_id": _id, **document}
            data.append(document)
        try:
            self.collection.insert_many(data)
            logger.info(f"Inserted {len(data)} documents into '{self.collection_name}'.")
        except PyMongoError as e:
            logger.error(f"Error inserting data: {e}")

    def search(
        self, query: str, vectors: List[float], limit: int = 5, filters: Optional[Dict] = None
    ) -> List[OutputData]:
        """
        Search for similar vectors using the vector search index.

        Args:
            query (str): Query string (used only in the error log message).
            vectors (List[float]): Query vector.
            limit (int, optional): Number of results to return. Defaults to 5.
            filters (Dict, optional): Equality filters matched against "payload.<key>".

        Returns:
            List[OutputData]: Search results; empty when the index is missing or the query fails.
        """
        found_indexes = list(self.collection.list_search_indexes(name=self.index_name))
        if not found_indexes:
            logger.error(f"Index '{self.index_name}' does not exist.")
            return []

        results = []
        try:
            collection = self.client[self.db_name][self.collection_name]
            pipeline = [
                {
                    "$vectorSearch": {
                        "index": self.index_name,
                        "limit": limit,
                        # NOTE(review): numCandidates equals limit here; a larger candidate pool may
                        # improve recall — confirm against the $vectorSearch documentation.
                        "numCandidates": limit,
                        "queryVector": vectors,
                        "path": "embedding",
                    }
                },
                {"$set": {"score": {"$meta": "vectorSearchScore"}}},
                {"$project": {"embedding": 0}},
            ]

            # Add filter stage if filters are provided
            if filters:
                filter_conditions = [{"payload." + key: value} for key, value in filters.items()]
                # The $match runs right after $vectorSearch, so it filters the already-limited
                # result set and fewer than `limit` documents may come back.
                pipeline.insert(1, {"$match": {"$and": filter_conditions}})

            results = list(collection.aggregate(pipeline))
            logger.info(f"Vector search completed. Found {len(results)} documents.")
        except Exception as e:
            logger.error(f"Error during vector search for query {query}: {e}")
            return []

        return [OutputData(id=str(doc["_id"]), score=doc.get("score"), payload=doc.get("payload")) for doc in results]

    def delete(self, vector_id: str) -> None:
        """
        Delete a vector by ID.

        Args:
            vector_id (str): ID of the vector to delete.
        """
        try:
            result = self.collection.delete_one({"_id": vector_id})
            if result.deleted_count > 0:
                logger.info(f"Deleted document with ID '{vector_id}'.")
            else:
                logger.warning(f"No document found with ID '{vector_id}' to delete.")
        except PyMongoError as e:
            logger.error(f"Error deleting document: {e}")

    def update(self, vector_id: str, vector: Optional[List[float]] = None, payload: Optional[Dict] = None) -> None:
        """
        Update a vector and its payload.

        Only the fields that are not None are written; when both are None no request is sent.

        Args:
            vector_id (str): ID of the vector to update.
            vector (List[float], optional): Updated vector.
            payload (Dict, optional): Updated payload (replaces the stored payload).
        """
        update_fields = {}
        if vector is not None:
            update_fields["embedding"] = vector
        if payload is not None:
            update_fields["payload"] = payload

        if not update_fields:
            return

        try:
            result = self.collection.update_one({"_id": vector_id}, {"$set": update_fields})
            if result.matched_count > 0:
                logger.info(f"Updated document with ID '{vector_id}'.")
            else:
                logger.warning(f"No document found with ID '{vector_id}' to update.")
        except PyMongoError as e:
            logger.error(f"Error updating document: {e}")

    def get(self, vector_id: str) -> Optional[OutputData]:
        """
        Retrieve a vector by ID.

        Args:
            vector_id (str): ID of the vector to retrieve.

        Returns:
            Optional[OutputData]: Retrieved record (score is None), or None if not found or on error.
        """
        try:
            doc = self.collection.find_one({"_id": vector_id})
            if doc:
                logger.info(f"Retrieved document with ID '{vector_id}'.")
                return OutputData(id=str(doc["_id"]), score=None, payload=doc.get("payload"))
            logger.warning(f"Document with ID '{vector_id}' not found.")
            return None
        except PyMongoError as e:
            logger.error(f"Error retrieving document: {e}")
            return None

    def list_cols(self) -> List[str]:
        """
        List all collections in the database.

        Returns:
            List[str]: List of collection names; empty on error.
        """
        try:
            collections = self.db.list_collection_names()
            logger.info(f"Listing collections in database '{self.db_name}': {collections}")
            return collections
        except PyMongoError as e:
            logger.error(f"Error listing collections: {e}")
            return []

    def delete_col(self) -> None:
        """Delete the collection."""
        try:
            self.collection.drop()
            logger.info(f"Deleted collection '{self.collection_name}'.")
        except PyMongoError as e:
            logger.error(f"Error deleting collection: {e}")

    def col_info(self) -> Dict[str, Any]:
        """
        Get information about the collection.

        Returns:
            Dict[str, Any]: {"name", "count", "size"} taken from the collstats command; empty on error.
        """
        try:
            stats = self.db.command("collstats", self.collection_name)
            info = {"name": self.collection_name, "count": stats.get("count"), "size": stats.get("size")}
            logger.info(f"Collection info: {info}")
            return info
        except PyMongoError as e:
            logger.error(f"Error getting collection info: {e}")
            return {}

    def list(self, filters: Optional[Dict] = None, limit: int = 100) -> List[OutputData]:
        """
        List vectors in the collection.

        Args:
            filters (Dict, optional): Equality filters matched against "payload.<key>".
            limit (int, optional): Number of vectors to return.

        Returns:
            List[OutputData]: List of records (score is None); empty on error.
        """
        try:
            query = {}
            if filters:
                # Apply filters to the payload field
                query = {"$and": [{"payload." + key: value} for key, value in filters.items()]}

            cursor = self.collection.find(query).limit(limit)
            results = [OutputData(id=str(doc["_id"]), score=None, payload=doc.get("payload")) for doc in cursor]
            logger.info(f"Retrieved {len(results)} documents from collection '{self.collection_name}'.")
            return results
        except PyMongoError as e:
            logger.error(f"Error listing documents: {e}")
            return []

    def reset(self):
        """Reset the index by deleting and recreating it."""
        logger.warning(f"Resetting index {self.collection_name}...")
        self.delete_col()
        # create_col() takes no arguments; it reads the collection name from self.
        self.collection = self.create_col()

    def __del__(self) -> None:
        """Close the database connection when the object is deleted."""
        # The attribute check covers instances whose __init__ failed before the client was created.
        if hasattr(self, "client"):
            self.client.close()
            logger.info("MongoClient connection closed.")
| #314 |