repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import logging |
| #2 | import os |
| #3 | import pickle |
| #4 | import uuid |
| #5 | from pathlib import Path |
| #6 | from typing import Dict, List, Optional |
| #7 | |
| #8 | import numpy as np |
| #9 | from pydantic import BaseModel |
| #10 | |
| #11 | import warnings |
| #12 | |
| #13 | try: |
| #14 | # Suppress SWIG deprecation warnings from FAISS |
| #15 | warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*SwigPy.*") |
| #16 | warnings.filterwarnings("ignore", category=DeprecationWarning, message=".*swigvarlink.*") |
| #17 | |
| #18 | logging.getLogger("faiss").setLevel(logging.WARNING) |
| #19 | logging.getLogger("faiss.loader").setLevel(logging.WARNING) |
| #20 | |
| #21 | import faiss |
| #22 | except ImportError: |
| #23 | raise ImportError( |
| #24 | "Could not import faiss python package. " |
| #25 | "Please install it with `pip install faiss-gpu` (for CUDA supported GPU) " |
| #26 | "or `pip install faiss-cpu` (depending on Python version)." |
| #27 | ) |
| #28 | |
| #29 | from mem0.vector_stores.base import VectorStoreBase |
| #30 | |
| #31 | logger = logging.getLogger(__name__) |
| #32 | |
| #33 | |
class OutputData(BaseModel):
    """Single record handed back by the vector store (search hit or lookup)."""

    # Identifier of the stored memory.
    id: Optional[str]
    # Distance/score reported for a search hit; lookups in this file pass None.
    score: Optional[float]
    # Metadata dictionary stored alongside the vector.
    payload: Optional[Dict]
| #38 | |
| #39 | |
| #40 | class FAISS(VectorStoreBase): |
def __init__(
    self,
    collection_name: str,
    path: Optional[str] = None,
    distance_strategy: str = "euclidean",
    normalize_L2: bool = False,
    embedding_model_dims: int = 1536,
):
    """
    Initialize the FAISS vector store.

    An existing collection is loaded from disk when both of its files are
    present; otherwise (or when loading fails) a fresh, empty collection is
    created.

    Args:
        collection_name (str): Name of the collection.
        path (str, optional): Directory holding the collection files.
            Defaults to "/tmp/faiss/<collection_name>".
        distance_strategy (str, optional): Distance strategy to use. Options: 'euclidean', 'inner_product', 'cosine'.
            Defaults to "euclidean".
        normalize_L2 (bool, optional): Whether to normalize L2 vectors. Only applicable for euclidean distance.
            Defaults to False.
        embedding_model_dims (int, optional): Dimensionality used when a new index is created.
            Defaults to 1536.
    """
    self.collection_name = collection_name
    self.path = path or f"/tmp/faiss/{collection_name}"
    self.distance_strategy = distance_strategy
    self.normalize_L2 = normalize_L2
    self.embedding_model_dims = embedding_model_dims

    # Initialize storage structures
    self.index = None
    self.docstore = {}
    self.index_to_id = {}

    # Create the storage directory itself. The files live *inside* self.path,
    # and os.path.dirname() of a bare relative path such as "db" is "",
    # which os.makedirs rejects.
    os.makedirs(self.path, exist_ok=True)

    # Try to load existing index if available
    index_path = f"{self.path}/{collection_name}.faiss"
    docstore_path = f"{self.path}/{collection_name}.pkl"
    if os.path.exists(index_path) and os.path.exists(docstore_path):
        self._load(index_path, docstore_path)

    # Nothing on disk, or the load failed and left no index: start empty so
    # the store is usable instead of raising "Collection not initialized".
    if self.index is None:
        self.create_col(collection_name)
| #82 | |
def _load(self, index_path: str, docstore_path: str):
    """
    Load FAISS index and docstore from disk.

    State is only committed when both files load successfully. On any
    failure the index is set to None and both mappings are emptied, so a
    readable index is never paired with missing ID mappings.

    Args:
        index_path (str): Path to FAISS index file.
        docstore_path (str): Path to docstore pickle file.
    """
    try:
        index = faiss.read_index(index_path)
        # SECURITY NOTE(review): pickle.load can execute arbitrary code from
        # the file. Only point this store at directories you trust.
        with open(docstore_path, "rb") as f:
            docstore, index_to_id = pickle.load(f)
    except Exception as e:
        logger.warning(f"Failed to load FAISS index: {e}")

        self.index = None
        self.docstore = {}
        self.index_to_id = {}
        return

    self.index = index
    self.docstore = docstore
    self.index_to_id = index_to_id
    logger.info(f"Loaded FAISS index from {index_path} with {self.index.ntotal} vectors")
| #101 | |
| #102 | def _save(self): |
| #103 | """Save FAISS index and docstore to disk.""" |
| #104 | if not self.path or not self.index: |
| #105 | return |
| #106 | |
| #107 | try: |
| #108 | os.makedirs(self.path, exist_ok=True) |
| #109 | index_path = f"{self.path}/{self.collection_name}.faiss" |
| #110 | docstore_path = f"{self.path}/{self.collection_name}.pkl" |
| #111 | |
| #112 | faiss.write_index(self.index, index_path) |
| #113 | with open(docstore_path, "wb") as f: |
| #114 | pickle.dump((self.docstore, self.index_to_id), f) |
| #115 | except Exception as e: |
| #116 | logger.warning(f"Failed to save FAISS index: {e}") |
| #117 | |
| #118 | def _parse_output(self, scores, ids, limit=None) -> List[OutputData]: |
| #119 | """ |
| #120 | Parse the output data. |
| #121 | |
| #122 | Args: |
| #123 | scores: Similarity scores from FAISS. |
| #124 | ids: Indices from FAISS. |
| #125 | limit: Maximum number of results to return. |
| #126 | |
| #127 | Returns: |
| #128 | List[OutputData]: Parsed output data. |
| #129 | """ |
| #130 | if limit is None: |
| #131 | limit = len(ids) |
| #132 | |
| #133 | results = [] |
| #134 | for i in range(min(len(ids), limit)): |
| #135 | if ids[i] == -1: # FAISS returns -1 for empty results |
| #136 | continue |
| #137 | |
| #138 | index_id = int(ids[i]) |
| #139 | vector_id = self.index_to_id.get(index_id) |
| #140 | if vector_id is None: |
| #141 | continue |
| #142 | |
| #143 | payload = self.docstore.get(vector_id) |
| #144 | if payload is None: |
| #145 | continue |
| #146 | |
| #147 | payload_copy = payload.copy() |
| #148 | |
| #149 | score = float(scores[i]) |
| #150 | entry = OutputData( |
| #151 | id=vector_id, |
| #152 | score=score, |
| #153 | payload=payload_copy, |
| #154 | ) |
| #155 | results.append(entry) |
| #156 | |
| #157 | return results |
| #158 | |
def create_col(self, name: str, distance: str = None):
    """
    Create a new, empty collection and persist it.

    Args:
        name (str): Name of the collection.
        distance (str, optional): Distance metric to use. Overrides the distance_strategy
            passed during initialization. Defaults to None.

    Returns:
        self: The FAISS instance.
    """
    distance_strategy = distance or self.distance_strategy
    metric = distance_strategy.lower()

    # Create index based on distance strategy
    if metric in ("inner_product", "cosine"):
        self.index = faiss.IndexFlatIP(self.embedding_model_dims)
    else:
        self.index = faiss.IndexFlatL2(self.embedding_model_dims)

    # Remember the effective strategy so later normalisation checks and
    # col_info() describe the index that was actually built.
    self.distance_strategy = distance_strategy

    # The new index holds no vectors, so mappings from a previous index
    # would point at positions that no longer exist.
    self.docstore = {}
    self.index_to_id = {}

    self.collection_name = name

    self._save()

    return self
| #184 | |
def insert(
    self,
    vectors: List[list],
    payloads: Optional[List[Dict]] = None,
    ids: Optional[List[str]] = None,
):
    """
    Insert vectors into a collection.

    Args:
        vectors (List[list]): List of vectors to insert.
        payloads (Optional[List[Dict]], optional): List of payloads corresponding to vectors. Defaults to None.
        ids (Optional[List[str]], optional): List of IDs corresponding to vectors. Defaults to None.

    Raises:
        ValueError: If the collection is not initialized, or if vectors,
            payloads and ids differ in length.
    """
    if self.index is None:
        raise ValueError("Collection not initialized. Call create_col first.")

    if ids is None:
        ids = [str(uuid.uuid4()) for _ in range(len(vectors))]

    if payloads is None:
        payloads = [{} for _ in range(len(vectors))]

    if len(vectors) != len(ids) or len(vectors) != len(payloads):
        raise ValueError("Vectors, payloads, and IDs must have the same length")

    # Nothing to add; an empty list would become a 1-D array that cannot be
    # passed to the index.
    if len(vectors) == 0:
        return

    vectors_np = np.array(vectors, dtype=np.float32)

    # Cosine similarity on an inner-product index needs unit-length vectors;
    # for euclidean, normalisation stays opt-in via normalize_L2.
    strategy = self.distance_strategy.lower()
    if strategy == "cosine" or (self.normalize_L2 and strategy == "euclidean"):
        faiss.normalize_L2(vectors_np)

    # Capture the position of the first new vector BEFORE adding. Using
    # len(self.index_to_id) is wrong once delete() has removed mappings,
    # because deleted vectors still occupy positions in the index.
    starting_idx = self.index.ntotal
    self.index.add(vectors_np)

    for offset, (vector_id, payload) in enumerate(zip(ids, payloads)):
        self.docstore[vector_id] = payload.copy()
        self.index_to_id[starting_idx + offset] = vector_id

    self._save()

    logger.info(f"Inserted {len(vectors)} vectors into collection {self.collection_name}")
| #226 | |
def search(
    self, query: str, vectors: List[list], limit: int = 5, filters: Optional[Dict] = None
) -> List[OutputData]:
    """
    Search for similar vectors.

    Filters are applied after the FAISS lookup, and some index positions may
    have no ID mapping or payload. The number of candidates requested is
    therefore doubled until ``limit`` results survive or the whole index has
    been ranked.

    Args:
        query (str): Query (not used, kept for API compatibility).
        vectors (List[list]): Query vector (a single vector or a list whose first entry is used).
        limit (int, optional): Number of results to return. Defaults to 5.
        filters (Optional[Dict], optional): Filters to apply to the search. Defaults to None.

    Returns:
        List[OutputData]: Search results.

    Raises:
        ValueError: If the collection is not initialized.
    """
    if self.index is None:
        raise ValueError("Collection not initialized. Call create_col first.")

    total = self.index.ntotal
    if limit <= 0 or total == 0:
        return []

    query_vectors = np.array(vectors, dtype=np.float32)

    if query_vectors.ndim == 1:
        query_vectors = query_vectors.reshape(1, -1)

    # Cosine similarity on an inner-product index needs a unit-length query;
    # for euclidean, normalisation stays opt-in via normalize_L2.
    strategy = self.distance_strategy.lower()
    if strategy == "cosine" or (self.normalize_L2 and strategy == "euclidean"):
        faiss.normalize_L2(query_vectors)

    fetch_k = min(total, limit * 2 if filters else limit)
    while True:
        scores, indices = self.index.search(query_vectors, fetch_k)

        # Parse every candidate; truncating to `limit` here would throw away
        # the over-fetched entries before filtering.
        results = self._parse_output(scores[0], indices[0])
        if filters:
            results = [hit for hit in results if self._apply_filters(hit.payload, filters)]

        if len(results) >= limit or fetch_k >= total:
            break
        fetch_k = min(fetch_k * 2, total)

    return results[:limit]
| #268 | |
| #269 | def _apply_filters(self, payload: Dict, filters: Dict) -> bool: |
| #270 | """ |
| #271 | Apply filters to a payload. |
| #272 | |
| #273 | Args: |
| #274 | payload (Dict): Payload to filter. |
| #275 | filters (Dict): Filters to apply. |
| #276 | |
| #277 | Returns: |
| #278 | bool: True if payload passes filters, False otherwise. |
| #279 | """ |
| #280 | if not filters or not payload: |
| #281 | return True |
| #282 | |
| #283 | for key, value in filters.items(): |
| #284 | if key not in payload: |
| #285 | return False |
| #286 | |
| #287 | if isinstance(value, list): |
| #288 | if payload[key] not in value: |
| #289 | return False |
| #290 | elif payload[key] != value: |
| #291 | return False |
| #292 | |
| #293 | return True |
| #294 | |
def delete(self, vector_id: str):
    """
    Delete a vector by ID.

    Only the ID mappings and the payload are removed; this method does not
    remove anything from the FAISS index itself. Every index position mapped
    to ``vector_id`` is dropped, so an ID that was inserted more than once
    does not keep resolving through a stale mapping.

    Args:
        vector_id (str): ID of the vector to delete.

    Raises:
        ValueError: If the collection is not initialized.
    """
    if self.index is None:
        raise ValueError("Collection not initialized. Call create_col first.")

    positions = [idx for idx, mapped_id in self.index_to_id.items() if mapped_id == vector_id]

    if not positions:
        logger.warning(f"Vector {vector_id} not found in collection {self.collection_name}")
        return

    for idx in positions:
        self.index_to_id.pop(idx, None)
    self.docstore.pop(vector_id, None)

    self._save()

    logger.info(f"Deleted vector {vector_id} from collection {self.collection_name}")
| #320 | |
def update(
    self,
    vector_id: str,
    vector: Optional[List[float]] = None,
    payload: Optional[Dict] = None,
):
    """
    Replace the payload and/or the vector stored under an existing ID.

    A new payload replaces the stored one entirely. A new vector is applied
    by deleting the entry and inserting it again under the same ID.

    Args:
        vector_id (str): ID of the vector to update.
        vector (Optional[List[float]], optional): Updated vector. Defaults to None.
        payload (Optional[Dict], optional): Updated payload. Defaults to None.

    Raises:
        ValueError: If the collection is not initialized or the ID is unknown.
    """
    if self.index is None:
        raise ValueError("Collection not initialized. Call create_col first.")

    if vector_id not in self.docstore:
        raise ValueError(f"Vector {vector_id} not found")

    if payload is not None:
        self.docstore[vector_id] = payload.copy()

    # Snapshot taken after any payload replacement; this is what gets
    # re-inserted when the vector changes.
    effective_payload = self.docstore[vector_id].copy()

    if vector is None:
        # Payload-only change: just persist.
        self._save()
    else:
        self.delete(vector_id)
        self.insert([vector], [effective_payload], [vector_id])

    logger.info(f"Updated vector {vector_id} in collection {self.collection_name}")
| #354 | |
def get(self, vector_id: str) -> Optional[OutputData]:
    """
    Retrieve a stored entry by ID.

    Args:
        vector_id (str): ID of the vector to retrieve.

    Returns:
        Optional[OutputData]: The entry with a copy of its payload and
        ``score`` set to None, or None when the ID is unknown.

    Raises:
        ValueError: If the collection is not initialized.
    """
    if self.index is None:
        raise ValueError("Collection not initialized. Call create_col first.")

    if vector_id not in self.docstore:
        return None

    # Copy so callers cannot mutate the stored payload.
    payload = self.docstore[vector_id].copy()

    return OutputData(
        id=vector_id,
        score=None,
        payload=payload,
    )
| #378 | |
def list_cols(self) -> List[str]:
    """
    List all collections stored in this store's directory.

    Collections are discovered from the "*.faiss" files inside ``self.path``,
    which is where the index files are written.

    Returns:
        List[str]: Sorted list of collection names.
    """
    if not self.path:
        return [self.collection_name] if self.index is not None else []

    try:
        # Look inside self.path itself, not its parent directory.
        return sorted(file.stem for file in Path(self.path).glob("*.faiss"))
    except Exception as e:
        logger.warning(f"Failed to list collections: {e}")
        return [self.collection_name] if self.index is not None else []
| #398 | |
def delete_col(self):
    """
    Remove the collection's files from disk and clear the in-memory state.

    File removal errors are logged, not raised; the in-memory index and
    mappings are cleared regardless.
    """
    if self.path:
        try:
            # Same order as the files are written: index first, then docstore.
            for extension in ("faiss", "pkl"):
                target = f"{self.path}/{self.collection_name}.{extension}"
                if os.path.exists(target):
                    os.remove(target)

            logger.info(f"Deleted collection {self.collection_name}")
        except Exception as e:
            logger.warning(f"Failed to delete collection: {e}")

    self.index = None
    self.docstore = {}
    self.index_to_id = {}
| #420 | |
def col_info(self) -> Dict:
    """
    Get information about a collection.

    ``count`` is the number of stored entries (IDs with a payload). It is
    not taken from ``index.ntotal``, because delete() in this class only
    drops mappings and never removes vectors from the FAISS index.

    Returns:
        Dict: Collection information. Contains only "name" and "count" when
        the collection is not initialized; otherwise also "dimension" and
        "distance".
    """
    if self.index is None:
        return {"name": self.collection_name, "count": 0}

    return {
        "name": self.collection_name,
        "count": len(self.docstore),
        "dimension": self.index.d,
        "distance": self.distance_strategy,
    }
| #437 | |
def list(self, filters: Optional[Dict] = None, limit: int = 100) -> List[OutputData]:
    """
    List stored entries, optionally restricted by payload filters.

    Args:
        filters (Optional[Dict], optional): Filters to apply to the list. Defaults to None.
        limit (int, optional): Number of vectors to return. Defaults to 100.

    Returns:
        List[OutputData]: NOTE: when the collection is initialized, the
        entries are wrapped in an outer single-element list (``[entries]``).
        An uninitialized collection yields a plain empty list.
    """
    if self.index is None:
        return []

    matches = []
    for vector_id, payload in self.docstore.items():
        if filters and not self._apply_filters(payload, filters):
            continue

        matches.append(OutputData(id=vector_id, score=None, payload=payload.copy()))

        # Checked after appending, as in the original loop.
        if len(matches) >= limit:
            break

    return [matches]
| #474 | |
def reset(self):
    """Drop the current collection and create an empty one with the same name."""
    name = self.collection_name
    logger.warning(f"Resetting index {name}...")
    self.delete_col()
    self.create_col(name)
| #480 |