repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import logging |
| #2 | from typing import Dict, List, Optional |
| #3 | |
| #4 | from pydantic import BaseModel |
| #5 | |
| #6 | try: |
| #7 | import chromadb |
| #8 | from chromadb.config import Settings |
| #9 | except ImportError: |
| #10 | raise ImportError("The 'chromadb' library is required. Please install it using 'pip install chromadb'.") |
| #11 | |
| #12 | from mem0.vector_stores.base import VectorStoreBase |
| #13 | |
| #14 | logger = logging.getLogger(__name__) |
| #15 | |
| #16 | |
class OutputData(BaseModel):
    """One record produced by the vector store (see ``ChromaDB._parse_output``)."""

    # Memory id of the stored vector.
    id: Optional[str]
    # Distance value taken from the query result; None when the result has none.
    score: Optional[float]
    # Metadata dictionary stored alongside the vector.
    payload: Optional[Dict]
| #21 | |
| #22 | |
class ChromaDB(VectorStoreBase):
    """Vector store implementation backed by a single ChromaDB collection."""

    def __init__(
        self,
        collection_name: str,
        client: Optional[chromadb.Client] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
        path: Optional[str] = None,
        api_key: Optional[str] = None,
        tenant: Optional[str] = None,
    ):
        """
        Initialize the Chromadb vector store.

        The client is chosen in this order: an explicit ``client``, a Cloud
        client (when both ``api_key`` and ``tenant`` are given), an HTTP server
        client (when both ``host`` and ``port`` are given), and finally a local
        persistent client stored under ``path``.

        Args:
            collection_name (str): Name of the collection.
            client (chromadb.Client, optional): Existing chromadb client instance. Defaults to None.
            host (str, optional): Host address for chromadb server. Defaults to None.
            port (int, optional): Port for chromadb server. Defaults to None.
            path (str, optional): Path for local chromadb database. Defaults to None,
                in which case "db" is used.
            api_key (str, optional): ChromaDB Cloud API key. Defaults to None.
            tenant (str, optional): ChromaDB Cloud tenant ID. Defaults to None.
        """
        if client:
            self.client = client
        elif api_key and tenant:
            # Initialize ChromaDB Cloud client
            logger.info("Initializing ChromaDB Cloud client")
            self.client = chromadb.CloudClient(
                api_key=api_key,
                tenant=tenant,
                database="mem0",  # Use fixed database name for cloud
            )
        else:
            # Initialize local or server client
            self.settings = Settings(anonymized_telemetry=False)

            if host and port:
                self.settings.chroma_server_host = host
                self.settings.chroma_server_http_port = port
                self.settings.chroma_api_impl = "chromadb.api.fastapi.FastAPI"
            else:
                if path is None:
                    path = "db"

                self.settings.persist_directory = path
                self.settings.is_persistent = True

            self.client = chromadb.Client(self.settings)

        self.collection_name = collection_name
        self.collection = self.create_col(collection_name)

    def _parse_output(self, data: Dict) -> List[OutputData]:
        """
        Parse the output data.

        Reads the "ids", "distances" and "metadatas" entries of ``data``. When
        an entry is a list of lists (one inner list per query), only the first
        inner list is used. Missing or shorter columns are padded with None.

        Args:
            data (Dict): Output data.

        Returns:
            List[OutputData]: Parsed output data; empty when no column has rows.
        """
        columns = []
        for key in ("ids", "distances", "metadatas"):
            column = data.get(key, [])
            if isinstance(column, list) and column and isinstance(column[0], list):
                column = column[0]
            columns.append(column)

        ids, distances, metadatas = columns
        # default=0 keeps this from raising when none of the columns is a list
        # (e.g. every key is present but set to None).
        row_count = max((len(column) for column in columns if isinstance(column, list)), default=0)

        def pick(column, index):
            """Return column[index], or None when the column is absent or too short."""
            if isinstance(column, list) and index < len(column):
                return column[index]
            return None

        return [
            OutputData(
                id=pick(ids, index),
                score=pick(distances, index),
                payload=pick(metadatas, index),
            )
            for index in range(row_count)
        ]

    def create_col(self, name: str, embedding_fn: Optional[callable] = None):
        """
        Create a new collection, or fetch it if it already exists.

        Args:
            name (str): Name of the collection.
            embedding_fn (Optional[callable]): Embedding function to use. Defaults to None.

        Returns:
            chromadb.Collection: The created or retrieved collection.
        """
        return self.client.get_or_create_collection(
            name=name,
            embedding_function=embedding_fn,
        )

    def insert(
        self,
        vectors: List[list],
        payloads: Optional[List[Dict]] = None,
        ids: Optional[List[str]] = None,
    ):
        """
        Insert vectors into a collection.

        Args:
            vectors (List[list]): List of vectors to insert.
            payloads (Optional[List[Dict]], optional): List of payloads corresponding to vectors. Defaults to None.
            ids (Optional[List[str]], optional): List of IDs corresponding to vectors. Defaults to None.
        """
        logger.info("Inserting %d vectors into collection %s", len(vectors), self.collection_name)
        self.collection.add(ids=ids, embeddings=vectors, metadatas=payloads)

    def search(
        self, query: str, vectors: List[list], limit: int = 5, filters: Optional[Dict] = None
    ) -> List[OutputData]:
        """
        Search for similar vectors.

        Args:
            query (str): Query. Not used by this implementation; only
                ``vectors`` is sent to the collection.
            vectors (List[list]): List of vectors to search.
            limit (int, optional): Number of results to return. Defaults to 5.
            filters (Optional[Dict], optional): Filters to apply to the search. Defaults to None.

        Returns:
            List[OutputData]: Search results.
        """
        # Filters made only of wildcards / "$not" convert to {}; pass None
        # instead so the collection is queried without a where clause.
        where_clause = (self._generate_where_clause(filters) or None) if filters else None
        results = self.collection.query(query_embeddings=vectors, where=where_clause, n_results=limit)
        return self._parse_output(results)

    def delete(self, vector_id: str):
        """
        Delete a vector by ID.

        Args:
            vector_id (str): ID of the vector to delete.
        """
        self.collection.delete(ids=vector_id)

    def update(
        self,
        vector_id: str,
        vector: Optional[List[float]] = None,
        payload: Optional[Dict] = None,
    ):
        """
        Update a vector and its payload.

        Args:
            vector_id (str): ID of the vector to update.
            vector (Optional[List[float]], optional): Updated vector. Defaults to None.
            payload (Optional[Dict], optional): Updated payload. Defaults to None.
        """
        self.collection.update(ids=vector_id, embeddings=vector, metadatas=payload)

    def get(self, vector_id: str) -> Optional[OutputData]:
        """
        Retrieve a vector by ID.

        Args:
            vector_id (str): ID of the vector to retrieve.

        Returns:
            Optional[OutputData]: Retrieved vector, or None when the collection
            returned no record for ``vector_id``.
        """
        result = self.collection.get(ids=[vector_id])
        parsed = self._parse_output(result)
        # An unknown id yields an empty result; report that as None rather
        # than failing with IndexError.
        return parsed[0] if parsed else None

    def list_cols(self) -> List[chromadb.Collection]:
        """
        List all collections.

        Returns:
            List[chromadb.Collection]: List of collections.
        """
        return self.client.list_collections()

    def delete_col(self):
        """
        Delete the collection named ``self.collection_name``.
        """
        self.client.delete_collection(name=self.collection_name)

    def col_info(self) -> Dict:
        """
        Get information about a collection.

        Returns:
            Dict: Whatever ``client.get_collection`` returns for
            ``self.collection_name``.
            NOTE(review): this is presumably a collection object rather than a
            plain dict - confirm before relying on dict access.
        """
        return self.client.get_collection(name=self.collection_name)

    def list(self, filters: Optional[Dict] = None, limit: int = 100) -> List[List[OutputData]]:
        """
        List all vectors in a collection.

        Args:
            filters (Optional[Dict], optional): Filters to apply to the list. Defaults to None.
            limit (int, optional): Number of vectors to return. Defaults to 100.

        Returns:
            List[List[OutputData]]: A one-element list whose single item is the
            list of parsed vectors. The extra nesting is kept as-is because
            callers may index the result with ``[0]``.
        """
        # Same normalisation as in search(): never send an empty where clause.
        where_clause = (self._generate_where_clause(filters) or None) if filters else None
        results = self.collection.get(where=where_clause, limit=limit)
        return [self._parse_output(results)]

    def reset(self):
        """Reset the index by deleting and recreating it."""
        logger.warning(f"Resetting index {self.collection_name}...")
        self.delete_col()
        self.collection = self.create_col(self.collection_name)

    @staticmethod
    def _generate_where_clause(where: Optional[Dict]) -> Dict:
        """
        Generate a properly formatted where clause for ChromaDB.

        Supported input forms:
            * ``{"field": value}``            -> equality
            * ``{"field": "*"}``              -> wildcard, the filter is dropped
            * ``{"field": {"gte": 1, ...}}``  -> one clause per operator; several
              operators on the same field are combined with ``$and``
            * ``{"$or": [cond, ...]}``        -> ``$or`` of the converted conditions;
              a condition with several fields becomes an ``$and``
            * ``{"$not": ...}``               -> not supported, dropped with a warning

        "contains"/"icontains" and unknown operators fall back to equality.

        Args:
            where (Optional[Dict]): The filter conditions.

        Returns:
            Dict: Properly formatted where clause for ChromaDB, or ``{}`` when
            no condition remains.
        """
        if where is None:
            return {}

        # Universal operator name -> ChromaDB operator. Anything not listed
        # here (including "contains"/"icontains") is treated as equality.
        operator_map = {
            "eq": "$eq",
            "ne": "$ne",
            "gt": "$gt",
            "gte": "$gte",
            "lt": "$lt",
            "lte": "$lte",
            "in": "$in",
            "nin": "$nin",
        }

        def combine(clauses, joiner="$and"):
            """Collapse clauses into one expression; None when there are none."""
            if not clauses:
                return None
            if len(clauses) == 1:
                return clauses[0]
            return {joiner: clauses}

        def convert_condition(key, value):
            """Convert universal filter format to ChromaDB format."""
            if value == "*":
                # Wildcard - match any value (ChromaDB doesn't have direct wildcard, so we skip this filter)
                return None
            if isinstance(value, dict):
                # One clause per operator so that e.g. {"gte": 1, "lte": 5}
                # keeps both bounds instead of only the last one.
                return combine([{key: {operator_map.get(op, "$eq"): operand}} for op, operand in value.items()])
            # Simple equality
            return {key: {"$eq": value}}

        processed_filters = []

        for key, value in where.items():
            if key == "$or":
                branches = []
                for condition in value:
                    converted_parts = [
                        part
                        for part in (convert_condition(sub_key, sub_value) for sub_key, sub_value in condition.items())
                        if part
                    ]
                    # Several fields inside one OR branch must all hold.
                    branch = combine(converted_parts)
                    if branch:
                        branches.append(branch)

                either = combine(branches, "$or")
                if either:
                    processed_filters.append(either)

            elif key == "$not":
                # ChromaDB doesn't have direct NOT; the filter is dropped, which
                # can widen the result set, so make that visible in the logs.
                logger.warning("Ignoring unsupported '$not' filter for collection query: %r", value)
                continue

            else:
                # Regular condition
                converted = convert_condition(key, value)
                if converted:
                    processed_filters.append(converted)

        # Zero conditions -> {}, one -> the condition itself, several -> $and.
        return combine(processed_filters) or {}
| #333 |