repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago

| #1 | import logging |
| #2 | import time |
| #3 | from typing import Dict, Optional |
| #4 | |
| #5 | from pydantic import BaseModel |
| #6 | |
| #7 | from mem0.vector_stores.base import VectorStoreBase |
| #8 | |
| #9 | try: |
| #10 | import pymochow |
| #11 | from pymochow.auth.bce_credentials import BceCredentials |
| #12 | from pymochow.configuration import Configuration |
| #13 | from pymochow.exception import ServerError |
| #14 | from pymochow.model.enum import ( |
| #15 | FieldType, |
| #16 | IndexType, |
| #17 | MetricType, |
| #18 | ServerErrCode, |
| #19 | TableState, |
| #20 | ) |
| #21 | from pymochow.model.schema import ( |
| #22 | AutoBuildRowCountIncrement, |
| #23 | Field, |
| #24 | FilteringIndex, |
| #25 | HNSWParams, |
| #26 | Schema, |
| #27 | VectorIndex, |
| #28 | ) |
| #29 | from pymochow.model.table import ( |
| #30 | FloatVector, |
| #31 | Partition, |
| #32 | Row, |
| #33 | VectorSearchConfig, |
| #34 | VectorTopkSearchRequest, |
| #35 | ) |
| #36 | except ImportError: |
| #37 | raise ImportError("The 'pymochow' library is required. Please install it using 'pip install pymochow'.") |
| #38 | |
| #39 | logger = logging.getLogger(__name__) |
| #40 | |
| #41 | |
class OutputData(BaseModel):
    """Single record returned by the vector store's read operations."""

    # Memory id (the row's primary key).
    id: Optional[str]
    # Distance/score reported by a vector search; None for plain lookups.
    score: Optional[float]
    # Metadata stored alongside the vector.
    payload: Optional[Dict]
| #46 | |
| #47 | |
class BaiduDB(VectorStoreBase):
    """Vector store backed by Baidu VectorDB, accessed through ``pymochow``.

    Every memory is stored as one table row with three fields: ``id``
    (string primary key), ``vector`` (float vector) and ``metadata`` (JSON).
    """

    def __init__(
        self,
        endpoint: str,
        account: str,
        api_key: str,
        database_name: str,
        table_name: str,
        embedding_model_dims: int,
        metric_type: MetricType,
    ) -> None:
        """Initialize the BaiduDB database.

        Connects to the service and makes sure both the database and the
        table exist, creating them when necessary.

        Args:
            endpoint (str): Endpoint URL for Baidu VectorDB.
            account (str): Account for Baidu VectorDB.
            api_key (str): API Key for Baidu VectorDB.
            database_name (str): Name of the database.
            table_name (str): Name of the table.
            embedding_model_dims (int): Dimensions of the embedding model.
            metric_type (MetricType | str): Metric type for similarity search,
                given either as a ``MetricType`` member or as its name.
        """
        self.endpoint = endpoint
        self.account = account
        self.api_key = api_key
        self.database_name = database_name
        self.table_name = table_name
        self.embedding_model_dims = embedding_model_dims
        self.metric_type = metric_type

        # Initialize Mochow client
        config = Configuration(credentials=BceCredentials(account, api_key), endpoint=endpoint)
        self.client = pymochow.MochowClient(config)

        # Ensure database and table exist
        self._create_database_if_not_exists()
        self.create_col(
            name=self.table_name,
            vector_size=self.embedding_model_dims,
            distance=self.metric_type,
        )

    def _create_database_if_not_exists(self):
        """Create the database if it doesn't exist and keep a handle to it.

        Raises:
            Exception: Any error raised by the client is logged and re-raised.
        """
        try:
            # Check if database exists
            databases = self.client.list_databases()
            db_exists = any(db.database_name == self.database_name for db in databases)
            if not db_exists:
                self._database = self.client.create_database(self.database_name)
                logger.info(f"Created database: {self.database_name}")
            else:
                self._database = self.client.database(self.database_name)
                logger.info(f"Database {self.database_name} already exists")
        except Exception as e:
            logger.error(f"Error creating database: {e}")
            raise

    @staticmethod
    def _resolve_metric_type(distance):
        """Map a metric given as ``MetricType`` member or member name to the enum.

        Raises:
            ValueError: If ``distance`` is neither a member nor a member name.
        """
        # __init__ annotates the metric as MetricType, so an enum member must
        # be accepted as-is instead of being compared against member names.
        if isinstance(distance, MetricType):
            return distance
        try:
            return MetricType.__members__[distance]
        except (KeyError, TypeError):
            raise ValueError(f"Unsupported metric_type: {distance}") from None

    def create_col(self, name, vector_size, distance):
        """Create a new table, or attach to it when it already exists.

        Blocks (polling every 2 seconds) until a newly created table reports
        ``TableState.NORMAL``.

        Args:
            name (str): Name of the table to create.
            vector_size (int): Dimension of the vector.
            distance (MetricType | str): Metric type for similarity search,
                as a ``MetricType`` member or its name.

        Raises:
            ValueError: If ``distance`` is not a supported metric type.
            Exception: Any client error is logged and re-raised.
        """
        try:
            # Check if table already exists
            tables = self._database.list_table()
            table_exists = any(table.table_name == name for table in tables)
            if table_exists:
                logger.info(f"Table {name} already exists. Skipping creation.")
                self._table = self._database.describe_table(name)
                return

            metric_type = self._resolve_metric_type(distance)

            # Define table schema
            fields = [
                Field(
                    "id", FieldType.STRING, primary_key=True, partition_key=True, auto_increment=False, not_null=True
                ),
                Field("vector", FieldType.FLOAT_VECTOR, dimension=vector_size),
                Field("metadata", FieldType.JSON),
            ]

            # Create vector index plus a filtering index on the metadata field
            indexes = [
                VectorIndex(
                    index_name="vector_idx",
                    index_type=IndexType.HNSW,
                    field="vector",
                    metric_type=metric_type,
                    params=HNSWParams(m=16, efconstruction=200),
                    auto_build=True,
                    auto_build_index_policy=AutoBuildRowCountIncrement(row_count_increment=10000),
                ),
                FilteringIndex(index_name="metadata_filtering_idx", fields=["metadata"]),
            ]

            schema = Schema(fields=fields, indexes=indexes)

            # Create table
            self._table = self._database.create_table(
                table_name=name, replication=3, partition=Partition(partition_num=1), schema=schema
            )
            logger.info(f"Created table: {name}")

            # Wait for table to be ready
            while True:
                time.sleep(2)
                table = self._database.describe_table(name)
                if table.state == TableState.NORMAL:
                    logger.info(f"Table {name} is ready.")
                    break
                logger.info(f"Waiting for table {name} to be ready, current state: {table.state}")
            self._table = table
        except Exception as e:
            logger.error(f"Error creating table: {e}")
            raise

    def insert(self, vectors, payloads=None, ids=None):
        """Insert vectors into the table.

        Rows are upserted one at a time. If the sequences differ in length,
        the extra items of the longer ones are ignored.

        Args:
            vectors (List[List[float]]): List of vectors to insert.
            payloads (List[Dict], optional): List of payloads corresponding to
                vectors. Defaults to an empty payload per vector.
            ids (List[str], optional): List of IDs corresponding to vectors.
                Defaults to a freshly generated UUID per vector.
        """
        # Local import keeps this block self-contained; uuid is standard library.
        import uuid

        # Materialize once so the defaults below can be sized from it.
        vectors = list(vectors)
        if payloads is None:
            payloads = [{} for _ in vectors]
        if ids is None:
            ids = [str(uuid.uuid4()) for _ in vectors]

        for idx, vector, metadata in zip(ids, vectors, payloads):
            row = Row(id=idx, vector=vector, metadata=metadata)
            self._table.upsert(rows=[row])

    def search(self, query: str, vectors: list, limit: int = 5, filters: Optional[dict] = None) -> list:
        """
        Search for similar vectors.

        Args:
            query (str): Query string (not used by this implementation).
            vectors (List[float]): Query vector.
            limit (int, optional): Number of results to return. Defaults to 5.
            filters (Dict, optional): Filters to apply to the search. Defaults to None.

        Returns:
            list: Search results as ``OutputData`` objects.
        """
        # Add filters if provided
        search_filter = self._create_filter(filters) if filters else None

        # Build the top-k vector search request
        request = VectorTopkSearchRequest(
            vector_field="vector",
            vector=FloatVector(vectors),
            limit=limit,
            filter=search_filter,
            config=VectorSearchConfig(ef=200),
        )

        # Perform search
        res = self._table.vector_search(request=request, projections=["id", "metadata"])

        # Parse results: each hit carries the projected fields under "row"
        # and its score next to it.
        output = []
        for hit in res.rows:
            row_data = hit.get("row", {})
            output.append(
                OutputData(
                    id=row_data.get("id"),
                    score=hit.get("score", 0.0),
                    payload=row_data.get("metadata", {}),
                )
            )
        return output

    def delete(self, vector_id):
        """
        Delete a vector by ID.

        Args:
            vector_id (str): ID of the vector to delete.
        """
        self._table.delete(primary_key={"id": vector_id})

    def update(self, vector_id=None, vector=None, payload=None):
        """
        Update a vector and its payload.

        Args:
            vector_id (str): ID of the vector to update.
            vector (List[float], optional): Updated vector.
            payload (Dict, optional): Updated payload.
        """
        # NOTE(review): this upserts a full row, so an omitted vector/payload
        # is sent as None rather than left untouched -- confirm callers always
        # pass both, or switch to a partial update if the SDK offers one.
        row = Row(id=vector_id, vector=vector, metadata=payload)
        self._table.upsert(rows=[row])

    def get(self, vector_id):
        """
        Retrieve a vector by ID.

        Args:
            vector_id (str): ID of the vector to retrieve.

        Returns:
            Optional[OutputData]: Retrieved record (id and metadata only), or
            None when the query returns no row.
        """
        result = self._table.query(primary_key={"id": vector_id}, projections=["id", "metadata"])
        row = result.row
        if row is None:
            # Nothing stored under this id; avoid calling .get on None.
            return None
        return OutputData(id=row.get("id"), score=None, payload=row.get("metadata", {}))

    def list_cols(self):
        """
        List all tables (collections).

        Returns:
            List[str]: List of table names.
        """
        return [table.table_name for table in self._database.list_table()]

    def delete_col(self):
        """Delete the table and block until the deletion has completed.

        Does nothing when the table does not exist. After the drop is
        initiated, polls every 2 seconds until describing the table fails
        with ``ServerErrCode.TABLE_NOT_EXIST``.

        Raises:
            Exception: Any other client error is logged and re-raised.
        """
        try:
            tables = self._database.list_table()

            # skip drop table if table not exists
            table_exists = any(table.table_name == self.table_name for table in tables)
            if not table_exists:
                logger.info(f"Table {self.table_name} does not exist, skipping deletion")
                return

            # Delete the table
            self._database.drop_table(self.table_name)
            logger.info(f"Initiated deletion of table {self.table_name}")

            # Wait for table to be completely deleted
            while True:
                time.sleep(2)
                try:
                    self._database.describe_table(self.table_name)
                    logger.info(f"Waiting for table {self.table_name} to be deleted...")
                except ServerError as e:
                    if e.code == ServerErrCode.TABLE_NOT_EXIST:
                        logger.info(f"Table {self.table_name} has been completely deleted")
                        break
                    logger.error(f"Error checking table status: {e}")
                    raise
        except Exception as e:
            logger.error(f"Error deleting table: {e}")
            raise

    def col_info(self):
        """
        Get information about the table.

        Returns:
            Dict[str, Any]: Table information as returned by the table's ``stats()``.
        """
        return self._table.stats()

    def list(self, filters: Optional[dict] = None, limit: int = 100) -> list:
        """
        List vectors in the table.

        Args:
            filters (Dict, optional): Filters to apply to the list.
            limit (int, optional): Number of vectors to return. Defaults to 100.

        Returns:
            List[List[OutputData]]: A one-element list whose only item is the
            list of matching records.
        """
        list_filter = self._create_filter(filters) if filters else None
        result = self._table.select(filter=list_filter, projections=["id", "metadata"], limit=limit)

        memories = [
            OutputData(id=row.get("id"), score=None, payload=row.get("metadata", {}))
            for row in result.rows
        ]

        # The extra list wrapper is part of the existing return shape.
        return [memories]

    def reset(self):
        """Reset the table by deleting and recreating it.

        Raises:
            Exception: Any error from deletion or creation is logged and re-raised.
        """
        logger.warning(f"Resetting table {self.table_name}...")
        try:
            self.delete_col()
            self.create_col(
                name=self.table_name,
                vector_size=self.embedding_model_dims,
                distance=self.metric_type,
            )
        except Exception as e:
            logger.warning(f"Error resetting table: {e}")
            raise

    def _create_filter(self, filters: dict) -> str:
        """
        Create filter expression for queries.

        Each key/value pair becomes an equality test on the ``metadata``
        field; the tests are joined with ``AND``. String values are wrapped
        in double quotes, every other value is interpolated as-is.

        Args:
            filters (dict): Filter conditions.

        Returns:
            str: Filter expression (empty string for an empty dict).
        """
        # NOTE(review): keys and values are interpolated without escaping, so
        # a double quote inside either yields a malformed expression -- confirm
        # the service's escaping rules before accepting untrusted filters.
        conditions = []
        for key, value in filters.items():
            if isinstance(value, str):
                conditions.append(f'metadata["{key}"] = "{value}"')
            else:
                conditions.append(f'metadata["{key}"] = {value}')
        return " AND ".join(conditions)
| #369 |