repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import hashlib |
| #2 | import os |
| #3 | |
| #4 | from dropbox.files import FileMetadata |
| #5 | |
| #6 | from embedchain.helpers.json_serializable import register_deserializable |
| #7 | from embedchain.loaders.base_loader import BaseLoader |
| #8 | from embedchain.loaders.directory_loader import DirectoryLoader |
| #9 | |
| #10 | |
@register_deserializable
class DropboxLoader(BaseLoader):
    """Load the contents of a Dropbox folder.

    The folder is mirrored into a temporary local directory, handed to
    ``DirectoryLoader``, and the local copy is removed afterwards.
    """

    def __init__(self):
        """Create a Dropbox client from the ``DROPBOX_ACCESS_TOKEN`` environment variable.

        Raises:
            ValueError: If the environment variable is unset or empty, or if
                Dropbox rejects the token with an ``AuthError``.
            ImportError: If the ``dropbox`` package cannot be imported.
        """
        access_token = os.environ.get("DROPBOX_ACCESS_TOKEN")
        if not access_token:
            raise ValueError("Please set the `DROPBOX_ACCESS_TOKEN` environment variable.")
        try:
            from dropbox import Dropbox, exceptions
        except ImportError:
            # The original ImportError adds nothing useful; show only the install hint.
            raise ImportError(
                "Dropbox requires extra dependencies. Install with `pip install dropbox==11.36.2`"
            ) from None

        try:
            dbx = Dropbox(access_token)
            # Issue an authenticated call up front so a bad token fails here
            # rather than on the first folder listing.
            dbx.users_get_current_account()
        except exceptions.AuthError as ex:
            raise ValueError("Invalid Dropbox access token. Please verify your token and try again.") from ex
        self.dbx = dbx

    def _list_folder_entries(self, path: str) -> list:
        """Return every entry directly inside the Dropbox folder ``path``.

        ``files_list_folder`` returns its results in pages, so keep following
        the cursor until ``has_more`` is false; otherwise large folders would
        be silently truncated to their first page.
        """
        result = self.dbx.files_list_folder(path)
        entries = list(result.entries)
        while result.has_more:
            result = self.dbx.files_list_folder_continue(result.cursor)
            entries.extend(result.entries)
        return entries

    def _download_folder(self, path: str, local_root: str) -> list[FileMetadata]:
        """Download a folder from Dropbox and save it preserving the directory structure.

        Args:
            path: Dropbox path of the folder to download.
            local_root: Existing local directory that receives the folder's contents.

        Returns:
            The entries found directly inside ``path`` (files and sub-folders).
        """
        entries = self._list_folder_entries(path)
        for entry in entries:
            remote_path = f"{path}/{entry.name}"
            local_path = os.path.join(local_root, entry.name)
            if isinstance(entry, FileMetadata):
                self.dbx.files_download_to_file(local_path, remote_path)
            else:
                # Anything that is not a file is treated as a sub-folder and mirrored recursively.
                os.makedirs(local_path, exist_ok=True)
                self._download_folder(remote_path, local_path)
        return entries

    def _generate_dir_id_from_all_paths(self, path: str) -> str:
        """Generate an ID for a directory from the paths of its direct children.

        Returns:
            The SHA-256 hex digest of the concatenated child paths.
        """
        paths = [f"{path}/{entry.name}" for entry in self._list_folder_entries(path)]
        return hashlib.sha256("".join(paths).encode()).hexdigest()

    def load_data(self, path: str):
        """Load data from a Dropbox folder, preserving the folder structure.

        Args:
            path: Dropbox path of the folder to load.

        Returns:
            A dict with ``doc_id`` (SHA-256 hex digest of ``path``) and ``data``
            (the ``"data"`` value produced by ``DirectoryLoader`` for the
            downloaded copy).
        """
        root_dir = f"dropbox_{self._generate_dir_id_from_all_paths(path)}"
        os.makedirs(root_dir, exist_ok=True)
        try:
            self._download_folder(path, root_dir)
            data = DirectoryLoader().load_data(root_dir)["data"]
        finally:
            # Remove the local mirror even when downloading or loading fails,
            # so failed runs do not leave directories behind.
            self._clean_directory(root_dir)

        return {
            "doc_id": hashlib.sha256(path.encode()).hexdigest(),
            "data": data,
        }

    def _clean_directory(self, dir_path):
        """Recursively delete a directory and its contents."""
        import shutil

        # rmtree removes symlinks themselves instead of descending into their targets.
        shutil.rmtree(dir_path)
| #80 |