repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import concurrent.futures |
| #2 | import hashlib |
| #3 | import logging |
| #4 | |
| #5 | from tqdm import tqdm |
| #6 | |
| #7 | from embedchain.loaders.base_loader import BaseLoader |
| #8 | from embedchain.loaders.youtube_video import YoutubeVideoLoader |
| #9 | |
| #10 | logger = logging.getLogger(__name__) |
| #11 | |
| #12 | |
class YoutubeChannelLoader(BaseLoader):
    """Loader for youtube channel."""

    def load_data(self, channel_name):
        """Load the data of every video listed on a channel's "videos" page.

        The channel page ``https://www.youtube.com/{channel_name}/videos`` is
        listed with ``yt_dlp`` and each video found is loaded through
        ``YoutubeVideoLoader`` in a thread pool.

        Args:
            channel_name: Path segment identifying the channel; it is
                interpolated verbatim into the channel URL.

        Returns:
            A dict with two keys:
              * ``"doc_id"``: SHA-256 hex digest of the channel URL followed
                by the comma-joined URLs of the loaded videos.
              * ``"data"``: the concatenated ``"data"`` entries returned by
                ``YoutubeVideoLoader.load_data`` for each video, in the order
                the videos were listed.

        Raises:
            ValueError: If ``yt_dlp`` is not installed.
        """
        try:
            import yt_dlp
        except ImportError as e:
            raise ValueError(
                "YoutubeChannelLoader requires extra dependencies. Install with `pip install yt_dlp==2023.11.14 youtube-transcript-api==0.6.1`"  # noqa: E501
            ) from e

        data = []
        data_urls = []
        youtube_url = f"https://www.youtube.com/{channel_name}/videos"
        youtube_video_loader = YoutubeVideoLoader()

        def _get_yt_video_links():
            """Return the video URLs listed for the channel; ``[]`` if none or on failure."""
            try:
                ydl_opts = {
                    "quiet": True,
                    "extract_flat": True,
                }
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info_dict = ydl.extract_info(youtube_url, download=False)
                    if "entries" in info_dict:
                        return [entry["url"] for entry in info_dict["entries"]]
            except Exception:
                logger.error(f"Failed to fetch youtube videos for channel: {channel_name}")
            # Always hand back a list: the caller iterates over the result, so
            # a missing "entries" key must not leak out as None.
            return []

        def _load_yt_video(video_link):
            """Load one video; return its ``"data"`` entry or ``None`` on failure."""
            try:
                each_load_data = youtube_video_loader.load_data(video_link)
                if each_load_data:
                    return each_load_data.get("data")
            except Exception as e:
                logger.error(f"Failed to load youtube video {video_link}: {e}")
            return None

        def _add_youtube_channel():
            """Load all channel videos concurrently and fill ``data`` / ``data_urls``."""
            video_links = _get_yt_video_links()
            logger.info("Loading videos from youtube channel...")

            # Results are parked by listing position so they can be merged in
            # a stable order afterwards; thread completion order is arbitrary.
            loaded_by_position = {}
            with concurrent.futures.ThreadPoolExecutor() as executor:
                # Submitting all tasks and storing the future object with the video link
                future_to_video = {
                    executor.submit(_load_yt_video, video_link): (position, video_link)
                    for position, video_link in enumerate(video_links)
                }

                for future in tqdm(
                    concurrent.futures.as_completed(future_to_video), total=len(video_links), desc="Processing videos"
                ):
                    position, video = future_to_video[future]
                    try:
                        results = future.result()
                        if results:
                            # Extract the URLs before recording anything so a
                            # malformed result cannot leave data and data_urls
                            # out of sync.
                            urls = [result.get("meta_data").get("url") for result in results]
                            loaded_by_position[position] = (results, urls)
                    except Exception as e:
                        logger.error(f"Failed to process youtube video {video}: {e}")

            # Merge in listing order so that both the returned data and the
            # doc_id hash are reproducible for the same channel content.
            for position in sorted(loaded_by_position):
                results, urls = loaded_by_position[position]
                data.extend(results)
                data_urls.extend(urls)

        _add_youtube_channel()
        doc_id = hashlib.sha256((youtube_url + ", ".join(data_urls)).encode()).hexdigest()
        return {
            "doc_id": doc_id,
            "data": data,
        }
| #80 |