repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
| #1 | from embedchain.chunkers.audio import AudioChunker |
| #2 | from embedchain.chunkers.common_chunker import CommonChunker |
| #3 | from embedchain.chunkers.discourse import DiscourseChunker |
| #4 | from embedchain.chunkers.docs_site import DocsSiteChunker |
| #5 | from embedchain.chunkers.docx_file import DocxFileChunker |
| #6 | from embedchain.chunkers.excel_file import ExcelFileChunker |
| #7 | from embedchain.chunkers.gmail import GmailChunker |
| #8 | from embedchain.chunkers.google_drive import GoogleDriveChunker |
| #9 | from embedchain.chunkers.json import JSONChunker |
| #10 | from embedchain.chunkers.mdx import MdxChunker |
| #11 | from embedchain.chunkers.notion import NotionChunker |
| #12 | from embedchain.chunkers.openapi import OpenAPIChunker |
| #13 | from embedchain.chunkers.pdf_file import PdfFileChunker |
| #14 | from embedchain.chunkers.postgres import PostgresChunker |
| #15 | from embedchain.chunkers.qna_pair import QnaPairChunker |
| #16 | from embedchain.chunkers.sitemap import SitemapChunker |
| #17 | from embedchain.chunkers.slack import SlackChunker |
| #18 | from embedchain.chunkers.table import TableChunker |
| #19 | from embedchain.chunkers.text import TextChunker |
| #20 | from embedchain.chunkers.web_page import WebPageChunker |
| #21 | from embedchain.chunkers.xml import XmlChunker |
| #22 | from embedchain.chunkers.youtube_video import YoutubeVideoChunker |
| #23 | from embedchain.config.add_config import ChunkerConfig |
| #24 | |
# Custom configuration passed to every chunker by test_custom_config_values.
chunker_config = ChunkerConfig(
    chunk_size=500,
    chunk_overlap=0,
    length_function=len,
)

# Expected default splitter settings for each chunker class. Every entry uses
# ``len`` as its length function, so only the chunk size and the chunk overlap
# are listed per class; the mapping keeps the order of the tuples below.
chunker_common_config = {
    chunker_class: {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    for chunker_class, chunk_size, chunk_overlap in (
        (DocsSiteChunker, 500, 50),
        (DocxFileChunker, 1000, 0),
        (PdfFileChunker, 1000, 0),
        (TextChunker, 300, 0),
        (MdxChunker, 1000, 0),
        (NotionChunker, 300, 0),
        (QnaPairChunker, 300, 0),
        (TableChunker, 300, 0),
        (SitemapChunker, 500, 0),
        (WebPageChunker, 2000, 0),
        (XmlChunker, 500, 50),
        (YoutubeVideoChunker, 2000, 0),
        (JSONChunker, 1000, 0),
        (OpenAPIChunker, 1000, 0),
        (GmailChunker, 1000, 0),
        (PostgresChunker, 1000, 0),
        (SlackChunker, 1000, 0),
        (DiscourseChunker, 1000, 0),
        (CommonChunker, 2000, 0),
        (GoogleDriveChunker, 1000, 0),
        (ExcelFileChunker, 1000, 0),
        (AudioChunker, 1000, 0),
    )
}
| #51 | |
| #52 | |
def test_default_config_values():
    """Check that each chunker built without arguments uses its expected defaults.

    Every class in ``chunker_common_config`` is instantiated with no config,
    and the private ``_chunk_size``, ``_chunk_overlap`` and ``_length_function``
    attributes of its ``text_splitter`` are compared with the values recorded
    for that class.
    """
    for chunker_class, expected in chunker_common_config.items():
        splitter = chunker_class().text_splitter
        # One test covers every chunker, so name the class in each failure
        # message; a bare assert would not say which chunker is misconfigured.
        name = chunker_class.__name__
        assert splitter._chunk_size == expected["chunk_size"], (
            f"{name}: unexpected default chunk_size"
        )
        assert splitter._chunk_overlap == expected["chunk_overlap"], (
            f"{name}: unexpected default chunk_overlap"
        )
        assert splitter._length_function == expected["length_function"], (
            f"{name}: unexpected default length_function"
        )
| #59 | |
| #60 | |
def test_custom_config_values():
    """Check that each chunker applies a config passed to its constructor.

    Every class in ``chunker_common_config`` is instantiated with the
    module-level ``chunker_config``, and the private attributes of its
    ``text_splitter`` are compared with the values that config was built from
    (chunk size 500, overlap 0, ``len`` as length function).
    """
    # Only the classes are needed here, so iterate the mapping's keys directly
    # instead of unpacking and discarding the per-class default values.
    for chunker_class in chunker_common_config:
        splitter = chunker_class(config=chunker_config).text_splitter
        # One test covers every chunker, so name the class in each failure
        # message; a bare assert would not say which chunker ignored the config.
        name = chunker_class.__name__
        assert splitter._chunk_size == 500, f"{name}: custom chunk_size not applied"
        assert splitter._chunk_overlap == 0, f"{name}: custom chunk_overlap not applied"
        assert splitter._length_function == len, (
            f"{name}: custom length_function not applied"
        )
| #67 |