repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
| #1 | import os |
| #2 | |
| #3 | import pytest |
| #4 | |
| #5 | from embedchain import App |
| #6 | from embedchain.config import AddConfig, AppConfig, ChunkerConfig |
| #7 | from embedchain.models.data_type import DataType |
| #8 | |
# Set a placeholder OpenAI API key in the process environment at import time.
# NOTE(review): presumably needed so App() can be constructed without a real
# key — confirm against embedchain's App initialization.
os.environ["OPENAI_API_KEY"] = "test_key"
| #10 | |
| #11 | |
@pytest.fixture
def app(mocker):
    """Provide an ``App`` with metrics collection turned off.

    ``chromadb``'s ``Collection.add`` is patched with a mock before the app
    is built, so calls to it during a test hit the mock instead.
    """
    mocker.patch("chromadb.api.models.Collection.Collection.add")
    settings = AppConfig(collect_metrics=False)
    return App(config=settings)
| #16 | |
| #17 | |
def test_add(app):
    """Adding a URL records ``[source, "web_page", metadata]`` in ``user_asks``."""
    source = "https://example.com"
    app.add(source, metadata={"foo": "bar"})

    # Compare against a fresh dict literal rather than the object passed in,
    # so the check is on the recorded contents.
    expected = [[source, "web_page", {"foo": "bar"}]]
    assert app.user_asks == expected
| #21 | |
| #22 | |
| #23 | # TODO: Make this test faster by generating a sitemap locally rather than using a remote one |
| #24 | # def test_add_sitemap(app): |
| #25 | # app.add("https://www.google.com/sitemap.xml", metadata={"foo": "bar"}) |
| #26 | # assert app.user_asks == [["https://www.google.com/sitemap.xml", "sitemap", {"foo": "bar"}]] |
| #27 | |
| #28 | |
def test_add_forced_type(app):
    """Passing ``data_type`` explicitly records that type in ``user_asks``."""
    source = "https://example.com"
    forced_type = "text"
    app.add(source, data_type=forced_type, metadata={"foo": "bar"})

    expected = [[source, forced_type, {"foo": "bar"}]]
    assert app.user_asks == expected
| #33 | |
| #34 | |
def test_dry_run(app):
    """Check the result of ``add(..., dry_run=True)`` with one-character chunks.

    With ``chunk_size=1`` and no overlap, the test expects one chunk per
    character of the input, a matching ``count``, a ``DataType.TEXT`` type,
    and per-chunk metadata dicts whose ``url`` contains "local" and whose
    ``data_type`` contains "text".
    """
    sample = """0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"""
    single_char_chunker = ChunkerConfig(chunk_size=1, chunk_overlap=0, min_chunk_size=0)

    outcome = app.add(source=sample, config=AddConfig(chunker=single_char_chunker), dry_run=True)

    # Pull every expected key up front (same order as before) so a missing
    # key fails the test before any assertion runs.
    chunks, metadata, count, data_type = (
        outcome[key] for key in ("chunks", "metadata", "count", "type")
    )

    expected_total = len(sample)
    assert len(chunks) == expected_total
    assert count == expected_total
    assert data_type == DataType.TEXT
    for entry in metadata:
        assert isinstance(entry, dict)
        assert "local" in entry["url"]
        assert "text" in entry["data_type"]
| #53 |