repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6
docs: add automaton and perps launch sources
16d ago
| #1 | import hashlib |
| #2 | from unittest.mock import Mock, patch |
| #3 | |
| #4 | import pytest |
| #5 | from requests import Response |
| #6 | |
| #7 | from embedchain.loaders.docs_site_loader import DocsSiteLoader |
| #8 | |
| #9 | |
@pytest.fixture
def mock_requests_get():
    """Patch ``requests.get`` and yield the replacement mock.

    The patch stays active while the consuming test runs and is undone
    when the ``with`` block exits.
    """
    with patch("requests.get") as patched_get:
        yield patched_get
| #14 | |
| #15 | |
@pytest.fixture
def docs_site_loader():
    """Return a newly constructed ``DocsSiteLoader``."""
    loader = DocsSiteLoader()
    return loader
| #19 | |
| #20 | |
def test_get_child_links_recursive(mock_requests_get, docs_site_loader):
    """A 200 page with two relative links yields two visited links."""
    # NOTE(review): literal reproduced as shown in the listing; any leading
    # indentation inside it is not visible there — confirm against the file.
    html = """
<html>
<a href="/page1">Page 1</a>
<a href="/page2">Page 2</a>
</html>
"""
    mock_requests_get.return_value = Mock(status_code=200, text=html)

    docs_site_loader._get_child_links_recursive("https://example.com")

    assert len(docs_site_loader.visited_links) == 2
    for expected_link in (
        "https://example.com/page1",
        "https://example.com/page2",
    ):
        assert expected_link in docs_site_loader.visited_links
| #37 | |
| #38 | |
def test_get_child_links_recursive_status_not_200(mock_requests_get, docs_site_loader):
    """A 404 response leaves ``visited_links`` empty."""
    mock_requests_get.return_value = Mock(status_code=404)

    docs_site_loader._get_child_links_recursive("https://example.com")

    visited_count = len(docs_site_loader.visited_links)
    assert visited_count == 0
| #47 | |
| #48 | |
def test_get_all_urls(mock_requests_get, docs_site_loader):
    """``_get_all_urls`` returns the two relative links and the absolute one."""
    # NOTE(review): literal reproduced as shown in the listing; any leading
    # indentation inside it is not visible there — confirm against the file.
    html = """
<html>
<a href="/page1">Page 1</a>
<a href="/page2">Page 2</a>
<a href="https://example.com/external">External</a>
</html>
"""
    mock_requests_get.return_value = Mock(status_code=200, text=html)

    all_urls = docs_site_loader._get_all_urls("https://example.com")

    assert len(all_urls) == 3
    for expected_url in (
        "https://example.com/page1",
        "https://example.com/page2",
        "https://example.com/external",
    ):
        assert expected_url in all_urls
| #67 | |
| #68 | |
def test_load_data_from_url(mock_requests_get, docs_site_loader):
    """A 200 page yields one record holding the article text and the URL."""
    page_url = "https://example.com/page1"
    # NOTE(review): literal reproduced as shown in the listing; any leading
    # indentation inside it is not visible there — confirm against the file.
    html = """
<html>
<nav>
<h1>Navigation</h1>
</nav>
<article class="bd-article">
<p>Article Content</p>
</article>
</html>
"""
    # The loader is handed bytes through ``content``, not ``text``.
    mock_requests_get.return_value = Mock(status_code=200, content=html.encode())

    data = docs_site_loader._load_data_from_url(page_url)

    assert len(data) == 1
    record = data[0]
    assert record["content"] == "Article Content"
    assert record["meta_data"]["url"] == page_url
| #89 | |
| #90 | |
def test_load_data_from_url_status_not_200(mock_requests_get, docs_site_loader):
    """A 404 response makes ``_load_data_from_url`` return an empty list."""
    mock_requests_get.return_value = Mock(status_code=404)

    data = docs_site_loader._load_data_from_url("https://example.com/page1")

    assert data == []
    assert len(data) == 0
| #100 | |
| #101 | |
def test_load_data(mock_requests_get, docs_site_loader):
    """``load_data`` returns two records and a doc id hashed from the links."""
    url = "https://example.com"

    # A real ``Response`` is used here, so the body goes in ``_content``.
    response = Response()
    response.status_code = 200
    # NOTE(review): literal reproduced as shown in the listing; any leading
    # indentation inside it is not visible there — confirm against the file.
    response._content = """
<html>
<a href="/page1">Page 1</a>
<a href="/page2">Page 2</a>
""".encode()
    mock_requests_get.return_value = response

    data = docs_site_loader.load_data(url)

    # visited_links is read only after load_data has run, as in the original.
    hash_input = " ".join(docs_site_loader.visited_links) + url
    expected_doc_id = hashlib.sha256(hash_input.encode()).hexdigest()

    assert len(data["data"]) == 2
    assert data["doc_id"] == expected_doc_id
| #118 | |
| #119 | |
def test_if_response_status_not_200(mock_requests_get, docs_site_loader):
    """With a 404 response ``load_data`` returns no records but still a doc id."""
    url = "https://example.com"

    response = Response()
    response.status_code = 404
    mock_requests_get.return_value = response

    data = docs_site_loader.load_data(url)

    # visited_links is read only after load_data has run, as in the original.
    hash_input = " ".join(docs_site_loader.visited_links) + url
    expected_doc_id = hashlib.sha256(hash_input.encode()).hexdigest()

    assert len(data["data"]) == 0
    assert data["doc_id"] == expected_doc_id
| #131 |