repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import tempfile |
| #2 | import unittest |
| #3 | from unittest.mock import patch |
| #4 | |
| #5 | from embedchain.models.data_type import DataType |
| #6 | from embedchain.utils.misc import detect_datatype |
| #7 | |
| #8 | |
class TestApp(unittest.TestCase):
    """Test that the datatype detection is working, based on the input."""

    def test_detect_datatype_youtube(self):
        """Each of the listed YouTube-style URLs is detected as a YouTube video."""
        youtube_urls = (
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://m.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://www.youtube-nocookie.com/watch?v=dQw4w9WgXcQ",
            "https://vid.plus/watch?v=dQw4w9WgXcQ",
            "https://youtu.be/dQw4w9WgXcQ",
        )
        for url in youtube_urls:
            # subTest reports every failing URL instead of stopping at the first one.
            with self.subTest(url=url):
                self.assertEqual(detect_datatype(url), DataType.YOUTUBE_VIDEO)

    def test_detect_datatype_local_file(self):
        """A ``file://`` URL pointing at a ``.txt`` file is expected to be a web page."""
        self.assertEqual(detect_datatype("file:///home/user/file.txt"), DataType.WEB_PAGE)

    def test_detect_datatype_pdf(self):
        self.assertEqual(detect_datatype("https://www.example.com/document.pdf"), DataType.PDF_FILE)

    def test_detect_datatype_local_pdf(self):
        self.assertEqual(detect_datatype("file:///home/user/document.pdf"), DataType.PDF_FILE)

    def test_detect_datatype_xml(self):
        self.assertEqual(detect_datatype("https://www.example.com/sitemap.xml"), DataType.SITEMAP)

    def test_detect_datatype_local_xml(self):
        self.assertEqual(detect_datatype("file:///home/user/sitemap.xml"), DataType.SITEMAP)

    def test_detect_datatype_docx(self):
        self.assertEqual(detect_datatype("https://www.example.com/document.docx"), DataType.DOCX)

    def test_detect_datatype_local_docx(self):
        self.assertEqual(detect_datatype("file:///home/user/document.docx"), DataType.DOCX)

    def test_detect_data_type_json(self):
        self.assertEqual(detect_datatype("https://www.example.com/data.json"), DataType.JSON)

    def test_detect_data_type_local_json(self):
        self.assertEqual(detect_datatype("file:///home/user/data.json"), DataType.JSON)

    @patch("os.path.isfile")
    def test_detect_datatype_regular_filesystem_docx(self, mock_isfile):
        """A plain filesystem path ending in ``.docx`` is detected as DOCX."""
        with tempfile.NamedTemporaryFile(suffix=".docx", delete=True) as tmp:
            mock_isfile.return_value = True
            self.assertEqual(detect_datatype(tmp.name), DataType.DOCX)

    def test_detect_datatype_docs_site(self):
        self.assertEqual(detect_datatype("https://docs.example.com"), DataType.DOCS_SITE)

    def test_detect_datatype_docs_sitein_path(self):
        """A ``/docs/`` path segment marks an http(s) URL as a docs site, but not a ``file://`` URL."""
        self.assertEqual(detect_datatype("https://www.example.com/docs/index.html"), DataType.DOCS_SITE)
        # Deliberately assertNotEqual: a local file under a docs/ directory must not count.
        self.assertNotEqual(detect_datatype("file:///var/www/docs/index.html"), DataType.DOCS_SITE)

    def test_detect_datatype_web_page(self):
        self.assertEqual(detect_datatype("https://nav.al/agi"), DataType.WEB_PAGE)

    def test_detect_datatype_invalid_url(self):
        self.assertEqual(detect_datatype("not a url"), DataType.TEXT)

    def test_detect_datatype_qna_pair(self):
        """A tuple of two strings is detected as a QnA pair."""
        self.assertEqual(
            detect_datatype(("Question?", "Answer. Content of the string is irrelevant.")), DataType.QNA_PAIR
        )

    def test_detect_datatype_qna_pair_types(self):
        """Test that a QnA pair needs to be a tuple of length two, and both items have to be strings."""
        # The call itself must raise; asserting on its return value inside this
        # block could never influence the outcome, so no inner assertion is made.
        with self.assertRaises(TypeError):
            detect_datatype(("How many planets are in our solar system?", 8))

    def test_detect_datatype_text(self):
        self.assertEqual(detect_datatype("Just some text."), DataType.TEXT)

    def test_detect_datatype_non_string_error(self):
        """Test type error if the value passed is not a string, and not a valid non-string data_type"""
        with self.assertRaises(TypeError):
            detect_datatype(["foo", "bar"])

    @patch("os.path.isfile")
    def test_detect_datatype_regular_filesystem_file_txt(self, mock_isfile):
        """A plain filesystem path ending in ``.txt`` is detected as a text file."""
        with tempfile.NamedTemporaryFile(suffix=".txt", delete=True) as tmp:
            mock_isfile.return_value = True
            self.assertEqual(detect_datatype(tmp.name), DataType.TEXT_FILE)

    def test_detect_datatype_regular_filesystem_no_file(self):
        """Test that if a filepath is not actually an existing file, it is not handled as a file path."""
        self.assertEqual(detect_datatype("/var/not-an-existing-file.txt"), DataType.TEXT)

    def test_doc_examples_quickstart(self):
        """Test examples used in the documentation."""
        self.assertEqual(detect_datatype("https://en.wikipedia.org/wiki/Elon_Musk"), DataType.WEB_PAGE)
        self.assertEqual(detect_datatype("https://www.tesla.com/elon-musk"), DataType.WEB_PAGE)

    def test_doc_examples_introduction(self):
        """Test examples used in the documentation."""
        self.assertEqual(detect_datatype("https://www.youtube.com/watch?v=3qHkcs3kG44"), DataType.YOUTUBE_VIDEO)
        self.assertEqual(
            detect_datatype(
                "https://navalmanack.s3.amazonaws.com/Eric-Jorgenson_The-Almanack-of-Naval-Ravikant_Final.pdf"
            ),
            DataType.PDF_FILE,
        )
        self.assertEqual(detect_datatype("https://nav.al/feedback"), DataType.WEB_PAGE)

    def test_doc_examples_app_types(self):
        """Test examples used in the documentation."""
        self.assertEqual(detect_datatype("https://www.youtube.com/watch?v=Ff4fRgnuFgQ"), DataType.YOUTUBE_VIDEO)
        self.assertEqual(detect_datatype("https://en.wikipedia.org/wiki/Mark_Zuckerberg"), DataType.WEB_PAGE)

    def test_doc_examples_configuration(self):
        """Test examples used in the documentation.

        Fetches a Wikipedia article over the network and checks that its plain
        text content is detected as TEXT.
        """
        try:
            import wikipedia
        except ImportError:
            # Only shell out to pip when the package is really missing, so an
            # environment that already provides it is neither slowed down nor modified.
            import importlib
            import subprocess
            import sys

            subprocess.check_call([sys.executable, "-m", "pip", "install", "wikipedia"])
            # The failed import above may have cached stale directory listings;
            # refresh the finders so the freshly installed package is discovered.
            importlib.invalidate_caches()
            import wikipedia

        page = wikipedia.page("Albert Einstein")
        # TODO: Add a wikipedia type, so wikipedia is a dependency and we don't need this slow test.
        # (timings: import: 1.4s, fetch wiki: 0.7s)
        self.assertEqual(detect_datatype(page.content), DataType.TEXT)
| #130 | |
| #131 | |
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
| #134 |