repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (15d ago)
| #1 | """ |
| #2 | Unit tests for Mnemosyne Entity Sketching System. |
| #3 | |
| #4 | Tests: |
| #5 | - Levenshtein distance and similarity |
| #6 | - Regex entity extraction |
| #7 | - Fuzzy entity matching |
| #8 | - Triple storage for entities (listed here, but no test in this file covers it yet) |
| #9 | """ |
| #10 | |
| #11 | import sys |
| #12 | import os |
| #13 | import unittest |
| #14 | |
| #15 | # Add mnemosyne to path |
| #16 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) |
| #17 | |
| #18 | from mnemosyne.core.entities import ( |
| #19 | levenshtein_distance, |
| #20 | similarity, |
| #21 | extract_entities_regex, |
| #22 | find_similar_entities, |
| #23 | ENTITY_EXTRACTION_STOP_WORDS, |
| #24 | ) |
| #25 | |
| #26 | |
class TestLevenshtein(unittest.TestCase):
    """Checks for the pure Python ``levenshtein_distance`` implementation."""

    def test_exact_match(self):
        # Comparing a string with itself (including the empty string)
        # must cost zero edits.
        for text in ("hello", ""):
            self.assertEqual(levenshtein_distance(text, text), 0)

    def test_single_insertion(self):
        # One extra trailing character on the right-hand side.
        distance = levenshtein_distance("cat", "cats")
        self.assertEqual(distance, 1)

    def test_single_deletion(self):
        # One character missing from the right-hand side.
        distance = levenshtein_distance("cats", "cat")
        self.assertEqual(distance, 1)

    def test_single_substitution(self):
        # Same length, one differing character.
        distance = levenshtein_distance("cat", "cut")
        self.assertEqual(distance, 1)

    def test_empty_string(self):
        # With one side empty, the expected distance is the length of the
        # other side (3 for "abc"), regardless of argument order.
        for left, right in (("", "abc"), ("abc", "")):
            self.assertEqual(levenshtein_distance(left, right), 3)

    def test_unicode(self):
        # Each non-ASCII character is expected to count as a single edit.
        for left, right in (("café", "cafe"), ("日本", "日本語")):
            self.assertEqual(levenshtein_distance(left, right), 1)
| #50 | |
| #51 | |
class TestSimilarity(unittest.TestCase):
    """Exercise the prefix-biased ``similarity`` scoring function."""

    def test_exact_match(self):
        score = similarity("Abdias", "Abdias")
        self.assertEqual(score, 1.0)

    def test_similar_names(self):
        # The bare name against the same name plus an initial must score
        # strictly above 0.8.
        with_initial = similarity("Abdias", "Abdias J.")
        self.assertGreater(with_initial, 0.8)
        # The bare name against name + surname only has to clear the
        # looser 0.7 bound.
        with_surname = similarity("Abdias", "Abdias Moya")
        self.assertGreater(with_surname, 0.7)

    def test_different_names(self):
        # "Abdias" and "Abdul" share a short prefix: the score must stay
        # below 0.8 yet above 0.3.
        pair = ("Abdias", "Abdul")
        self.assertLess(similarity(*pair), 0.8)
        self.assertGreater(similarity(*pair), 0.3)

    def test_completely_different(self):
        # Unrelated words must score below 0.3.
        score = similarity("Abdias", "Zebra")
        self.assertLess(score, 0.3)

    def test_case_insensitive(self):
        # Letter case must not affect the score.
        score = similarity("ABDIAS", "abdias")
        self.assertEqual(score, 1.0)

    def test_short_strings(self):
        # Single-character inputs: identical -> 1.0, different -> 0.0.
        for left, right, expected in (("A", "A", 1.0), ("A", "B", 0.0)):
            self.assertEqual(similarity(left, right), expected)

    def test_partial_prefix(self):
        # A three-letter prefix of the full name must score above 0.5.
        score = similarity("Abd", "Abdias")
        self.assertGreater(score, 0.5)
| #82 | |
| #83 | |
class TestRegexEntityExtraction(unittest.TestCase):
    """Test regex-based entity extraction.

    Every test passes a short text to ``extract_entities_regex`` and checks
    membership in, or the length of, the returned collection.

    Each test name is defined exactly once. Python keeps only the last
    definition of a method name in a class body, so a repeated ``def``
    would silently drop the earlier test instead of running both.
    """

    def test_simple_name(self):
        result = extract_entities_regex("I met Abdias yesterday.")
        self.assertIn("Abdias", result)

    def test_multiple_names(self):
        result = extract_entities_regex("Abdias and Maya went to New York.")
        self.assertIn("Abdias", result)
        self.assertIn("Maya", result)
        # A multi-word capitalized sequence is reported as one entity.
        self.assertIn("New York", result)

    def test_quoted_phrase(self):
        result = extract_entities_regex('She said "Hello World" to everyone.')
        self.assertIn("Hello World", result)

    def test_no_entities(self):
        result = extract_entities_regex("the quick brown fox jumps")
        # All lowercase, no entities expected
        self.assertEqual(len(result), 0)

    def test_at_mention(self):
        result = extract_entities_regex("Contact @abdias for help.")
        # @mentions capture the word after @, not the @ itself
        self.assertIn("abdias", result)

    def test_hashtag(self):
        result = extract_entities_regex("This is #ImportantNews today.")
        # Hashtags capture the word after #, not the # itself
        self.assertIn("ImportantNews", result)

    def test_stop_words_filtered(self):
        result = extract_entities_regex("The Quick Brown Fox")
        # "The" should be filtered as a stop word, but the full phrase
        # "The Quick Brown Fox" is kept (capitalized sequence)
        self.assertNotIn("The", result)
        self.assertIn("The Quick Brown Fox", result)

    def test_mixed_content(self):
        result = extract_entities_regex(
            "Abdias said: 'The Mnemosyne project is #Awesome. "
            "Contact @support or visit New York.'"
        )
        self.assertIn("Abdias", result)
        # "The Mnemosyne" is extracted as a capitalized sequence
        self.assertIn("The Mnemosyne", result)
        self.assertIn("Awesome", result)  # from #Awesome
        self.assertIn("support", result)  # from @support
        self.assertIn("New York", result)
| #148 | |
| #149 | |
class TestFindSimilarEntities(unittest.TestCase):
    """Test fuzzy entity matching against known entities.

    ``find_similar_entities(query, known, threshold=...)`` is expected to
    return a list of ``(known_name, score)`` tuples, as pinned by the
    exact-match tests below.
    """

    def test_exact_match(self):
        known = ["Abdias", "Maya", "Mnemosyne"]
        result = find_similar_entities("Abdias", known, threshold=0.8)
        self.assertEqual(result, [("Abdias", 1.0)])

    def test_fuzzy_match(self):
        known = ["Abdias", "Maya", "Mnemosyne"]
        result = find_similar_entities("Abdias J.", known, threshold=0.8)
        # Look the match up by name and compare the score with a tolerance:
        # pinning the exact float repr (0.8999999999999999) would fail on
        # any harmless change in the order of the arithmetic.
        scores = dict(result)
        self.assertIn("Abdias", scores)
        self.assertAlmostEqual(scores["Abdias"], 0.9, places=7)

    def test_no_match_below_threshold(self):
        known = ["Abdias", "Maya"]
        result = find_similar_entities("Zebra", known, threshold=0.8)
        self.assertEqual(len(result), 0)

    def test_multiple_matches(self):
        known = ["Abdias Moya", "Abdias J.", "Maya"]
        result = find_similar_entities("Abdias", known, threshold=0.7)
        # Both Abdias variants are expected to match, but only a lower
        # bound of one match is asserted here.
        self.assertGreaterEqual(len(result), 1)

    def test_case_insensitive_match(self):
        known = ["Abdias"]
        result = find_similar_entities("ABDIAS", known, threshold=0.8)
        # The known entity is returned with its original casing.
        self.assertEqual(result, [("Abdias", 1.0)])
| #178 | |
| #179 | |
class TestStopWords(unittest.TestCase):
    """Sanity checks on the ``ENTITY_EXTRACTION_STOP_WORDS`` collection."""

    def test_common_stop_words_present(self):
        # A few very common English words must be listed.
        for word in ("the", "and", "for"):
            self.assertIn(word, ENTITY_EXTRACTION_STOP_WORDS)

    def test_case_insensitive(self):
        # Entries are stored lowercase, so the word is lowered before the
        # membership check.
        lowered = "The".lower()
        self.assertIn(lowered, ENTITY_EXTRACTION_STOP_WORDS)
| #191 | |
| #192 | |
class TestEdgeCases(unittest.TestCase):
    """Boundary inputs and error handling for the entity helpers."""

    def test_empty_string_extraction(self):
        # An empty text yields no entities.
        entities = extract_entities_regex("")
        self.assertEqual(len(entities), 0)

    def test_whitespace_only(self):
        # Text made only of whitespace yields no entities either.
        entities = extract_entities_regex(" \n\t ")
        self.assertEqual(len(entities), 0)

    def test_similarity_with_empty(self):
        # Two empty strings count as identical.
        self.assertEqual(similarity("", ""), 1.0)
        # Empty vs non-empty is pinned at 0.7 in either argument order.
        # Per the original author's note this is the prefix-match path:
        # the shorter string has length 0 and the longer length 3, giving
        # 0.7 + (0/3)*0.3 = 0.7.
        for left, right in (("abc", ""), ("", "abc")):
            self.assertEqual(similarity(left, right), 0.7)

    def test_levenshtein_with_none(self):
        # A None argument must be rejected with one of these exceptions.
        accepted_errors = (TypeError, AttributeError)
        with self.assertRaises(accepted_errors):
            levenshtein_distance(None, "abc")
| #215 | |
| #216 | |
if __name__ == "__main__":
    # Run every TestCase in this module when the file is executed directly.
    unittest.main()
| #219 |