repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | """ |
| #2 | Mnemosyne Structured Fact Extraction |
| #3 | ==================================== |
| #4 | LLM-driven fact extraction as a derived layer. |
| #5 | Extracts 2-5 concise factual statements from raw text. |
| #6 | Facts are stored as TripleStore triples, not replacements for raw text. |
| #7 | |
| #8 | Uses the same LLM fallback chain as local_llm.py: |
| #9 | |
| #10 | 0. Host-provided LLM backend (when MNEMOSYNE_HOST_LLM_ENABLED=true and a |
| #11 | backend is registered). On host attempt with no usable output, skips |
| #12 | the remote URL and goes straight to local GGUF. |
| #13 | 1. Remote OpenAI-compatible API (if MNEMOSYNE_LLM_BASE_URL set |
| #14 | AND MNEMOSYNE_LLM_ENABLED is not false). |
| #15 | 2. Local ctransformers GGUF model. |
| #16 | 3. Skip extraction (graceful degradation). |
| #17 | |
| #18 | Extraction uses temperature=0.0 (deterministic) so re-ingesting the same |
| #19 | content does not create near-duplicate facts in the facts table. |
| #20 | """ |
| #21 | |
| #22 | import os |
| #23 | from typing import List, Optional |
| #24 | |
| #25 | # Reuse local_llm infrastructure |
| #26 | from mnemosyne.core import local_llm |
| #27 | from mnemosyne.core.local_llm import ( |
| #28 | llm_available, |
| #29 | _call_remote_llm, |
| #30 | _load_llm, |
| #31 | _try_host_llm, |
| #32 | LLM_BASE_URL, |
| #33 | LLM_ENABLED, |
| #34 | LLM_MAX_TOKENS, |
| #35 | _clean_output, |
| #36 | ) |
| #37 | |
| #38 | # --- Config ------------------------------------------------------------------ |
# Built-in prompt template; "{text}" marks where the memory content is inserted.
_DEFAULT_EXTRACTION_PROMPT = (
    "Extract 2-5 concise factual statements from the following text. "
    "Each fact should be a complete sentence describing something true about the subject. "
    "Focus on preferences, opinions, experiences, and factual claims. "
    "Return one fact per line. Do not number them. "
    "If no facts can be extracted, return 'NO_FACTS'.\n\nText: {text}\n\nFacts:"
)

# The MNEMOSYNE_EXTRACTION_PROMPT environment variable, when set, replaces the
# built-in template. The value is read once, at import time.
EXTRACTION_PROMPT = os.getenv("MNEMOSYNE_EXTRACTION_PROMPT", _DEFAULT_EXTRACTION_PROMPT)
| #47 | |
| #48 | |
| #49 | def _build_extraction_prompt(text: str) -> str: |
| #50 | """Build the extraction prompt with the user text inserted.""" |
| #51 | return EXTRACTION_PROMPT.format(text=text) |
| #52 | |
| #53 | |
| #54 | def _parse_facts(raw_output: str) -> List[str]: |
| #55 | """Parse LLM output into individual facts.""" |
| #56 | if not raw_output or raw_output.strip().upper() == "NO_FACTS": |
| #57 | return [] |
| #58 | |
| #59 | # Split on newlines, filter empty lines |
| #60 | lines = [line.strip() for line in raw_output.split("\n") if line.strip()] |
| #61 | |
| #62 | # Clean up any numbering or bullet prefixes |
| #63 | cleaned = [] |
| #64 | for line in lines: |
| #65 | # Remove leading numbers/bullets: "1. fact" or "- fact" or "* fact" |
| #66 | line = line.lstrip("0123456789.-* ").strip() |
| #67 | if line and len(line) > 10: # Minimum fact length |
| #68 | cleaned.append(line) |
| #69 | |
| #70 | return cleaned[:5] # Cap at 5 facts |
| #71 | |
| #72 | |
def _extract_with_local_llm(prompt: str) -> List[str]:
    """Run *prompt* through the local model and parse its output into facts.

    Returns an empty list when no local model can be loaded or when the
    model call, output cleaning, or parsing raises.
    """
    llm = local_llm._load_llm()
    if llm is None:
        return []
    try:
        # NOTE(review): no temperature is passed here, so the temperature=0.0
        # setting used on the host and remote paths is not applied to the
        # local model call — confirm the local model's defaults.
        raw_output = llm(
            prompt,
            max_new_tokens=local_llm.LLM_MAX_TOKENS,
            stop=["</s>", "<|user|>"],
        )
        return _parse_facts(local_llm._clean_output(raw_output))
    except Exception:
        # Deliberate best-effort: a failing local model degrades to "no facts".
        return []


def extract_facts(text: str) -> List[str]:
    """
    Extract structured facts from raw text using LLM.

    Args:
        text: Raw memory content to extract facts from

    Returns:
        List of extracted fact strings (0-5 items). Empty list if the text is
        blank, the LLM is unavailable, or no backend yields any facts.

    Notes:
        - The host backend is consulted first. Temperature is fixed at 0.0 on
          the host and remote calls so re-ingesting the same content produces
          deterministic facts (avoids near-duplicate writes to the facts
          table).
        - When the host attempt produces no usable facts, the remote URL is
          **skipped** — falls through to the local model, then [].
        - When the host is not attempted, the remote API is tried first (if
          enabled and a base URL is configured), then the local model.
    """
    if not text or not text.strip():
        return []

    # Reference live module values so monkeypatch on local_llm reaches us.
    if not local_llm.llm_available():
        return []

    prompt = _build_extraction_prompt(text)

    # 0. Host backend (deterministic; temperature=0.0).
    attempted, host_text = local_llm._try_host_llm(
        prompt, max_tokens=local_llm.LLM_MAX_TOKENS, temperature=0.0
    )
    if attempted:
        if host_text:
            # NOTE(review): host output is parsed without _clean_output,
            # unlike the remote and local paths — confirm the host helper
            # already returns cleaned text.
            facts = _parse_facts(host_text)
            if facts:
                return facts
        # Host attempted but produced no facts. Skip remote; try local only.
        return _extract_with_local_llm(prompt)

    # 1. Remote LLM. temperature=0.0 is passed explicitly so the determinism
    #    contract also holds on the standalone remote path.
    if local_llm.LLM_ENABLED and local_llm.LLM_BASE_URL:
        raw_output = local_llm._call_remote_llm(prompt, temperature=0.0)
        if raw_output:
            facts = _parse_facts(local_llm._clean_output(raw_output))
            if facts:
                return facts

    # 2. Local LLM (returns [] when unavailable or failing).
    return _extract_with_local_llm(prompt)
| #149 | |
| #150 | |
def extract_facts_safe(text: str) -> List[str]:
    """
    Never-raising variant of extract_facts.

    Delegates to extract_facts and returns its result; any Exception raised
    along the way is swallowed and an empty list is returned instead.
    """
    facts: List[str] = []
    try:
        facts = extract_facts(text)
    except Exception:
        # Best-effort contract: a failed extraction yields no facts.
        pass
    return facts
| #160 |