my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	"""LLM client for fact extraction via OpenRouter.
#2
#3	Reuses the same patterns as tools/evaluate_beam_end_to_end.py LLMClient.
#4	100% open source (MIT).
#5	"""
#6
#7	import json as _json
#8	import os
#9	import time
#10	import urllib.request
#11
# ── Defaults ──────────────────────────────────────────────────────────────
# Model requested first; the MNEMOSYNE_EXTRACTION_MODEL environment variable
# overrides the built-in choice.
DEFAULT_EXTRACTION_MODEL = os.getenv(
    "MNEMOSYNE_EXTRACTION_MODEL", "google/gemini-2.5-flash"
)

# Bearer token taken from the environment; empty string when it is not set.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")

# API root, stored without trailing slashes so endpoint paths can be appended
# with a single "/".
OPENROUTER_BASE_URL = os.getenv(
    "OPENROUTER_BASE_URL", "https://openrouter.ai/api/v1"
).rstrip("/")

# Models attempted, in order, after the primary model.
FALLBACK_MODELS = ["google/gemini-flash-latest"]
#26
#27
class ExtractionClient:
    """OpenAI-compatible API client for fact extraction via OpenRouter.

    Attributes:
        model: Model identifier requested first on every call.
        api_key: Bearer token sent in the ``Authorization`` header.
        base_url: API root without a trailing slash.
        call_count: Number of API calls that completed and were decoded.
        last_error: Text of the most recent exception swallowed by
            :meth:`chat`, or ``None`` if no call has failed yet.
    """

    # Attempts made against one model before moving on to the next.
    _MAX_ATTEMPTS = 3

    def __init__(
        self,
        model: str = None,
        api_key: str = None,
        base_url: str = None,
    ):
        """Create a client, using module defaults for any omitted argument.

        Args:
            model: Model identifier; falls back to DEFAULT_EXTRACTION_MODEL.
            api_key: API key; falls back to OPENROUTER_API_KEY.
            base_url: API root; falls back to OPENROUTER_BASE_URL. Trailing
                slashes are removed.
        """
        self.model = model or DEFAULT_EXTRACTION_MODEL
        self.api_key = api_key or OPENROUTER_API_KEY
        self.base_url = (base_url or OPENROUTER_BASE_URL).rstrip("/")
        self.call_count = 0
        self.last_error = None

    def chat(
        self,
        messages: list,
        temperature: float = 0.0,
        max_tokens: int = 4096,
    ) -> str:
        """Send a chat completion with model fallback and retry.

        The configured model is tried first, then every entry of
        FALLBACK_MODELS that differs from it. A failure whose text looks like
        rate limiting is retried on the same model with exponential back-off
        (1s, 2s, 4s); any other failure moves straight on to the next model.

        Args:
            messages: Chat messages in the OpenAI ``role``/``content`` format.
            temperature: Sampling temperature forwarded to the API.
            max_tokens: Completion token limit forwarded to the API.

        Returns:
            The response text, or an empty string if every model failed. The
            last failure is kept in ``self.last_error`` for diagnostics.
        """
        models_to_try = [self.model] + [
            m for m in FALLBACK_MODELS if m != self.model
        ]
        final_index = len(models_to_try) - 1

        for index, model in enumerate(models_to_try):
            for attempt in range(self._MAX_ATTEMPTS):
                try:
                    return self._call_api(
                        model, messages, temperature, max_tokens
                    )
                except Exception as exc:
                    # Deliberately broad: this method is best-effort and
                    # reports total failure as "" instead of raising.
                    self.last_error = str(exc)
                    if not self._is_rate_limited(self.last_error):
                        break  # Non-retryable, try next model
                    time.sleep(2 ** attempt)
            # Brief pause between models; pointless once none are left.
            if index < final_index:
                time.sleep(1)

        # All models failed
        return ""

    @staticmethod
    def _is_rate_limited(error_text: str) -> bool:
        """Return True if an error message looks like a rate-limit failure."""
        return "429" in error_text or "rate" in error_text.lower()

    def _call_api(
        self,
        model: str,
        messages: list,
        temperature: float,
        max_tokens: int,
    ) -> str:
        """Perform a single chat-completion request via urllib.

        Returns:
            The content of the first choice; an empty string if the API
            returned a null content.

        Raises:
            Exception: Whatever ``urlopen``, JSON decoding, or the response
                lookup raises; :meth:`chat` is responsible for handling it.
        """
        url = f"{self.base_url}/chat/completions"
        payload = _json.dumps(
            {
                "model": model,
                "messages": messages,
                "temperature": temperature,
                "max_tokens": max_tokens,
            }
        ).encode()
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        req = urllib.request.Request(url, data=payload, headers=headers)
        # Context manager closes the HTTP response even if decoding fails.
        with urllib.request.urlopen(req, timeout=60) as resp:
            data = _json.loads(resp.read())
        self.call_count += 1
        content = data["choices"][0]["message"]["content"]
        # Honour the declared ``str`` return type when content is null.
        return "" if content is None else content

    @staticmethod
    def _format_conversation(messages: list) -> str:
        """Render messages as ``[index] [role]: content`` lines.

        Messages whose content is ``None`` or blank are skipped; other
        non-string content is converted with ``str()`` instead of crashing.
        The index is the message's position in the original list.
        """
        lines = []
        for i, msg in enumerate(messages):
            role = msg.get("role", "unknown")
            content = msg.get("content", "")
            if content is None:
                continue
            if not isinstance(content, str):
                content = str(content)
            if content.strip():
                lines.append(f"[{i}] [{role}]: {content}\n")
        return "".join(lines)

    @staticmethod
    def _parse_fact_list(response: str) -> list:
        """Extract a JSON array from a model response.

        The text between the first ``[`` and the last ``]`` is parsed, so
        prose surrounding the array is ignored. Returns an empty list when
        there is no such span, it is not valid JSON, or it is not a list.
        """
        json_start = response.find("[")
        json_end = response.rfind("]") + 1
        if json_start < 0 or json_end <= json_start:
            return []
        try:
            facts = _json.loads(response[json_start:json_end])
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return []
        return facts if isinstance(facts, list) else []

    def extract_facts(self, messages: list) -> list:
        """Extract structured facts from a list of conversation messages.

        Args:
            messages: List of dicts with 'role' and 'content' keys.

        Returns:
            List of fact dicts (subject, predicate, object, etc.), or empty
            list on failure.
        """
        from .prompts import EXTRACTION_SYSTEM_PROMPT, EXTRACTION_USER_TEMPLATE

        # Build conversation text from messages
        conversation_text = self._format_conversation(messages)
        if not conversation_text.strip():
            return []

        user_prompt = EXTRACTION_USER_TEMPLATE.format(
            conversation_text=conversation_text,
        )
        chat_messages = [
            {"role": "system", "content": EXTRACTION_SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]

        response = self.chat(chat_messages, temperature=0.0, max_tokens=4096)
        if not response:
            return []

        # Parse JSON from response
        return self._parse_fact_list(response)
#152

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public