my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	import concurrent.futures
#2	import os
#3	from string import Template
#4	from typing import Optional
#5
#6	import numpy as np
#7	import pysbd
#8	from openai import OpenAI
#9	from tqdm import tqdm
#10
#11	from embedchain.config.evaluation.base import ContextRelevanceConfig
#12	from embedchain.evaluation.base import BaseMetric
#13	from embedchain.utils.evaluation import EvalData, EvalMetric
#14
#15
class ContextRelevance(BaseMetric):
    """
    Metric for evaluating the relevance of context in a dataset.

    For every data item the configured OpenAI chat model is prompted with the
    item's contexts and question. The model's reply is treated as the useful
    part of the context, and the item's score is the number of sentences in
    that reply divided by the number of sentences in the original context.
    """

    def __init__(self, config: Optional[ContextRelevanceConfig] = None):
        """
        Set up the OpenAI client and the sentence segmenter.

        Args:
            config: Metric configuration. When omitted or ``None``, a fresh
                ``ContextRelevanceConfig()`` is created for this instance.

        Raises:
            ValueError: If no API key is found in the config or in the
                ``OPENAI_API_KEY`` environment variable.
        """
        super().__init__(name=EvalMetric.CONTEXT_RELEVANCY.value)
        # Build the default here rather than in the signature: a default
        # expression is evaluated once at definition time, which would make
        # every default-constructed metric share a single config object.
        self.config = config if config is not None else ContextRelevanceConfig()
        api_key = self.config.api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Set 'OPENAI_API_KEY' or pass it in the config.")
        self.client = OpenAI(api_key=api_key)
        self._sbd = pysbd.Segmenter(language=self.config.language, clean=False)

    def _sentence_segmenter(self, text: str) -> list[str]:
        """
        Segments the given text into sentences.

        Args:
            text: Text to split.

        Returns:
            The sentences produced by the pysbd segmenter.
        """
        return self._sbd.segment(text)

    def _compute_score(self, data: EvalData) -> float:
        """
        Computes the context relevance score for a given data item.

        Args:
            data: Item providing ``contexts`` (joined with newlines) and
                ``question``.

        Returns:
            ``len(useful sentences) / len(original sentences)``, or ``0.0``
            when the original context contains no sentences. The ratio is not
            clamped, so it can exceed 1.0 if the model replies with more
            sentences than the context contains.
        """
        original_context = "\n".join(data.contexts)
        original_context_sentences = self._sentence_segmenter(original_context)
        # With nothing to score against the result is 0.0 whatever the model
        # says, so skip the network round trip entirely.
        if not original_context_sentences:
            return 0.0

        prompt = Template(self.config.prompt).substitute(context=original_context, question=data.question)
        response = self.client.chat.completions.create(
            model=self.config.model, messages=[{"role": "user", "content": prompt}]
        )
        # The message content may be None; treat that as an empty reply
        # instead of failing on ``None.strip()``.
        useful_context = (response.choices[0].message.content or "").strip()
        useful_context_sentences = self._sentence_segmenter(useful_context)

        return len(useful_context_sentences) / len(original_context_sentences)

    def evaluate(self, dataset: list[EvalData]) -> float:
        """
        Evaluates the dataset and returns the average context relevance score.

        Items are scored concurrently in a thread pool. An item whose scoring
        raises is reported on stdout and left out of the average.

        Args:
            dataset: Items to score.

        Returns:
            The mean of the successfully computed scores, or ``0.0`` when no
            score could be computed.
        """
        scores = []

        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(self._compute_score, data) for data in dataset]
            # Size the progress bar from the submitted futures so that any
            # iterable of items works, not only sized containers.
            for future in tqdm(
                concurrent.futures.as_completed(futures), total=len(futures), desc="Evaluating Context Relevancy"
            ):
                try:
                    scores.append(future.result())
                except Exception as e:
                    # Best effort: one failing item must not abort the run.
                    print(f"Error during evaluation: {e}")

        # np.mean yields a NumPy scalar; convert to match the annotation.
        return float(np.mean(scores)) if scores else 0.0
#70

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public