repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...2fa351d6docs: add automaton and perps launch sources16d ago| #1 | import concurrent.futures |
| #2 | import os |
| #3 | from string import Template |
| #4 | from typing import Optional |
| #5 | |
| #6 | import numpy as np |
| #7 | import pysbd |
| #8 | from openai import OpenAI |
| #9 | from tqdm import tqdm |
| #10 | |
| #11 | from embedchain.config.evaluation.base import ContextRelevanceConfig |
| #12 | from embedchain.evaluation.base import BaseMetric |
| #13 | from embedchain.utils.evaluation import EvalData, EvalMetric |
| #14 | |
| #15 | |
class ContextRelevance(BaseMetric):
    """
    Metric for evaluating the relevance of context in a dataset.

    For each data item the configured prompt is filled with the item's
    contexts and question and sent to an OpenAI chat model. The item's score
    is the number of sentences in the model's reply divided by the number of
    sentences in the original context.
    """

    def __init__(self, config: Optional[ContextRelevanceConfig] = None):
        """
        Set up the OpenAI client and the sentence segmenter.

        :param config: Metric configuration. When omitted (or ``None``) a fresh
            ``ContextRelevanceConfig()`` is created for this instance.
        :raises ValueError: If no API key is available from the config or from
            the ``OPENAI_API_KEY`` environment variable.
        """
        super().__init__(name=EvalMetric.CONTEXT_RELEVANCY.value)
        # Build the default here rather than in the signature: a default
        # argument is evaluated once at definition time, so every instance
        # would otherwise share (and could mutate) the same config object.
        # This also makes an explicit ``config=None`` work as the Optional
        # annotation promises.
        self.config = config if config is not None else ContextRelevanceConfig()
        api_key = self.config.api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Set 'OPENAI_API_KEY' or pass it in the config.")
        self.client = OpenAI(api_key=api_key)
        self._sbd = pysbd.Segmenter(language=self.config.language, clean=False)

    def _sentence_segmenter(self, text: str) -> list[str]:
        """
        Segments the given text into sentences.

        :param text: Text to split.
        :return: The sentences produced by the configured pysbd segmenter.
        """
        return self._sbd.segment(text)

    def _compute_score(self, data: EvalData) -> float:
        """
        Computes the context relevance score for a given data item.

        :param data: Item providing ``contexts`` and ``question``.
        :return: ``len(reply sentences) / len(context sentences)``, or ``0.0``
            when the original context contains no sentences.
        :raises ValueError: If the model reply carries no text content.
        """
        original_context = "\n".join(data.contexts)
        prompt = Template(self.config.prompt).substitute(context=original_context, question=data.question)
        response = self.client.chat.completions.create(
            model=self.config.model, messages=[{"role": "user", "content": prompt}]
        )
        content = response.choices[0].message.content
        if content is None:
            # The API may return a message without text content; fail with a
            # clear error instead of an AttributeError on ``None.strip()``.
            raise ValueError("Model response contained no text content.")
        useful_context = content.strip()
        useful_context_sentences = self._sentence_segmenter(useful_context)
        original_context_sentences = self._sentence_segmenter(original_context)

        # Guard against division by zero for an empty context.
        if not original_context_sentences:
            return 0.0
        return len(useful_context_sentences) / len(original_context_sentences)

    def evaluate(self, dataset: list[EvalData]) -> float:
        """
        Evaluates the dataset and returns the average context relevance score.

        Items are scored concurrently on a thread pool. An item whose scoring
        raises is reported on stdout and left out of the average.

        :param dataset: Items to score.
        :return: Mean of the successfully computed scores, or ``0.0`` when no
            score could be computed.
        """
        scores = []

        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(self._compute_score, data) for data in dataset]
            for future in tqdm(
                concurrent.futures.as_completed(futures), total=len(dataset), desc="Evaluating Context Relevancy"
            ):
                try:
                    scores.append(future.result())
                except Exception as e:
                    # Best effort: one failing item must not abort the run.
                    print(f"Error during evaluation: {e}")

        # float() keeps the return type a plain Python float in both branches
        # (np.mean yields np.float64).
        return float(np.mean(scores)) if scores else 0.0
| #70 |