repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
| #1 | import concurrent.futures |
| #2 | import logging |
| #3 | import os |
| #4 | from string import Template |
| #5 | from typing import Optional |
| #6 | |
| #7 | import numpy as np |
| #8 | from openai import OpenAI |
| #9 | from tqdm import tqdm |
| #10 | |
| #11 | from embedchain.config.evaluation.base import AnswerRelevanceConfig |
| #12 | from embedchain.evaluation.base import BaseMetric |
| #13 | from embedchain.utils.evaluation import EvalData, EvalMetric |
| #14 | |
| #15 | logger = logging.getLogger(__name__) |
| #16 | |
| #17 | |
class AnswerRelevance(BaseMetric):
    """
    Metric for evaluating the relevance of answers.

    For each data item the configured chat model is prompted with the answer to
    produce questions; the item's score is the mean cosine similarity between the
    embedding of the original question and the embeddings of those generated
    questions. The dataset score is the mean of the per-item scores.
    """

    def __init__(self, config: Optional[AnswerRelevanceConfig] = None):
        """
        Set up the metric and its OpenAI client.

        :param config: Metric configuration. When omitted (or ``None``) a fresh
            ``AnswerRelevanceConfig()`` is created for this instance.
        :raises ValueError: If no API key is found in the config or in the
            ``OPENAI_API_KEY`` environment variable.
        """
        super().__init__(name=EvalMetric.ANSWER_RELEVANCY.value)
        # Build the default here rather than in the signature: a default written
        # in the signature is evaluated once and shared by every instance.
        self.config = config if config is not None else AnswerRelevanceConfig()
        api_key = self.config.api_key or os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("API key not found. Set 'OPENAI_API_KEY' or pass it in the config.")
        self.client = OpenAI(api_key=api_key)

    def _generate_prompt(self, data: EvalData) -> str:
        """
        Generates a prompt based on the provided data.

        Fills the ``num_gen_questions`` and ``answer`` placeholders of the
        configured prompt template.
        """
        return Template(self.config.prompt).substitute(
            num_gen_questions=self.config.num_gen_questions, answer=data.answer
        )

    def _generate_questions(self, prompt: str) -> list[str]:
        """
        Generates questions from the prompt.

        :param prompt: Prompt sent as a single user message to the chat model.
        :return: The non-blank lines of the model's reply, one question per line.
            May be empty if the model returned no text.
        """
        response = self.client.chat.completions.create(
            model=self.config.model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The message content may be None; treat that as an empty reply.
        content = response.choices[0].message.content or ""
        # Drop blank lines so that empty strings are never sent for embedding.
        stripped_lines = (line.strip() for line in content.splitlines())
        return [line for line in stripped_lines if line]

    def _generate_embedding(self, question: str) -> np.ndarray:
        """
        Generates the embedding for a question.

        :param question: Text to embed with the configured embedder model.
        :return: The embedding of ``question`` as a 1-D array.
        """
        response = self.client.embeddings.create(
            input=question,
            model=self.config.embedder,
        )
        return np.array(response.data[0].embedding)

    def _compute_similarity(self, original: np.ndarray, generated: np.ndarray) -> np.ndarray:
        """
        Computes the cosine similarity between one embedding and a set of embeddings.

        :param original: Embedding of the original question.
        :param generated: 2-D array with one generated-question embedding per row.
        :return: 1-D array holding the cosine similarity of ``original`` with
            each row of ``generated``.
        """
        original = original.reshape(1, -1)
        # Product of the norms: one denominator per generated embedding.
        norm = np.linalg.norm(original) * np.linalg.norm(generated, axis=1)
        return np.dot(generated, original.T).flatten() / norm

    def _compute_score(self, data: EvalData) -> float:
        """
        Computes the relevance score for a given data item.

        :param data: Item providing the ``question`` and ``answer`` to compare.
        :return: Mean cosine similarity between the original question and the
            questions generated from the answer.
        :raises ValueError: If the model produced no questions to compare against.
        """
        prompt = self._generate_prompt(data)
        generated_questions = self._generate_questions(prompt)
        if not generated_questions:
            # Fail with a clear message instead of an opaque numpy axis error;
            # evaluate() logs the failure and leaves this item out of the mean.
            raise ValueError("No questions were generated for the answer.")
        original_embedding = self._generate_embedding(data.question)
        generated_embeddings = np.array([self._generate_embedding(q) for q in generated_questions])
        similarities = self._compute_similarity(original_embedding, generated_embeddings)
        return float(np.mean(similarities))

    def evaluate(self, dataset: list[EvalData]) -> float:
        """
        Evaluates the dataset and returns the average answer relevance score.

        Items are scored concurrently in a thread pool. An item whose scoring
        raises is logged and excluded from the average.

        :param dataset: Items to score.
        :return: Mean score over the successfully scored items, or ``0.0`` when
            none succeeded (including an empty dataset).
        """
        results = []

        with concurrent.futures.ThreadPoolExecutor() as executor:
            future_to_data = {executor.submit(self._compute_score, data): data for data in dataset}
            for future in tqdm(
                concurrent.futures.as_completed(future_to_data), total=len(dataset), desc="Evaluating Answer Relevancy"
            ):
                data = future_to_data[future]
                try:
                    results.append(future.result())
                except Exception as e:
                    # Best effort: one failing item must not abort the whole run.
                    logger.error("Error evaluating answer relevancy for %s: %s", data, e)

        return float(np.mean(results)) if results else 0.0
| #96 |