repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources 16d ago
| #1 | import json |
| #2 | import os |
| #3 | import time |
| #4 | from collections import defaultdict |
| #5 | |
| #6 | import numpy as np |
| #7 | import tiktoken |
| #8 | from dotenv import load_dotenv |
| #9 | from jinja2 import Template |
| #10 | from openai import OpenAI |
| #11 | from tqdm import tqdm |
| #12 | |
# Load environment configuration before anything reads it; this module reads
# MODEL and EMBEDDING_MODEL through os.getenv.
load_dotenv()

# Jinja2 template for the user message. RAGManager.generate_response renders
# it with the QUESTION and CONTEXT variables.
# NOTE(review): trailing whitespace inside this literal could not be confirmed
# from the extracted source -- verify against the repository copy.
PROMPT = """
# Question:
{{QUESTION}}

# Context:
{{CONTEXT}}

# Short answer:
"""
| #24 | |
| #25 | |
class RAGManager:
    """Answer dataset questions with a chunk-embed-retrieve-generate pipeline.

    The chat history of each conversation is flattened to text, split into
    token-based chunks, embedded, and the chunks most similar to a question
    are passed as context to a chat-completion model.

    The chat model name is read from the ``MODEL`` environment variable and
    the embedding model name from ``EMBEDDING_MODEL``.
    """

    def __init__(self, data_path="dataset/locomo10_rag.json", chunk_size=500, k=1):
        """Store the configuration and create the OpenAI client.

        Args:
            data_path: Path of the JSON dataset to process.
            chunk_size: Chunk length in tokens; ``-1`` disables chunking and
                uses the whole chat history as context.
            k: Number of chunks to retrieve per question.
        """
        self.model = os.getenv("MODEL")
        self.client = OpenAI()
        self.data_path = data_path
        self.chunk_size = chunk_size
        self.k = k

    def generate_response(self, question, context):
        """Ask the chat model to answer ``question`` using ``context``.

        The request is attempted up to four times (one initial try plus three
        retries) with a one-second pause between attempts.

        Args:
            question: The question text.
            context: The retrieved context text.

        Returns:
            A ``(answer, seconds)`` tuple: the stripped model answer and the
            wall-clock duration of the successful request.

        Raises:
            Exception: Whatever the final failed attempt raised.
        """
        prompt = Template(PROMPT).render(CONTEXT=context, QUESTION=question)

        # NOTE(review): the sentences below are joined without separating
        # spaces ("...context.If the question..."). The text is kept verbatim
        # so results stay comparable with earlier runs.
        system_message = (
            "You are a helpful assistant that can answer "
            "questions based on the provided context."
            "If the question involves timing, use the conversation date for reference."
            "Provide the shortest possible answer."
            "Use words directly from the conversation when possible."
            "Avoid using subjects in your answer."
        )

        max_retries = 3
        for attempt in range(max_retries + 1):
            try:
                t1 = time.time()
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=[
                        {"role": "system", "content": system_message},
                        {"role": "user", "content": prompt},
                    ],
                    temperature=0,
                )
                t2 = time.time()
                return response.choices[0].message.content.strip(), t2 - t1
            except Exception:
                if attempt == max_retries:
                    # Bare raise keeps the original traceback intact.
                    raise
                time.sleep(1)  # Wait before retrying

    def clean_chat_history(self, chat_history):
        """Flatten chat turns into ``"timestamp | speaker: text"`` lines.

        Args:
            chat_history: Iterable of dicts with ``timestamp``, ``speaker``
                and ``text`` keys.

        Returns:
            One newline-terminated line per turn, concatenated into a string.
        """
        return "".join(
            f"{c['timestamp']} | {c['speaker']}: {c['text']}\n" for c in chat_history
        )

    def calculate_embedding(self, document):
        """Return the embedding of ``document`` from the ``EMBEDDING_MODEL`` model."""
        response = self.client.embeddings.create(model=os.getenv("EMBEDDING_MODEL"), input=document)
        return response.data[0].embedding

    def calculate_similarity(self, embedding1, embedding2):
        """Return the cosine similarity of two embedding vectors."""
        return np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))

    def search(self, query, chunks, embeddings, k=1):
        """Search for the top-k most similar chunks to the query.

        Args:
            query: The query string
            chunks: List of text chunks
            embeddings: List of embeddings for each chunk
            k: Number of top chunks to return (default: 1)

        Returns:
            combined_chunks: The top-k chunks, most similar first, joined by
                ``"\\n<->\\n"``; an empty string when there are no chunks or
                ``k`` is not positive.
            search_time: Time taken for the search
        """
        t1 = time.time()

        # Nothing to retrieve: skip the embedding request. Without this guard
        # np.argmax fails on an empty list and k == 0 would select *every*
        # chunk, because a [-0:] slice is the whole array.
        if k <= 0 or not chunks or len(embeddings) == 0:
            return "", time.time() - t1

        query_embedding = self.calculate_embedding(query)
        similarities = [self.calculate_similarity(query_embedding, embedding) for embedding in embeddings]

        if k == 1:
            # Just the single most similar chunk.
            top_indices = [np.argmax(similarities)]
        else:
            # Ascending sort, keep the last k, then flip to best-first.
            top_indices = np.argsort(similarities)[-k:][::-1]

        combined_chunks = "\n<->\n".join(chunks[i] for i in top_indices)

        t2 = time.time()
        return combined_chunks, t2 - t1

    def create_chunks(self, chat_history, chunk_size=500):
        """Split a chat history into token-based chunks and embed each chunk.

        Args:
            chat_history: Iterable of chat-turn dicts (see
                ``clean_chat_history``).
            chunk_size: Chunk length in tokens. ``-1`` returns the whole
                history as a single chunk without computing embeddings.

        Returns:
            A ``(chunks, embeddings)`` tuple; ``embeddings`` is empty when
            ``chunk_size`` is ``-1``.

        Raises:
            ValueError: If ``chunk_size`` is neither ``-1`` nor positive.
        """
        documents = self.clean_chat_history(chat_history)

        if chunk_size == -1:
            # Full-context mode needs neither the tokenizer nor embeddings.
            return [documents], []

        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be -1 or a positive integer, got {chunk_size}")

        # Tokenize with the encoding that matches the embedding model.
        encoding = tiktoken.encoding_for_model(os.getenv("EMBEDDING_MODEL"))
        tokens = encoding.encode(documents)

        # Split into consecutive windows of chunk_size tokens.
        chunks = [
            encoding.decode(tokens[i : i + chunk_size])
            for i in range(0, len(tokens), chunk_size)
        ]
        embeddings = [self.calculate_embedding(chunk) for chunk in chunks]

        return chunks, embeddings

    @staticmethod
    def _write_results(output_file_path, results):
        """Overwrite ``output_file_path`` with ``results`` serialized as JSON."""
        with open(output_file_path, "w+") as f:
            json.dump(results, f, indent=4)

    def process_all_conversations(self, output_file_path):
        """Answer every question in the dataset and write the results as JSON.

        The dataset at ``self.data_path`` maps a conversation key to a dict
        with a ``conversation`` (chat history) and a ``question`` list; each
        question item has ``question`` and ``category`` and may have
        ``answer``.

        Args:
            output_file_path: Destination JSON file. It is rewritten after
                every answered question so partial results survive a crash,
                and once more at the end.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(self.data_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        final_results = defaultdict(list)
        for key, value in tqdm(data.items(), desc="Processing conversations"):
            chat_history = value["conversation"]
            questions = value["question"]

            chunks, embeddings = self.create_chunks(chat_history, self.chunk_size)

            for item in tqdm(questions, desc="Answering questions", leave=False):
                question = item["question"]
                answer = item.get("answer", "")
                category = item["category"]

                if self.chunk_size == -1:
                    # Full-context mode: the single chunk is the whole history.
                    context = chunks[0]
                    search_time = 0
                else:
                    context, search_time = self.search(question, chunks, embeddings, k=self.k)
                response, response_time = self.generate_response(question, context)

                final_results[key].append(
                    {
                        "question": question,
                        "answer": answer,
                        "category": category,
                        "context": context,
                        "response": response,
                        "search_time": search_time,
                        "response_time": response_time,
                    }
                )
                # Checkpoint progress.
                self._write_results(output_file_path, final_results)

        # Save results (also covers a dataset with no questions at all).
        self._write_results(output_file_path, final_results)
| #184 |