repositories
loading repo index
repositories
loading repo index
repository
loading code, commits, and activity
public Clawd ADK gateway launch mirror
stars
latest
clone command
git clone gitlawb://did:key:z6Mkq5mY...iFZ5/my-project-publ...
git clone gitlawb://did:key:z6Mkq5mY.../my-project-publ...
2fa351d6 docs: add automaton and perps launch sources (16d ago)
| #1 | import argparse |
| #2 | import json |
| #3 | import os |
| #4 | import time |
| #5 | from collections import defaultdict |
| #6 | |
| #7 | from dotenv import load_dotenv |
| #8 | from jinja2 import Template |
| #9 | from openai import OpenAI |
| #10 | from tqdm import tqdm |
| #11 | |
| #12 | load_dotenv() |
| #13 | |
| #14 | |
# Prompt sent to the chat model for every question. It is a Jinja2 template
# with two placeholders, ``{{memories}}`` and ``{{question}}``, which are
# filled in via ``Template(ANSWER_PROMPT).render(memories=..., question=...)``.
# The text is runtime data: editing it changes what the model is asked.
ANSWER_PROMPT = """
You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.

# CONTEXT:
You have access to memories from a conversation. These memories contain
timestamped information that may be relevant to answering the question.

# INSTRUCTIONS:
1. Carefully analyze all provided memories
2. Pay special attention to the timestamps to determine the answer
3. If the question asks about a specific event or fact, look for direct evidence in the memories
4. If the memories contain contradictory information, prioritize the most recent memory
5. If there is a question about time references (like "last year", "two months ago", etc.),
calculate the actual date based on the memory timestamp. For example, if a memory from
4 May 2022 mentions "went to India last year," then the trip occurred in 2021.
6. Always convert relative time references to specific dates, months, or years. For example,
convert "last year" to "2022" or "two months ago" to "March 2023" based on the memory
timestamp. Ignore the reference while answering the question.
7. Focus only on the content of the memories. Do not confuse character
names mentioned in memories with the actual users who created those memories.
8. The answer should be less than 5-6 words.

# APPROACH (Think step by step):
1. First, examine all memories that contain information related to the question
2. Examine the timestamps and content of these memories carefully
3. Look for explicit mentions of dates, times, locations, or events that answer the question
4. If the answer requires calculation (e.g., converting relative time references), show your work
5. Formulate a precise, concise answer based solely on the evidence in the memories
6. Double-check that your answer directly addresses the question asked
7. Ensure your final answer is specific and avoids vague time references

Memories:

{{memories}}

Question: {{question}}
Answer:
"""
| #53 | |
| #54 | |
class OpenAIPredict:
    """Answer dataset questions with an OpenAI chat model and stored memory text.

    For a conversation index ``idx`` the full contents of
    ``memories/{idx}.txt`` are rendered into ``ANSWER_PROMPT`` together with
    the question; the model's reply is stored next to the reference answer.

    Attributes:
        model: Fallback model name, used when the ``MODEL`` environment
            variable is unset or empty.
        openai_client: ``OpenAI`` client used for chat completions.
        results: Maps conversation index to the list of per-question result
            dicts collected so far.
    """

    def __init__(self, model="gpt-4o-mini"):
        """Create the API client and an empty result store.

        Args:
            model: Model name to use when the ``MODEL`` environment variable
                does not provide one.
        """
        self.model = model
        self.openai_client = OpenAI()
        self.results = defaultdict(list)

    def _resolve_model(self):
        """Return the model name: ``MODEL`` env var if non-empty, else ``self.model``."""
        # The env var keeps priority so existing configurations behave as
        # before; the fallback stops ``None`` being sent as the model name.
        return os.getenv("MODEL") or self.model

    def _save_results(self, output_file_path):
        """Overwrite ``output_file_path`` with ``self.results`` as indented JSON."""
        with open(output_file_path, "w", encoding="utf-8") as f:
            json.dump(self.results, f, indent=4)

    def search_memory(self, idx):
        """Load the memory text for conversation ``idx``.

        Args:
            idx: Conversation index; selects the file ``memories/{idx}.txt``
                relative to the current working directory.

        Returns:
            A ``(memories, search_time)`` tuple. ``search_time`` is always
            ``0`` because no retrieval step runs; the whole file is returned.

        Raises:
            OSError: If the memory file cannot be opened.
        """
        # NOTE(review): read with the platform default encoding because the
        # code that writes these files is not visible here.
        with open(f"memories/{idx}.txt", "r") as file:
            memories = file.read()

        return memories, 0

    def process_question(self, val, idx):
        """Answer one QA item and bundle the outcome with its reference fields.

        Args:
            val: Dict describing the question. The keys ``question``,
                ``answer``, ``category``, ``evidence`` and
                ``adversarial_answer`` are read; missing keys fall back to
                ``""``, ``""``, ``-1``, ``[]`` and ``""`` respectively.
            idx: Conversation index the question belongs to.

        Returns:
            Dict holding the reference fields plus ``response``,
            ``search_memory_time``, ``response_time`` and ``context``
            (the memory text that was given to the model).
        """
        question = val.get("question", "")
        answer = val.get("answer", "")
        category = val.get("category", -1)
        evidence = val.get("evidence", [])
        adversarial_answer = val.get("adversarial_answer", "")

        response, search_memory_time, response_time, context = self.answer_question(idx, question)

        return {
            "question": question,
            "answer": answer,
            "category": category,
            "evidence": evidence,
            "response": response,
            "adversarial_answer": adversarial_answer,
            "search_memory_time": search_memory_time,
            "response_time": response_time,
            "context": context,
        }

    def answer_question(self, idx, question):
        """Ask the chat model ``question`` using conversation ``idx``'s memories.

        Args:
            idx: Conversation index used to load the memory file.
            question: Question text inserted into the prompt.

        Returns:
            A ``(response_text, search_memory_time, response_time, memories)``
            tuple, where ``response_time`` is the elapsed seconds of the
            completion call.
        """
        memories, search_memory_time = self.search_memory(idx)

        answer_prompt = Template(ANSWER_PROMPT).render(memories=memories, question=question)

        # perf_counter is monotonic, so the measured latency cannot be
        # distorted by system clock adjustments during the request.
        started = time.perf_counter()
        response = self.openai_client.chat.completions.create(
            model=self._resolve_model(),
            messages=[{"role": "system", "content": answer_prompt}],
            temperature=0.0,
        )
        response_time = time.perf_counter() - started
        return response.choices[0].message.content, search_memory_time, response_time, memories

    def process_data_file(self, file_path, output_file_path):
        """Answer every question in a dataset file and write the results as JSON.

        Args:
            file_path: Path to a JSON file containing a list of conversation
                items, each with a ``"qa"`` list of question dicts.
            output_file_path: Destination JSON file; rewritten after every
                question and once more at the end.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
            qa = item["qa"]

            for question_item in tqdm(
                qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False
            ):
                result = self.process_question(question_item, idx)
                self.results[idx].append(result)

                # Checkpoint after each question so an interrupted run keeps
                # everything answered so far.
                self._save_results(output_file_path)

        # Final save so the output file exists even if no question was processed.
        self._save_results(output_file_path)
| #124 | |
| #125 | |
if __name__ == "__main__":
    # Command-line entry point: answer every question in the dataset file and
    # write the collected results to the requested JSON file.
    parser = argparse.ArgumentParser(
        description="Answer dataset questions with an OpenAI model and save the results as JSON."
    )
    parser.add_argument(
        "--output_file_path", type=str, required=True, help="Path of the JSON file to write results to."
    )
    # Optional so existing invocations keep working; the default is the path
    # that used to be hard-coded.
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="../../dataset/locomo10.json",
        help="Path of the dataset JSON file to read.",
    )
    args = parser.parse_args()
    openai_predict = OpenAIPredict()
    openai_predict.process_data_file(args.dataset_path, args.output_file_path)
| #132 |