my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

#1	import argparse
#2	import json
#3	import os
#4	import time
#5	from collections import defaultdict
#6
#7	from dotenv import load_dotenv
#8	from jinja2 import Template
#9	from openai import OpenAI
#10	from tqdm import tqdm
#11
#12	load_dotenv()
#13
#14
#15	ANSWER_PROMPT = """
#16	You are an intelligent memory assistant tasked with retrieving accurate information from conversation memories.
#17
#18	# CONTEXT:
#19	You have access to memories from a conversation. These memories contain
#20	timestamped information that may be relevant to answering the question.
#21
#22	# INSTRUCTIONS:
#23	1. Carefully analyze all provided memories
#24	2. Pay special attention to the timestamps to determine the answer
#25	3. If the question asks about a specific event or fact, look for direct evidence in the memories
#26	4. If the memories contain contradictory information, prioritize the most recent memory
#27	5. If there is a question about time references (like "last year", "two months ago", etc.),
#28	calculate the actual date based on the memory timestamp. For example, if a memory from
#29	4 May 2022 mentions "went to India last year," then the trip occurred in 2021.
#30	6. Always convert relative time references to specific dates, months, or years. For example,
#31	convert "last year" to "2022" or "two months ago" to "March 2023" based on the memory
#32	timestamp. Ignore the reference while answering the question.
#33	7. Focus only on the content of the memories. Do not confuse character
#34	names mentioned in memories with the actual users who created those memories.
#35	8. The answer should be less than 5-6 words.
#36
#37	# APPROACH (Think step by step):
#38	1. First, examine all memories that contain information related to the question
#39	2. Examine the timestamps and content of these memories carefully
#40	3. Look for explicit mentions of dates, times, locations, or events that answer the question
#41	4. If the answer requires calculation (e.g., converting relative time references), show your work
#42	5. Formulate a precise, concise answer based solely on the evidence in the memories
#43	6. Double-check that your answer directly addresses the question asked
#44	7. Ensure your final answer is specific and avoids vague time references
#45
#46	Memories:
#47
#48	{{memories}}
#49
#50	Question: {{question}}
#51	Answer:
#52	"""
#53
#54
class OpenAIPredict:
    """Answer dataset questions with an OpenAI chat model over saved memory files.

    For each conversation index ``idx`` the memory text is read from
    ``memories/{idx}.txt``, rendered into ``ANSWER_PROMPT`` together with the
    question, and sent to the chat completions endpoint. Results are
    accumulated in ``self.results`` (keyed by conversation index) and written
    to a JSON file as processing progresses.
    """

    def __init__(self, model="gpt-4o-mini"):
        """Create the API client and an empty result store.

        Args:
            model: Model name used when the ``MODEL`` environment variable is
                not set (or is empty).
        """
        self.model = model
        self.openai_client = OpenAI()
        self.results = defaultdict(list)

    def _resolve_model(self):
        """Return the model name to send to the API.

        A non-empty ``MODEL`` environment variable takes precedence; otherwise
        the model passed to the constructor is used, so the request never
        carries ``model=None``.
        """
        return os.getenv("MODEL") or self.model

    def _save_results(self, output_file_path):
        """Write all results collected so far to ``output_file_path`` as JSON."""
        with open(output_file_path, "w") as f:
            json.dump(self.results, f, indent=4)

    def search_memory(self, idx):
        """Load the memory text for conversation ``idx``.

        Returns:
            A ``(memories, search_time)`` tuple. The search time is always
            reported as ``0``; the file read is not timed.

        Raises:
            FileNotFoundError: If ``memories/{idx}.txt`` does not exist.
        """
        with open(f"memories/{idx}.txt", "r", encoding="utf-8") as file:
            memories = file.read()

        return memories, 0

    def process_question(self, val, idx):
        """Answer one question item and return a flat result record.

        Args:
            val: Mapping describing the question. The keys ``question``,
                ``answer``, ``category``, ``evidence`` and
                ``adversarial_answer`` are read; missing keys fall back to
                ``""``, ``""``, ``-1``, ``[]`` and ``""`` respectively.
            idx: Conversation index the question belongs to.

        Returns:
            A dict with the question fields plus the model ``response``, the
            two timings and the ``context`` (memory text) that was used.
        """
        question = val.get("question", "")
        answer = val.get("answer", "")
        category = val.get("category", -1)
        evidence = val.get("evidence", [])
        adversarial_answer = val.get("adversarial_answer", "")

        response, search_memory_time, response_time, context = self.answer_question(idx, question)

        return {
            "question": question,
            "answer": answer,
            "category": category,
            "evidence": evidence,
            "response": response,
            "adversarial_answer": adversarial_answer,
            "search_memory_time": search_memory_time,
            "response_time": response_time,
            "context": context,
        }

    def answer_question(self, idx, question):
        """Ask the chat model ``question`` using the memories of conversation ``idx``.

        Returns:
            A ``(response_text, search_memory_time, response_time, memories)``
            tuple, where ``response_time`` is the elapsed time of the API call
            in seconds.
        """
        memories, search_memory_time = self.search_memory(idx)

        template = Template(ANSWER_PROMPT)
        answer_prompt = template.render(memories=memories, question=question)

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments during the request.
        t1 = time.perf_counter()
        response = self.openai_client.chat.completions.create(
            model=self._resolve_model(),
            messages=[{"role": "system", "content": answer_prompt}],
            temperature=0.0,
        )
        response_time = time.perf_counter() - t1
        return response.choices[0].message.content, search_memory_time, response_time, memories

    def process_data_file(self, file_path, output_file_path):
        """Answer every question in the dataset at ``file_path``.

        The dataset is expected to be a JSON list whose items each carry a
        ``"qa"`` list of question items. Results are checkpointed to
        ``output_file_path`` after every question so that an interrupted run
        keeps the answers produced so far.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        for idx, item in tqdm(enumerate(data), total=len(data), desc="Processing conversations"):
            qa = item["qa"]

            for question_item in tqdm(
                qa, total=len(qa), desc=f"Processing questions for conversation {idx}", leave=False
            ):
                result = self.process_question(question_item, idx)
                self.results[idx].append(result)

                # Save results after each question is processed
                self._save_results(output_file_path)

        # Final save at the end (also creates the file when there were no questions)
        self._save_results(output_file_path)
#124
#125
if __name__ == "__main__":
    # Command-line entry point: answer every question in the dataset and
    # write the collected results to the requested JSON file.
    parser = argparse.ArgumentParser()
    parser.add_argument("--output_file_path", type=str, required=True)
    # The dataset location used to be hard-coded; the default keeps the
    # previous behavior for existing invocations.
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="../../dataset/locomo10.json",
        help="Path to the JSON dataset file (default: %(default)s)",
    )
    args = parser.parse_args()
    openai_predict = OpenAIPredict()
    openai_predict.process_data_file(args.dataset_path, args.output_file_path)
#132

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public