my-project-public

repository

loading code, commits, and activity

repositories

loading repo index

import argparse
import concurrent.futures
import json
import threading
from collections import defaultdict

from metrics.llm_judge import evaluate_llm_judge
from metrics.utils import calculate_bleu_scores, calculate_metrics
from tqdm import tqdm
#10
#11
def process_item(item_data, skip_categories=("5",)):
    """Score every question/answer record that belongs to one top-level entry.

    Args:
        item_data: A ``(key, records)`` pair. Each record is a mapping with
            ``"answer"``, ``"response"``, ``"category"`` and ``"question"``
            entries; all four are converted with ``str`` before use.
        skip_categories: Collection of category values, compared as strings,
            whose records are left out of the output. Defaults to ``("5",)``,
            the single category this function previously skipped
            unconditionally.

    Returns:
        A ``defaultdict(list)`` mapping ``key`` to one result dict per scored
        record. ``key`` is absent when every record was skipped or there were
        no records at all.

    Raises:
        KeyError: If a record lacks one of the four required entries. The
            entries are read before the skip check, so this also applies to
            records that would have been skipped.
    """
    key, records = item_data
    scored = defaultdict(list)

    for record in records:
        gt_answer = str(record["answer"])
        pred_answer = str(record["response"])
        category = str(record["category"])
        question = str(record["question"])

        # Excluded categories are dropped before any metric is computed.
        if category in skip_categories:
            continue

        metrics = calculate_metrics(pred_answer, gt_answer)
        bleu_scores = calculate_bleu_scores(pred_answer, gt_answer)
        llm_score = evaluate_llm_judge(question, gt_answer, pred_answer)

        scored[key].append(
            {
                "question": question,
                "answer": gt_answer,
                "response": pred_answer,
                "category": category,
                # Only the "bleu1" and "f1" entries of the helper results are kept.
                "bleu_score": bleu_scores["bleu1"],
                "f1_score": metrics["f1"],
                "llm_score": llm_score,
            }
        )

    return scored
#43
#44
def main():
    """Evaluate the RAG results in the input file and save the scores as JSON.

    Command-line options:
        --input_file: JSON file to evaluate (default
            ``results/rag_results_500_k1.json``). Its top level is iterated
            with ``.items()`` and every pair is handed to ``process_item``.
        --output_file: Destination for the merged results (default
            ``evaluation_metrics.json``).
        --max_workers: Number of threads in the pool (default 10).

    An exception raised inside a worker is re-raised here by
    ``future.result()`` and aborts the run before anything is written.
    """
    parser = argparse.ArgumentParser(description="Evaluate RAG results")
    parser.add_argument(
        "--input_file", type=str, default="results/rag_results_500_k1.json", help="Path to the input dataset file"
    )
    parser.add_argument(
        "--output_file", type=str, default="evaluation_metrics.json", help="Path to save the evaluation results"
    )
    parser.add_argument("--max_workers", type=int, default=10, help="Maximum number of worker threads")

    args = parser.parse_args()

    # Read as UTF-8 explicitly rather than relying on the platform's locale
    # encoding, which would mis-decode non-ASCII text on some systems.
    with open(args.input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    results = defaultdict(list)

    # One task per top-level entry, run on a pool of the requested size.
    with concurrent.futures.ThreadPoolExecutor(max_workers=args.max_workers) as executor:
        futures = [executor.submit(process_item, item_data) for item_data in data.items()]

        # Merging happens only here, on the submitting thread, so `results`
        # needs no lock.
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(futures)):
            for k, items in future.result().items():
                results[k].extend(items)

    # Save results to JSON file
    with open(args.output_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)

    print(f"Results saved to {args.output_file}")
#78
#79
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
#82

z6Mkq5mY3JWtxoxUobWcfNHm7AkRubgSWEZTkBVqZXJviFZ5/my-project-public