File tree Expand file tree Collapse file tree 1 file changed +2
-3
lines changed
lmms_eval/tasks/vl_rewardbench Expand file tree Collapse file tree 1 file changed +2
-3
lines changed Original file line number Diff line number Diff line change 7
7
import requests
8
8
from loguru import logger as eval_logger
9
9
10
-
11
10
LLM_PARSE_ANSWER_PROMPT = """
12
11
You are given a pairwise judgement for two responses. Please return the better response according to the judgement.
13
12
Return the Answer X ONLY. e.g., Answer 1 or Answer 2.
@@ -102,9 +101,9 @@ def vlrewardbench_process_results(doc, results):
102
101
a dictionary with key: metric name (in this case mme score), value: metric value
103
102
"""
104
103
pred = results [0 ]
105
- pred_ans = parse_pred_ans (pred ) # 1 or 2 indicte which one is better
104
+ pred_ans = parse_pred_ans (pred ) # 1 or 2 indicates which one is better
106
105
random_number = sum (len (res ) for res in doc ["response" ]) % 2 # we use the length sum % 2 as a random number generator to decide the order of the answers
107
- # Note: human_ranking [0, 1] -> answer 1 is better, [1, 0] -> answer 2 is better
106
+ # Note: human_ranking [0, 1] -> answer 1 is better, [1, 0] -> answer 2 is better
108
107
gt_ans = doc ["human_ranking" ].index (0 if random_number == 0 else 1 ) + 1
109
108
110
109
if pred_ans == gt_ans :
You can’t perform that action at this time.
0 commit comments