From 22ed307d842f859eb5c7513e4355992f5b138ba0 Mon Sep 17 00:00:00 2001
From: Bo Li <drluodian@gmail.com>
Date: Sat, 24 Aug 2024 07:16:45 +0000
Subject: [PATCH] fix: update from previous model_specific_prompt to current
 lmms_eval_specific_kwargs

---
 docs/task_guide.md                                     | 4 ++--
 lmms_eval/tasks/ai2d/ai2d_lite.yaml                    | 2 +-
 lmms_eval/tasks/chartqa/chartqa_lite.yaml              | 2 +-
 lmms_eval/tasks/docvqa/docvqa_val_lite.yaml            | 2 +-
 lmms_eval/tasks/gqa/gqa_lite.yaml                      | 2 +-
 lmms_eval/tasks/infovqa/infovqa_val_lite.yaml          | 2 +-
 lmms_eval/tasks/mirb/mirb.yaml                         | 2 +-
 lmms_eval/tasks/mirb/utils.py                          | 6 +++---
 lmms_eval/tasks/mmbench/mmbench_cn_dev_lite.yaml       | 2 +-
 lmms_eval/tasks/mmbench/mmbench_en_dev_lite.yaml       | 2 +-
 lmms_eval/tasks/ok_vqa/ok_vqa_val2014_lite.yaml        | 2 +-
 lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml | 2 +-
 lmms_eval/tasks/textcaps/textcaps_val_lite.yaml        | 2 +-
 lmms_eval/tasks/textvqa/textvqa_val_lite.yaml          | 2 +-
 lmms_eval/tasks/vibe_eval/utils.py                     | 2 ++
 lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val_lite.yaml    | 2 +-
 lmms_eval/tasks/vqav2/vqav2_val_lite.yaml              | 2 +-
 17 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/docs/task_guide.md b/docs/task_guide.md
index 1376bc22e..1e7d3a9dc 100755
--- a/docs/task_guide.md
+++ b/docs/task_guide.md
@@ -40,7 +40,7 @@ metric_list:
   - metric: mme_cognition_score
     aggregation: !function utils.mme_aggregate_results
     higher_is_better: true
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question using a single word or phrase."
@@ -52,7 +52,7 @@ metadata:
 ```
 
 You can pay special attention to the `process_results` and `metric_list` fields, which are used to define how the model output is post-processed and scored.
-Also, the `model_specific_prompt_kwargs` field is used to define model-specific prompt configurations. The default is set to follow Llava.
+Also, the `lmms_eval_specific_kwargs` field is used to define model-specific prompt configurations, keyed by model name (e.g. `pre_prompt`/`post_prompt` per model). The `default` entry is the fallback and is set to follow Llava.
 
 PPL-based tasks:
 - Seedbench (`lmms_eval/tasks/seedbench/seedbench_ppl.yaml`)
diff --git a/lmms_eval/tasks/ai2d/ai2d_lite.yaml b/lmms_eval/tasks/ai2d/ai2d_lite.yaml
index bdeb97244..b71abe808 100644
--- a/lmms_eval/tasks/ai2d/ai2d_lite.yaml
+++ b/lmms_eval/tasks/ai2d/ai2d_lite.yaml
@@ -9,7 +9,7 @@ doc_to_visual: !function utils.ai2d_doc_to_visual
 doc_to_text: !function utils.ai2d_doc_to_text
 doc_to_target: !function utils.ai2d_doc_to_target
   
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     prompt_format: mcq
     pre_prompt: ""
diff --git a/lmms_eval/tasks/chartqa/chartqa_lite.yaml b/lmms_eval/tasks/chartqa/chartqa_lite.yaml
index 4fbce4975..96daff5f4 100644
--- a/lmms_eval/tasks/chartqa/chartqa_lite.yaml
+++ b/lmms_eval/tasks/chartqa/chartqa_lite.yaml
@@ -25,7 +25,7 @@ metric_list:
     higher_is_better: true
 metadata:
   - version: 0.0
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question with a single word."
diff --git a/lmms_eval/tasks/docvqa/docvqa_val_lite.yaml b/lmms_eval/tasks/docvqa/docvqa_val_lite.yaml
index afda5eb57..95d065df7 100644
--- a/lmms_eval/tasks/docvqa/docvqa_val_lite.yaml
+++ b/lmms_eval/tasks/docvqa/docvqa_val_lite.yaml
@@ -16,7 +16,7 @@ generation_kwargs:
   max_new_tokens: 32
   temperature: 0
   do_sample: False
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question using a single word or phrase."
diff --git a/lmms_eval/tasks/gqa/gqa_lite.yaml b/lmms_eval/tasks/gqa/gqa_lite.yaml
index 810420419..7f432fc40 100644
--- a/lmms_eval/tasks/gqa/gqa_lite.yaml
+++ b/lmms_eval/tasks/gqa/gqa_lite.yaml
@@ -23,7 +23,7 @@ metric_list:
 metadata:
   - version: 0.0
   
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question using a single word or phrase."
diff --git a/lmms_eval/tasks/infovqa/infovqa_val_lite.yaml b/lmms_eval/tasks/infovqa/infovqa_val_lite.yaml
index eab751209..f52ded6ea 100644
--- a/lmms_eval/tasks/infovqa/infovqa_val_lite.yaml
+++ b/lmms_eval/tasks/infovqa/infovqa_val_lite.yaml
@@ -16,7 +16,7 @@ generation_kwargs:
   max_new_tokens: 32
   temperature: 0
   do_sample: False
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question using a single word or phrase."
\ No newline at end of file
diff --git a/lmms_eval/tasks/mirb/mirb.yaml b/lmms_eval/tasks/mirb/mirb.yaml
index b42e3c84e..099e578c0 100644
--- a/lmms_eval/tasks/mirb/mirb.yaml
+++ b/lmms_eval/tasks/mirb/mirb.yaml
@@ -10,7 +10,7 @@ doc_to_text: !function utils.mirb_doc_to_text
 doc_to_target: !function utils.mirb_doc_to_target
 process_results: !function utils.mirb_process_results
   
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: ""
diff --git a/lmms_eval/tasks/mirb/utils.py b/lmms_eval/tasks/mirb/utils.py
index 174659dce..3e675d396 100644
--- a/lmms_eval/tasks/mirb/utils.py
+++ b/lmms_eval/tasks/mirb/utils.py
@@ -24,11 +24,11 @@ def get_task_instruction(dataset):
     return instr
 
 
-def mirb_doc_to_text(doc, model_specific_prompt_kwargs=None):
+def mirb_doc_to_text(doc, lmms_eval_specific_kwargs=None):
     subset, question = doc["subset"], doc["questions"]
     task_instruction = get_task_instruction(subset)
-    post_prompt = model_specific_prompt_kwargs["post_prompt"]
-    pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
+    post_prompt = lmms_eval_specific_kwargs["post_prompt"]
+    pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
     return f"{pre_prompt}{task_instruction}{question}{post_prompt}"
 
 
diff --git a/lmms_eval/tasks/mmbench/mmbench_cn_dev_lite.yaml b/lmms_eval/tasks/mmbench/mmbench_cn_dev_lite.yaml
index 4fc38f969..78b36a650 100644
--- a/lmms_eval/tasks/mmbench/mmbench_cn_dev_lite.yaml
+++ b/lmms_eval/tasks/mmbench/mmbench_cn_dev_lite.yaml
@@ -22,7 +22,7 @@ generation_kwargs:
   num_beams: 1
   do_sample: false
 process_results: !function cn_utils.mmbench_process_results
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\n请直接使用所提供的选项字母作为答案回答。"
diff --git a/lmms_eval/tasks/mmbench/mmbench_en_dev_lite.yaml b/lmms_eval/tasks/mmbench/mmbench_en_dev_lite.yaml
index 60b9574e1..226c4843d 100644
--- a/lmms_eval/tasks/mmbench/mmbench_en_dev_lite.yaml
+++ b/lmms_eval/tasks/mmbench/mmbench_en_dev_lite.yaml
@@ -5,7 +5,7 @@ dataset_name: mmbench_en_dev
 dataset_kwargs:
   token: True
 doc_to_target: "answer"
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer with the option's letter from the given choices directly."
diff --git a/lmms_eval/tasks/ok_vqa/ok_vqa_val2014_lite.yaml b/lmms_eval/tasks/ok_vqa/ok_vqa_val2014_lite.yaml
index 60b76f73e..7567e696e 100644
--- a/lmms_eval/tasks/ok_vqa/ok_vqa_val2014_lite.yaml
+++ b/lmms_eval/tasks/ok_vqa/ok_vqa_val2014_lite.yaml
@@ -20,7 +20,7 @@ metric_list:
     aggregation: !function utils.ok_vqa_aggregate_submissions
     higher_is_better: true
 process_results: !function utils.ok_vqa_process_results
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase."
diff --git a/lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml b/lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml
index c47a3dbae..716206769 100755
--- a/lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml
+++ b/lmms_eval/tasks/seedbench_2_plus/seedbench_2_plus.yaml
@@ -31,7 +31,7 @@ metric_list:
 metadata:
   - version: 0.0
 
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   llava :
     img_token : <image>
     post_prompt : "Answer with the option's letter from the given choices directly."
diff --git a/lmms_eval/tasks/textcaps/textcaps_val_lite.yaml b/lmms_eval/tasks/textcaps/textcaps_val_lite.yaml
index 95525e076..a72a40b60 100644
--- a/lmms_eval/tasks/textcaps/textcaps_val_lite.yaml
+++ b/lmms_eval/tasks/textcaps/textcaps_val_lite.yaml
@@ -43,6 +43,6 @@ metric_list:
   #  higher_is_better : true
 metadata:
   - version: 0.0
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     prompt: Provide a one-sentence caption for the provided image.
\ No newline at end of file
diff --git a/lmms_eval/tasks/textvqa/textvqa_val_lite.yaml b/lmms_eval/tasks/textvqa/textvqa_val_lite.yaml
index 5438228a8..dc18a09fc 100644
--- a/lmms_eval/tasks/textvqa/textvqa_val_lite.yaml
+++ b/lmms_eval/tasks/textvqa/textvqa_val_lite.yaml
@@ -19,7 +19,7 @@ generation_kwargs:
   until:
     - "ASSISTANT:"
 process_results: !function utils.textvqa_process_results
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question using a single word or phrase."
diff --git a/lmms_eval/tasks/vibe_eval/utils.py b/lmms_eval/tasks/vibe_eval/utils.py
index 09b0c58a2..71ad4afa3 100644
--- a/lmms_eval/tasks/vibe_eval/utils.py
+++ b/lmms_eval/tasks/vibe_eval/utils.py
@@ -8,6 +8,8 @@
 import os
 from copy import deepcopy
 
+from loguru import logger as eval_logger
+
 try:
     from reka import ChatMessage
     from reka.client import Reka
diff --git a/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val_lite.yaml b/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val_lite.yaml
index d686d3ff0..915fa7cdf 100644
--- a/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val_lite.yaml
+++ b/lmms_eval/tasks/vizwiz_vqa/vizwiz_vqa_val_lite.yaml
@@ -11,7 +11,7 @@ generation_kwargs:
     - "ASSISTANT:"
 metadata:
   - version: 0.0
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nWhen the provided information is insufficient, respond with 'Unanswerable'.\nAnswer the question using a single word or phrase."
diff --git a/lmms_eval/tasks/vqav2/vqav2_val_lite.yaml b/lmms_eval/tasks/vqav2/vqav2_val_lite.yaml
index 1446c8df6..c0211c626 100644
--- a/lmms_eval/tasks/vqav2/vqav2_val_lite.yaml
+++ b/lmms_eval/tasks/vqav2/vqav2_val_lite.yaml
@@ -11,7 +11,7 @@ generation_kwargs:
   max_new_tokens: 16
 metadata:
   - version: 0.0
-model_specific_prompt_kwargs:
+lmms_eval_specific_kwargs:
   default:
     pre_prompt: ""
     post_prompt: "\nAnswer the question using a single word or phrase."