
Commit c65118d

Merge pull request #138 from EvolvingLMMs-Lab/internal_main_dev
[Sync Features] add vila, add wildvision, add vibe-eval, add interleave bench
2 parents a60e4e0 + e31cd78 commit c65118d


70 files changed: +3308 additions, −486 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ temp
 __pycache__
 .ipynb_checkpoints
 temp
+.DS_STORE
 # IPython
 profile_default/
 ipython_config.py

README.md

Lines changed: 3 additions & 1 deletion
@@ -12,7 +12,9 @@

 ## Annoucement

-- [2024-06] 🎬🎬 The `lmms-eval/v0.2` has been upgraded to support video evaluations for video models like LLaVA-NeXT Video and Gemini 1.5 Pro across tasks such as EgoSchema, PerceptionTest, VideoMME, and more. Please refer to the [blog](https://lmms-lab.github.io/posts/lmms-eval-0.2/) for more details
+- [2024-07] 👨‍💻👨‍💻 The `lmms-eval/v0.2.1` has been upgraded to support more models, including [LongVA](https://github.com/EvolvingLMMs-Lab/LongVA), [InterVL-2](https://github.com/OpenGVLab/InternVL), [VILA](https://github.com/NVlabs/VILA), and many more evaluation tasks, e.g. [Details Captions](https://github.com/EvolvingLMMs-Lab/lmms-eval/pull/136), [MLVU](https://arxiv.org/abs/2406.04264), [WildVision-Bench](https://huggingface.co/datasets/WildVision/wildvision-arena-data), [VITATECS](https://github.com/lscpku/VITATECS) and [LLaVA-Interleave-Bench](https://llava-vl.github.io/blog/2024-06-16-llava-next-interleave/).
+
+- [2024-06] 🎬🎬 The `lmms-eval/v0.2.0` has been upgraded to support video evaluations for video models like LLaVA-NeXT Video and Gemini 1.5 Pro across tasks such as EgoSchema, PerceptionTest, VideoMME, and more. Please refer to the [blog](https://lmms-lab.github.io/posts/lmms-eval-0.2/) for more details

 - [2024-03] 📝📝 We have released the first version of `lmms-eval`, please refer to the [blog](https://lmms-lab.github.io/posts/lmms-eval-0.1/) for more details

lmms_eval/__main__.py

Lines changed: 0 additions & 1 deletion
@@ -165,7 +165,6 @@ def cli_evaluate(args: Union[argparse.Namespace, None] = None) -> None:
     # reset logger
     eval_logger.remove()
     eval_logger.add(sys.stdout, colorize=True, level=args.verbosity)
-    eval_logger.add(sys.stderr, level=args.verbosity)
     eval_logger.info(f"Verbosity set to {args.verbosity}")
     os.environ["TOKENIZERS_PARALLELISM"] = "false"
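This change drops the duplicate stderr sink, so each log record is emitted once to stdout instead of twice. A minimal sketch of the loguru pattern, with an illustrative level string standing in for `args.verbosity`:

```python
import sys

from loguru import logger

# Drop loguru's default sink, then attach exactly one sink.
# Keeping both a stdout and a stderr sink would print every record twice.
logger.remove()
logger.add(sys.stdout, colorize=True, level="INFO")  # "INFO" stands in for args.verbosity
logger.info("Verbosity set to INFO")
```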

lmms_eval/api/samplers.py

Lines changed: 3 additions & 1 deletion
@@ -37,7 +37,9 @@ def get_context(self, doc, num_fewshot):
                 + (
                     str(self.doc_to_target(doc)[0])
                     if type(self.doc_to_target(doc)) is list
-                    else self.doc_to_target(doc) if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str) else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
+                    else self.doc_to_target(doc)
+                    if (self.config.doc_to_choice is None or type(self.doc_to_target(doc)) is str)
+                    else str(self.doc_to_choice(doc)[self.doc_to_target(doc)])
                 )
                 for doc in selected_docs
             ]
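The reflowed chained conditional is easier to follow when unrolled into statements. A sketch of the equivalent logic (`resolve_target` is a hypothetical helper name; `doc_to_target`, `doc_to_choice`, and `config` behave as in the diff):

```python
def resolve_target(self, doc):
    # Equivalent, unrolled form of the chained conditional in get_context.
    target = self.doc_to_target(doc)
    if isinstance(target, list):
        return str(target[0])  # list-valued target: keep the first entry
    if self.config.doc_to_choice is None or isinstance(target, str):
        return target  # already a string, or there is no choice list to index
    return str(self.doc_to_choice(doc)[target])  # integer target: look up its choice
```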

lmms_eval/evaluator.py

Lines changed: 1 addition & 6 deletions
@@ -327,12 +327,7 @@ def evaluate(
         # hack: remove image columns to speed avoid loading images and speed up postprocessing
         # reason: doc_iterator will actually load image if it's in the doc.
         docs = task.test_docs() if task.has_test_docs() else task.validation_docs()
-        if "d170" not in task_name \
-            and "dc100" not in task_name \
-            and "dc200" not in task_name \
-            and "llava_wilder" not in task_name \
-            and "livebench" not in task_name \
-            and "wildvision" not in task_name:
+        if "d170" not in task_name and "dc100" not in task_name and "dc200" not in task_name and "llava_wilder" not in task_name and "livebench" not in task_name and "wildvision" not in task_name:
             remove_cols = []
             features = docs.features
             # If it is an Image instance or a Sequence of Image instance. Remove it
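The condition is collapsed onto one line; behavior is unchanged. For context, stripping image-typed columns before iterating is a standard `datasets` idiom, because reading a row decodes every `Image` feature it contains. A standalone sketch of the column-removal step (the dataset id is hypothetical):

```python
from datasets import Image, Sequence, load_dataset

docs = load_dataset("lmms-lab/example-task", split="test")  # hypothetical dataset id

# Find columns whose feature is an Image, or a Sequence of Images,
# and drop them so iterating over rows never pays the image-decoding cost.
remove_cols = [
    name
    for name, feature in docs.features.items()
    if isinstance(feature, Image) or (isinstance(feature, Sequence) and isinstance(feature.feature, Image))
]
docs = docs.remove_columns(remove_cols)
```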

lmms_eval/models/__init__.py

Lines changed: 10 additions & 7 deletions
@@ -4,6 +4,10 @@
 from loguru import logger
 import sys

+import hf_transfer
+
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+
 logger.remove()
 logger.add(sys.stdout, level="WARNING")

@@ -25,6 +29,7 @@
     "llava_sglang": "LlavaSglang",
     "idefics2": "Idefics2",
     "internvl": "InternVLChat",
+    "internvl2": "InternVL2",
     "gemini_api": "GeminiAPI",
     "reka": "Reka",
     "from_log": "FromLog",
@@ -33,14 +38,16 @@
     "tinyllava": "TinyLlava",
     "llava_hf": "LlavaHf",
     "longva": "LongVA",
+    "llava_hf": "LlavaHf",
+    "longva": "LongVA",
+    "vila": "VILA",
 }

 for model_name, model_class in AVAILABLE_MODELS.items():
     try:
         exec(f"from .{model_name} import {model_class}")
     except ImportError as e:
-        # logger.warning(f"Failed to import {model_class} from {model_name}: {e}")
-        pass
+        logger.warning(f"Failed to import {model_class} from {model_name}: {e}")

 if os.environ.get("LMMS_EVAL_PLUGINS", None):
     # Allow specifying other packages to import models from
@@ -50,8 +57,4 @@
         try:
             exec(f"from {plugin}.models.{model_name} import {model_class}")
         except ImportError:
-            pass
-
-import hf_transfer
-
-os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+            logger.warning(f"Failed to import {model_class} from {model_name}")
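Two things happen here: `HF_HUB_ENABLE_HF_TRANSFER=1` is now set at import time (it enables the Rust-based `hf_transfer` downloader for Hugging Face Hub fetches, and must be set before any download starts), and failed model imports are logged instead of silently swallowed. A sketch of the same registry pattern using `importlib` rather than `exec` (registry abbreviated to two entries):

```python
import importlib

from loguru import logger

# Abbreviated registry: module name -> class name.
AVAILABLE_MODELS = {
    "internvl2": "InternVL2",
    "vila": "VILA",
}

for model_name, model_class in AVAILABLE_MODELS.items():
    try:
        module = importlib.import_module(f"lmms_eval.models.{model_name}")
        globals()[model_class] = getattr(module, model_class)
    except ImportError as e:
        # Surfacing the error makes missing optional dependencies visible.
        logger.warning(f"Failed to import {model_class} from {model_name}: {e}")
```

Note that the updated `AVAILABLE_MODELS` dict repeats the `llava_hf` and `longva` keys; in a Python dict literal the later entries silently overwrite the earlier ones, which is harmless here since the values are identical.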

lmms_eval/models/batch_gpt4.py

Lines changed: 3 additions & 3 deletions
@@ -59,7 +59,7 @@ def __init__(
         api_key: str = API_KEY,
         api_url: str = API_URL,
         modality: str = "image",
-        max_frames_for_video: int = 10,
+        max_frames_num: int = 10,
         timeout: int = 120,
         **kwargs,
     ) -> None:
@@ -69,7 +69,7 @@ def __init__(
         # Here we just use the same token as llava for convenient
         self.model_version = model_version
         self.modality = modality
-        self.max_frames_for_video = max_frames_for_video
+        self.max_frames_num = max_frames_num
         self.image_token = "<image>"
         self.timeout = timeout

@@ -128,7 +128,7 @@ def generate_until(self, requests):
                 img = self.encode_image(visual)
                 imgs.append(img)
             elif self.modality == "video":
-                frames = self.encode_video(visual, self.max_frames_for_video)
+                frames = self.encode_video(visual, self.max_frames_num)
                 imgs.extend(frames)

         messages = []

lmms_eval/models/claude.py

Lines changed: 28 additions & 19 deletions
@@ -40,6 +40,7 @@ def __init__(
         image_token: str = "<image>",  # Use to separate interleaved image and text
         system_prompt: str = "",  # Whether you want some special system prompt here
         modality: str = "image",
+        max_frames_num: int = 10,
         continual_mode: bool = False,
         response_persistent_folder: str = None,
         **kwargs,
@@ -49,20 +50,24 @@ def __init__(
         self.image_token = image_token
         self.system_prompt = system_prompt
         self.modality = modality
+        self.max_frames_num = max_frames_num

         self.continual_mode = continual_mode
-        if self.continual_mode and response_persistent_folder is None:
-            raise ValueError("Continual mode requires a persistent path for the response. Please provide a valid path.")
-        self.response_persistent_folder = response_persistent_folder
-        self.response_persistent_file = os.path.join(self.response_persistent_folder, f"{self.model_version}_response.json")
-
-        if os.path.exists(self.response_persistent_file):
-            with open(self.response_persistent_file, "r") as f:
-                self.response_cache = json.load(f)
-            self.cache_mode = "resume"
-        else:
-            self.response_cache = {}
-            self.cache_mode = "start"
+        if self.continual_mode:
+            if response_persistent_folder is None:
+                raise ValueError("Continual mode requires a persistent path for the response. Please provide a valid path.")
+
+            os.makedirs(response_persistent_folder, exist_ok=True)
+            self.response_persistent_folder = response_persistent_folder
+            self.response_persistent_file = os.path.join(self.response_persistent_folder, f"{self.model_version}_response.json")
+
+            if os.path.exists(self.response_persistent_file):
+                with open(self.response_persistent_file, "r") as f:
+                    self.response_cache = json.load(f)
+                self.cache_mode = "resume"
+            else:
+                self.response_cache = {}
+                self.cache_mode = "start"

         accelerator = Accelerator()
         if accelerator.num_processes > 1:
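Continual mode now creates the cache directory up front and only touches the filesystem when caching is actually enabled. A sketch of the resume-or-start pattern (the helper name is hypothetical; the file-naming scheme follows the diff):

```python
import json
import os

def load_response_cache(folder: str, model_version: str):
    """Hypothetical helper mirroring the __init__ logic above."""
    os.makedirs(folder, exist_ok=True)  # safe if the folder already exists
    cache_file = os.path.join(folder, f"{model_version}_response.json")
    if os.path.exists(cache_file):
        with open(cache_file, "r") as f:
            return json.load(f), "resume"  # pick up where a previous run stopped
    return {}, "start"  # no cache yet: begin fresh
```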
@@ -81,7 +86,7 @@ def __init__(

     def encode_image(self, image):
         output_buffer = BytesIO()
-        image.save(output_buffer, format="PNG")
+        image.save(output_buffer, format="JPEG")
         byte_data = output_buffer.getvalue()
         base64_str = base64.b64encode(byte_data).decode("utf-8")
         return base64_str
@@ -129,18 +134,18 @@ def shrink_image_to_file_size(self, img: Image, max_file_size=4838990) -> Image:
     def encode_video(self, video_path):
         vr = VideoReader(video_path, ctx=cpu(0))
         total_frame_num = len(vr)
-        uniform_sampled_frames = np.linspace(0, total_frame_num - 1, self.max_frames_for_video, dtype=int)
+        uniform_sampled_frames = np.linspace(0, total_frame_num - 1, self.max_frames_num, dtype=int)
         frame_idx = uniform_sampled_frames.tolist()
         frames = vr.get_batch(frame_idx).asnumpy()

         base64_frames = []
         for frame in frames:
             img = Image.fromarray(frame)
             output_buffer = BytesIO()
-            img.save(output_buffer, format="PNG")
+            img.save(output_buffer, format="JPEG")
             byte_data = output_buffer.getvalue()
             base64_str = base64.b64encode(byte_data).decode("utf-8")
-            base64_frames.append(f"data:image/jpeg;base64,{base64_str}")
+            base64_frames.append(f"{base64_str}")

         return base64_frames
@@ -154,7 +159,7 @@ def generate_until(self, requests) -> List[str]:
                 "type": "image",
                 "source": {
                     "type": "base64",
-                    "media_type": "image/png",
+                    "media_type": "image/jpeg",
                 },
             }
         empty_text_block = {"type": "text"}
@@ -218,10 +223,12 @@ def generate_until(self, requests) -> List[str]:

         if "max_new_tokens" not in gen_kwargs:
             gen_kwargs["max_new_tokens"] = 1024
+        if gen_kwargs["max_new_tokens"] > 4096:
+            gen_kwargs["max_new_tokens"] = 4096
         if "temperature" not in gen_kwargs:
             gen_kwargs["temperature"] = 0
-        if "top_p" not in gen_kwargs:
-            gen_kwargs["top_p"] = None
+        if "top_p" not in gen_kwargs or gen_kwargs["top_p"] is None:
+            gen_kwargs["top_p"] = 1
         if "num_beams" not in gen_kwargs:
             gen_kwargs["num_beams"] = 1
@@ -238,11 +245,13 @@ def generate_until(self, requests) -> List[str]:
                 pbar.update(1)
                 continue

+            response_text = message.content[0].text
             res.append(message.content[0].text)
             pbar.update(1)

             ###################### CONTINUAL MODE ######################
             if self.continual_mode is True:  # Cache the response
+                response_text = message.content[0].text
                 doc_uuid = f"{task}___{split}___{doc_id}"
                 self.response_cache[doc_uuid] = response_text
                 with open(self.response_persistent_file, "w") as f:
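The video path samples `max_frames_num` frames uniformly with `np.linspace` and now ships them as JPEG, matching the `media_type: image/jpeg` source blocks; the old `data:image/jpeg;base64,` prefix is dropped because the Anthropic messages API takes the prefix-free base64 payload alongside a separate media type. A standalone sketch of the sampling and encoding step (decord and Pillow, with an illustrative default frame count):

```python
import base64
from io import BytesIO

import numpy as np
from PIL import Image
from decord import VideoReader, cpu

def sample_frames_as_base64_jpeg(video_path: str, max_frames_num: int = 10) -> list:
    """Uniformly sample frames and return prefix-free base64 JPEG strings."""
    vr = VideoReader(video_path, ctx=cpu(0))
    indices = np.linspace(0, len(vr) - 1, max_frames_num, dtype=int).tolist()
    frames = vr.get_batch(indices).asnumpy()  # (N, H, W, 3) uint8 array

    encoded = []
    for frame in frames:
        buffer = BytesIO()
        Image.fromarray(frame).save(buffer, format="JPEG")
        encoded.append(base64.b64encode(buffer.getvalue()).decode("utf-8"))
    return encoded
```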

lmms_eval/models/gemini_api.py

Lines changed: 3 additions & 1 deletion
@@ -31,7 +31,7 @@
 class GeminiAPI(lmms):
     def __init__(
         self,
-        model_version: str = "gemini-1.5-flash-latest",
+        model_version: str = "gemini-1.5-pro",
         modality: str = "image",
         timeout: int = 120,
         continual_mode: bool = False,
@@ -46,6 +46,8 @@ def __init__(
         if self.continual_mode and response_persistent_folder is None:
             raise ValueError("Continual mode requires a persistent path for the response. We will cache the Gemini API response in this path and use it for future requests. Please provide a valid path.")
         self.response_persistent_folder = response_persistent_folder
+        if not os.path.exists(self.response_persistent_folder):
+            os.makedirs(self.response_persistent_folder)
         self.response_persistent_file = os.path.join(self.response_persistent_folder, f"{self.model_version}_response.json")

         if os.path.exists(self.response_persistent_file):
