examples/models/sglang_qwen3vl.sh (102 additions, 0 deletions)
@@ -0,0 +1,102 @@
#!/bin/bash

# Qwen3-VL Evaluation Script with SGLang Backend
# This script demonstrates how to evaluate Qwen3-VL models using SGLang for accelerated inference
#
# Requirements:
# - sglang>=0.4.6
# - qwen-vl-utils
# - CUDA-enabled GPU(s)
#
# Installation:
#   uv add "sglang[all]>=0.4.6" qwen-vl-utils
# OR
# pip install "sglang[all]>=0.4.6" qwen-vl-utils
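#
# Optional sanity check after installing (assumes sglang exposes a
# __version__ attribute, as most packages do):
#   python -c "import sglang; print(sglang.__version__)"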

# ============================================================================
# Configuration
# ============================================================================

# Model Configuration
# Available Qwen3-VL models:
# - Qwen/Qwen3-VL-30B-A3B-Instruct
# - Qwen/Qwen3-VL-30B-A3B-Thinking
# - Qwen/Qwen3-VL-235B-A22B-Instruct
# - Qwen/Qwen3-VL-235B-A22B-Thinking
MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct"

# Parallelization Settings
# Adjust based on your GPU configuration
TENSOR_PARALLEL_SIZE=4 # Number of GPUs for tensor parallelism (tp_size in SGLang)
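# To restrict the run to specific GPUs, set CUDA_VISIBLE_DEVICES before
# launching, e.g.:
#   export CUDA_VISIBLE_DEVICES=0,1,2,3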

# Memory and Performance Settings
GPU_MEMORY_UTILIZATION=0.85 # mem_fraction_static in SGLang (0.0 - 1.0)
BATCH_SIZE=64 # Batch size for evaluation

# SGLang Specific Settings
MAX_PIXELS=1605632           # Maximum pixels for image processing (2048 * 28 * 28)
MIN_PIXELS=784               # Minimum pixels (28 * 28)
MAX_FRAME_NUM=32 # Maximum number of video frames
THREADS=16 # Number of threads for decoding visuals

# Task Configuration
# Common tasks: mmmu_val, mme, mathvista, ai2d, etc.
TASKS="mmmu_val,mme"

# Output Configuration
OUTPUT_PATH="./logs/qwen3vl_sglang"
LOG_SAMPLES=true
LOG_SUFFIX="qwen3vl_sglang"

# Evaluation Limits (optional)
# LIMIT=100 # Uncomment to limit number of samples (for testing)
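# The script only checks whether LIMIT is non-empty, so it can also be
# supplied as an environment variable without editing this file:
#   LIMIT=100 bash examples/models/sglang_qwen3vl.sh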

# ============================================================================
# Environment Configuration
# ============================================================================
export HF_HOME="${HF_HOME:-$HOME/.cache/huggingface}"

# ============================================================================
# Run Evaluation
# ============================================================================

echo "=========================================="
echo "Qwen3-VL Evaluation with SGLang"
echo "=========================================="
echo "Model: $MODEL"
echo "Tensor Parallel Size: $TENSOR_PARALLEL_SIZE"
echo "Tasks: $TASKS"
echo "Batch Size: $BATCH_SIZE"
echo "Max Pixels: $MAX_PIXELS"
echo "Output Path: $OUTPUT_PATH"
echo "=========================================="

# Build the command
CMD="uv run python -m lmms_eval \
--model sglang_runtime \
--model_args model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION},max_pixels=${MAX_PIXELS},min_pixels=${MIN_PIXELS},max_frame_num=${MAX_FRAME_NUM},threads=${THREADS} \
--tasks ${TASKS} \
--batch_size ${BATCH_SIZE} \
--output_path ${OUTPUT_PATH}"

# Add optional arguments
if [ "$LOG_SAMPLES" = true ]; then
CMD="$CMD --log_samples --log_samples_suffix ${LOG_SUFFIX}"
fi

if [ -n "$LIMIT" ]; then
    CMD="$CMD --limit ${LIMIT}"
fi

# Execute
echo "Running command:"
echo "$CMD"
echo ""

eval "$CMD"

echo ""
echo "=========================================="
echo "Evaluation Complete!"
echo "Results saved to: $OUTPUT_PATH"
echo "=========================================="
examples/models/vllm_qwen3vl.sh (99 additions, 0 deletions)
@@ -0,0 +1,99 @@
#!/bin/bash

# Qwen3-VL Evaluation Script with vLLM Backend
# This script demonstrates how to evaluate Qwen3-VL models using vLLM for accelerated inference
#
# Requirements:
# - vllm>=0.11.0
# - qwen-vl-utils
# - CUDA-enabled GPU(s)
#
# Installation:
#   uv add "vllm>=0.11.0" qwen-vl-utils
#   OR
#   pip install "vllm>=0.11.0" qwen-vl-utils
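#
# Optional sanity check after installing (assumes vllm exposes a
# __version__ attribute, as most packages do):
#   python -c "import vllm; print(vllm.__version__)"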

# ============================================================================
# Configuration
# ============================================================================

# Model Configuration
# Available Qwen3-VL models:
# - Qwen/Qwen3-VL-30B-A3B-Instruct
# - Qwen/Qwen3-VL-30B-A3B-Thinking
# - Qwen/Qwen3-VL-235B-A22B-Instruct
# - Qwen/Qwen3-VL-235B-A22B-Thinking
MODEL="Qwen/Qwen3-VL-30B-A3B-Instruct"

# Parallelization Settings
# Adjust based on your GPU configuration
TENSOR_PARALLEL_SIZE=4 # Number of GPUs for tensor parallelism
DATA_PARALLEL_SIZE=1 # Number of GPUs for data parallelism
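# Note: data parallelism replicates the whole engine, so the total GPU count
# is roughly TENSOR_PARALLEL_SIZE * DATA_PARALLEL_SIZE (4 * 1 = 4 with the
# defaults above).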

# Memory and Performance Settings
GPU_MEMORY_UTILIZATION=0.85 # Fraction of GPU memory to use (0.0 - 1.0)
BATCH_SIZE=64 # Batch size for evaluation

# Task Configuration
# Common tasks: mmmu_val, mme, mathvista, ai2d, etc.
TASKS="mmmu_val,mme"

# Output Configuration
OUTPUT_PATH="./logs/qwen3vl_vllm"
LOG_SAMPLES=true
LOG_SUFFIX="qwen3vl_vllm"

# Evaluation Limits (optional)
# LIMIT=100 # Uncomment to limit number of samples (for testing)
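# As in the SGLang script, LIMIT is only checked for non-emptiness, so it can
# be passed as an environment variable:
#   LIMIT=100 bash examples/models/vllm_qwen3vl.sh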

# ============================================================================
# NCCL Configuration (for multi-GPU setups)
# ============================================================================
export NCCL_BLOCKING_WAIT=1
export NCCL_TIMEOUT=18000000
# export NCCL_DEBUG=INFO # Uncomment for debugging

# ============================================================================
# Run Evaluation
# ============================================================================

echo "=========================================="
echo "Qwen3-VL Evaluation with vLLM"
echo "=========================================="
echo "Model: $MODEL"
echo "Tensor Parallel Size: $TENSOR_PARALLEL_SIZE"
echo "Data Parallel Size: $DATA_PARALLEL_SIZE"
echo "Tasks: $TASKS"
echo "Batch Size: $BATCH_SIZE"
echo "Output Path: $OUTPUT_PATH"
echo "=========================================="

# Build the command
CMD="uv run python -m lmms_eval \
--model vllm_chat \
--model_args model=${MODEL},tensor_parallel_size=${TENSOR_PARALLEL_SIZE},data_parallel_size=${DATA_PARALLEL_SIZE},gpu_memory_utilization=${GPU_MEMORY_UTILIZATION} \
--tasks ${TASKS} \
--batch_size ${BATCH_SIZE} \
--output_path ${OUTPUT_PATH}"

# Add optional arguments
if [ "$LOG_SAMPLES" = true ]; then
CMD="$CMD --log_samples --log_samples_suffix ${LOG_SUFFIX}"
fi

if [ -n "$LIMIT" ]; then
    CMD="$CMD --limit ${LIMIT}"
fi

# Execute
echo "Running command:"
echo "$CMD"
echo ""

eval "$CMD"

echo ""
echo "=========================================="
echo "Evaluation Complete!"
echo "Results saved to: $OUTPUT_PATH"
echo "=========================================="
pyproject.toml (3 additions, 2 deletions)
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "lmms_eval"
version = "0.4.1"
version = "0.5.0"
authors = [
{ name = "LMMMs-Lab Evaluation Team", email = "[email protected]" },
]
@@ -43,7 +43,7 @@ dependencies = [
"ftfy",
"openai",
"opencv-python-headless",
"av",
"av<16.0.0",
"hf_transfer",
"nltk",
"sentencepiece",
@@ -74,6 +74,7 @@ dependencies = [
"protobuf",
"sentence_transformers",
"python-dotenv",
"qwen-vl-utils>=0.0.14",
]

[project.optional-dependencies]