Modify eval_mm for MiniCPM-o 2.6

Poppy Xu
2025-01-21 15:34:54 +08:00
parent ec68cefc17
commit d8f382e157
82 changed files with 14279 additions and 843 deletions


@@ -5,27 +5,37 @@ export OMP_NUM_THREADS=1
export timestamp=`date +"%Y%m%d%H%M%S"`
export OLD_VERSION='False'
export PYTHONPATH=$(dirname $SELF_DIR):$PYTHONPATH
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
# GPU memory consumed
# fp16 17-18G
# int4 7-8G
# model to be used
# Example: MODELNAME=MiniCPM_V_2_6
# Example: MODELNAME=MiniCPM-o-2_6
MODELNAME=$1
# datasets to be tested
# Example: DATALIST="MMMU_DEV_VAL MathVista_MINI MMVet MMBench_DEV_EN_V11 MMBench_DEV_CN_V11 MMStar HallusionBench AI2D_TEST"
# Example: DATALIST=MMMU_DEV_VAL
DATALIST=$2
# test mode: all (inference + evaluation) or infer (inference only)
MODE=$3
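# example invocation (the script filename below is illustrative, not necessarily this file's real name):
# bash eval.sh MiniCPM-o-2_6 "MMMU_DEV_VAL MMVet" all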
echo "Starting inference with model $MODELNAME on datasets $DATALIST"
# run on multiple GPUs with the torchrun command
# note: the command is issued twice because the first run may fail partway through
torchrun --nproc_per_node=8 run.py --data $DATALIST --model $MODELNAME --mode $MODE
torchrun --nproc_per_node=8 run.py --data $DATALIST --model $MODELNAME --mode $MODE
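# a sketch for running on fewer GPUs (assumption: --nproc_per_node should match the number
# of GPUs listed in CUDA_VISIBLE_DEVICES), e.g. four cards:
# CUDA_VISIBLE_DEVICES="0,1,2,3" torchrun --nproc_per_node=4 run.py --data $DATALIST --model $MODELNAME --mode $MODE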
# run on a single GPU with the python command
# python run.py --data $DATALIST --model $MODELNAME --verbose --mode $MODE
# python run.py --data $DATALIST --model $MODELNAME --verbose --mode $MODE
for DATASET in $DATALIST; do
echo "Starting inference with model $MODELNAME on dataset $DATASET"
torchrun --master_port 29500 --nproc_per_node=8 run.py --data $DATASET --model $MODELNAME --mode infer --reuse
torchrun --master_port 29501 --nproc_per_node=8 run.py --data $DATASET --model $MODELNAME --mode infer --reuse
# for benchmarks that require GPT for scoring, specify OPENAI_API_BASE and OPENAI_API_KEY in the .env file
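# a minimal .env sketch (placeholder values, not real credentials):
# OPENAI_API_BASE=<your_api_base>
# OPENAI_API_KEY=<your_api_key>
# MMBench_TEST splits are skipped below, presumably because their answers are not public
# and must be scored via the official evaluation server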
if [[ "$DATASET" == *"MMBench_TEST"*]]; then
echo "Skipping evaluation for dataset $DATASET"
else
echo "Starting evaluation with model $MODELNAME on datasets $DATASET"
python run.py --data $DATASET --model $MODELNAME --nproc 16 --verbose
fi
done
# run on a single GPU with the python command
# python run.py --data $DATALIST --model $MODELNAME --verbose --mode infer
# python run.py --data $DATALIST --model $MODELNAME --verbose --mode infer
# echo "Starting evaluation with model $MODELNAME on datasets $DATASET"
# python run.py --data $DATASET --model $MODELNAME --nproc 16 --verbose