Files
MiniCPM-o/eval_mm/vlmevalkit/scripts/run_inference.sh
2025-01-21 15:34:54 +08:00

42 lines
1.6 KiB
Bash

# Environment exported for the inference / evaluation commands launched by
# this script. Requires bash (BASH_SOURCE is used below).

# Put the CUDA toolchain directory first on PATH.
export PATH="/usr/local/cuda/bin:${PATH}"
export HF_ENDPOINT="https://hf-mirror.com"
export OMP_NUM_THREADS=1

# Assign first, export second: `export var=$(cmd)` would hide a failing cmd.
timestamp=$(date +"%Y%m%d%H%M%S")
export timestamp

# NOTE(review): consumers of OLD_VERSION are not visible in this file.
export OLD_VERSION='False'

# SELF_DIR is not defined anywhere in this script. A value supplied by the
# caller is honoured; otherwise fall back to the directory that contains this
# script so that `dirname` always receives an operand.
# NOTE(review): the fallback assumes SELF_DIR is meant to be the script's own
# directory (so its parent lands on PYTHONPATH) - confirm against callers.
SELF_DIR=${SELF_DIR:-$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)}

# Prepend the parent of SELF_DIR; only add the ':' separator when PYTHONPATH
# already has content, so no empty path entry is introduced.
PYTHONPATH="$(dirname -- "${SELF_DIR}")${PYTHONPATH:+:${PYTHONPATH}}"
export PYTHONPATH

export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
# gpu consumed (figures as noted by the script authors)
#   fp16 17-18G
#   int4 7-8G

# Positional arguments:
#   $1  model to be used         Example: MODELNAME=MiniCPM-o-2_6
#   $2  datasets to be tested    Example: DATALIST=MMMU_DEV_VAL
#       Several dataset names may be passed as one whitespace-separated string.
MODELNAME=${1:-}
DATALIST=${2:-}

#######################################
# Run inference and, unless skipped, evaluation for one dataset.
# Arguments:
#   $1 - model name
#   $2 - dataset name
# Outputs:
#   Progress messages on stdout, warnings/errors on stderr.
# Returns:
#   0 on success; 1 when the second inference pass or the evaluation failed.
#######################################
run_dataset() {
  local model=$1
  local dataset=$2
  local -r nproc=8
  local status=0

  echo "Starting inference with model $model on dataset $dataset"

  # run on multi gpus with torchrun command
  # The first run may fail, so inference is launched twice (each pass on its
  # own master port). A failure of the first pass is therefore tolerated.
  torchrun --master_port 29500 --nproc_per_node="${nproc}" run.py \
    --data "$dataset" --model "$model" --mode infer --reuse \
    || printf 'warning: first inference pass failed for %s, retrying\n' "$dataset" >&2

  if ! torchrun --master_port 29501 --nproc_per_node="${nproc}" run.py \
    --data "$dataset" --model "$model" --mode infer --reuse; then
    printf 'error: second inference pass failed for %s\n' "$dataset" >&2
    status=1
  fi

  # for benchmarks which require gpt for scoring, you need to specify
  # OPENAI_API_BASE and OPENAI_API_KEY in .env file
  if [[ "$dataset" == *"MMBench_TEST"* ]]; then
    echo "Skipping evaluation for dataset $dataset"
  else
    echo "Starting evaluation with model $model on datasets $dataset"
    if ! python run.py --data "$dataset" --model "$model" --nproc 16 --verbose; then
      printf 'error: evaluation failed for %s\n' "$dataset" >&2
      status=1
    fi
  fi

  return "$status"
}

#######################################
# Validate the arguments and process every dataset in the list.
# Arguments:
#   $1 - model name
#   $2 - whitespace-separated dataset names
# Returns:
#   0 when every dataset succeeded, 1 when any dataset failed,
#   2 when an argument is missing.
#######################################
main() {
  local model=${1:-}
  local datalist=${2:-}

  if [[ -z "$model" || -z "$datalist" ]]; then
    printf 'Usage: %s MODELNAME DATALIST\n' "${0##*/}" >&2
    printf 'Example: %s MiniCPM-o-2_6 MMMU_DEV_VAL\n' "${0##*/}" >&2
    return 2
  fi

  # Split on whitespace without pathname expansion (an unquoted $datalist
  # would also glob). `read` hits EOF before a NUL delimiter and returns
  # non-zero even though the array is filled, hence `|| true`.
  local -a datasets=()
  IFS=$' \t\n' read -r -d '' -a datasets <<<"$datalist" || true

  local dataset
  local status=0
  for dataset in "${datasets[@]}"; do
    # Keep going with the remaining datasets, but remember the failure.
    run_dataset "$model" "$dataset" || status=1
  done

  return "$status"
}

# Keep this the last executed command so its status becomes the script's
# exit status.
main "$MODELNAME" "$DATALIST"
# run on single gpu with python command (alternative to the torchrun loop above)
# the inference line appears twice, presumably for the same reason as the
# torchrun runs: the first run may fail
# python run.py --data $DATALIST --model $MODELNAME --verbose --mode infer
# python run.py --data $DATALIST --model $MODELNAME --verbose --mode infer
# echo "Starting evaluation with model $MODELNAME on datasets $DATALIST"
# python run.py --data $DATALIST --model $MODELNAME --nproc 16 --verbose