#!/bin/bash GPUS_PER_NODE=8 NNODES=1 NODE_RANK=0 MASTER_ADDR=localhost MASTER_PORT=6001 MODEL="path/to/minicpmv2" # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations. # See the section for finetuning in README for more information. DATA="path/to/trainging_data" EVAL_DATA="path/to/test_data" DISTRIBUTED_ARGS=" --nproc_per_node $GPUS_PER_NODE \ --nnodes $NNODES \ --node_rank $NODE_RANK \ --master_addr $MASTER_ADDR \ --master_port $MASTER_PORT " torchrun $DISTRIBUTED_ARGS finetune.py \ --model_name_or_path $MODEL \ --data_path $DATA \ --eval_data_path $EVAL_DATA \ --remove_unused_columns false \ --label_names "labels" \ --prediction_loss_only false \ --bf16 true \ --bf16_full_eval true \ --do_train \ --do_eval \ --max_steps 80000 \ --eval_steps 200 \ --output_dir output/output_minicpmv2 \ --logging_dir output/output_minicpmv2 \ --logging_strategy "steps" \ --per_device_train_batch_size 8 \ --per_device_eval_batch_size 1 \ --gradient_accumulation_steps 1 \ --evaluation_strategy "steps" \ --save_strategy "steps" \ --save_steps 1000 \ --save_total_limit 10 \ --learning_rate 5e-7 \ --weight_decay 0.1 \ --adam_beta2 0.95 \ --warmup_ratio 0.01 \ --lr_scheduler_type "cosine" \ --logging_steps 1 \ --gradient_checkpointing True \ --deepspeed ds_config_zero2.json \ --report_to "tensorboard" # wandb