update finetuning code

2026-02-04 17:59:18 +08:00 · 2024-05-08 09:51:34 +08:00
parent 9f345c4020
commit f6cbd4fb25
9 changed files with 0 additions and 37 deletions
--- a/finetune/finetune_ds.sh
+++ b/finetune/finetune_ds.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Launches finetune.py through torchrun, using the DeepSpeed config ds_config_zero2.json.
+GPUS_PER_NODE=8        # passed to torchrun as --nproc_per_node
+NNODES=1               # passed to torchrun as --nnodes
+NODE_RANK=0            # passed to torchrun as --node_rank
+MASTER_ADDR=localhost  # passed to torchrun as --master_addr
+MASTER_PORT=6001       # passed to torchrun as --master_port
+
+MODEL="path/to/minicpmv2"
+# ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
+# See the section for finetuning in README for more information.
+DATA="path/to/training_data"
+EVAL_DATA="path/to/test_data"
+
+DISTRIBUTED_ARGS="
+    --nproc_per_node $GPUS_PER_NODE \
+    --nnodes $NNODES \
+    --node_rank $NODE_RANK \
+    --master_addr $MASTER_ADDR \
+    --master_port $MASTER_PORT
+" # expanded unquoted below on purpose: word splitting turns this string into separate flags
+torchrun $DISTRIBUTED_ARGS finetune.py \
+    --model_name_or_path "$MODEL" \
+    --data_path "$DATA" \
+    --eval_data_path "$EVAL_DATA" \
+    --remove_unused_columns false \
+    --label_names "labels" \
+    --prediction_loss_only  false \
+    --bf16 true \
+    --bf16_full_eval true \
+    --do_train \
+    --do_eval \
+    --max_steps 80000 \
+    --eval_steps 200 \
+    --output_dir output/output_minicpmv2 \
+    --logging_dir output/output_minicpmv2 \
+    --logging_strategy "steps" \
+    --per_device_train_batch_size 8 \
+    --per_device_eval_batch_size 1 \
+    --gradient_accumulation_steps 1 \
+    --evaluation_strategy "steps" \
+    --save_strategy "steps" \
+    --save_steps 1000 \
+    --save_total_limit 10 \
+    --learning_rate 5e-7 \
+    --weight_decay 0.1 \
+    --adam_beta2 0.95 \
+    --warmup_ratio 0.01 \
+    --lr_scheduler_type "cosine" \
+    --logging_steps 1 \
+    --gradient_checkpointing True \
+    --deepspeed ds_config_zero2.json \
+    --report_to "tensorboard" # wandb