diff --git a/README_en.md b/README_en.md
index 21438d7..4ceb8d5 100644
--- a/README_en.md
+++ b/README_en.md
@@ -635,7 +635,7 @@ This project is developed by the following institutions:
 
 👏 Welcome to explore other multimodal projects of our team:
 
-[VisCPM](https://github.com/OpenBMB/VisCPM/tree/main) | [RLHF-V](https://github.com/RLHF-V/RLHF-V) | [LLaVA-UHD](https://github.com/thunlp/LLaVA-UHD)
+[VisCPM](https://github.com/OpenBMB/VisCPM/tree/main) | [RLHF-V](https://github.com/RLHF-V/RLHF-V) | [LLaVA-UHD](https://github.com/thunlp/LLaVA-UHD) | [RLAIF-V](https://github.com/RLHF-V/RLAIF-V)
 
 ## Citation
 
diff --git a/finetune/finetune.py b/finetune/finetune.py
index 808700a..708be3d 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -46,6 +46,7 @@ class TrainingArguments(transformers.TrainingArguments):
     )
     tune_vision: Optional[bool] = field(default=True)
     tune_llm: Optional[bool] = field(default=True)
+    llm_type: str = field(default="minicpm")
 
 
 def rank0_print(*args):
@@ -166,10 +167,11 @@ def train():
         model.llm.requires_grad_(False)
 
     rank0_print(get_parameter_number(model))
-    llm_type = "minicpm"
-    if "llama3" in model.name_or_path.lower():
+    llm_type = training_args.llm_type
+    if llm_type == "llama3":
         tokenizer.chat_template = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}"
-        llm_type = "llama3"
+
+    rank0_print(f'llm_type={llm_type}')
 
     # Load data
     if hasattr(model.config, "slice_config"):
diff --git a/finetune/finetune_ds.sh b/finetune/finetune_ds.sh
index 32f7f80..a904ce2 100644
--- a/finetune/finetune_ds.sh
+++ b/finetune/finetune_ds.sh
@@ -6,11 +6,12 @@ NODE_RANK=0
 MASTER_ADDR=localhost
 MASTER_PORT=6001
 
-MODEL="path/to/minicpmv2"
+MODEL="openbmb/MiniCPM-Llama3-V-2_5" # or openbmb/MiniCPM-V-2
 # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
 # See the section for finetuning in README for more information.
 DATA="path/to/training_data"
 EVAL_DATA="path/to/test_data"
+LLM_TYPE="llama3" # if using openbmb/MiniCPM-V-2, set LLM_TYPE=minicpm
 
 DISTRIBUTED_ARGS="
     --nproc_per_node $GPUS_PER_NODE \
@@ -21,6 +22,7 @@
 "
 torchrun $DISTRIBUTED_ARGS finetune.py \
     --model_name_or_path $MODEL \
+    --llm_type $LLM_TYPE \
     --data_path $DATA \
     --eval_data_path $EVAL_DATA \
     --remove_unused_columns false \
diff --git a/finetune/readme.md b/finetune/readme.md
index 1dd1414..8eb1de7 100644
--- a/finetune/readme.md
+++ b/finetune/readme.md
@@ -52,12 +52,13 @@ For the vision-language example with image, you are required to provide **\
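
The `ATTENTION` comment in `finetune_ds.sh` says the training data must be a JSON file consisting of a list of conversations. Below is a minimal sketch of that format, assuming the single-image schema described in the finetuning README; the field names (`id`, `image`, `conversations`), the `<image>` placeholder, and all paths here are illustrative and should be checked against `finetune/readme.md` in your checkout.

```python
# Hedged sketch: write a one-sample training file in the conversation-list
# format that --data_path / --eval_data_path appear to expect. Field names
# and the "<image>" placeholder are assumptions taken from the finetuning
# README, not guaranteed by this patch.
import json

sample = [
    {
        "id": "0",
        "image": "path/to/image_0.jpg",  # hypothetical local image path
        "conversations": [
            {"role": "user", "content": "<image>\nWhat is in the picture?"},
            {"role": "assistant", "content": "A cat sitting on a windowsill."},
        ],
    }
]

# Dump to the file that DATA in finetune_ds.sh would point to.
with open("training_data.json", "w", encoding="utf-8") as f:
    json.dump(sample, f, ensure_ascii=False, indent=2)
```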
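The Llama-3 chat template assigned in `finetune.py` can also be rendered standalone to see what it produces, without loading any model. A minimal sketch using `jinja2` directly; the `bos_token` value is an assumption (Llama-3 uses `<|begin_of_text|>`), and the template string is copied from the patch above.

```python
# Sanity-check sketch: render the Llama-3 chat template from finetune.py
# with jinja2 alone. bos_token="<|begin_of_text|>" is an assumption.
from jinja2 import Template

CHAT_TEMPLATE = (
    "{% set loop_messages = messages %}"
    "{% for message in loop_messages %}"
    "{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'"
    "+ message['content'] | trim + '<|eot_id|>' %}"
    "{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}"
    "{{ content }}"
    "{% endfor %}"
)

messages = [
    {"role": "user", "content": "Describe the image."},
    {"role": "assistant", "content": "It shows a red bicycle."},
]

# Prints each turn wrapped in <|start_header_id|>...<|eot_id|> markers,
# with the BOS token prepended to the first turn only.
print(Template(CHAT_TEMPLATE).render(messages=messages, bos_token="<|begin_of_text|>"))
```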