diff --git a/README.md b/README.md
index 6865558..b1997f2 100644
--- a/README.md
+++ b/README.md
@@ -492,7 +492,7 @@ pip install -r requirements.txt
 | Model           | Device | Memory    | &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; Description       | Download |
 |:-----------|:--:|:-----------:|:-------------------|:---------------:|
 | MiniCPM-Llama3-V 2.5 | GPU | 19 GB | The lastest version, achieving state-of-the end-side multimodal performance.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5) |
-| MiniCPM-Llama3-V 2.5 gguf | CPU  | 5 GB | The gguf version, lower GPU memory and faster inference.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) &nbsp;&nbsp;[<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf) |
+| MiniCPM-Llama3-V 2.5 gguf | CPU  | 5 GB | The gguf version, lower memory usage and faster inference.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) &nbsp;&nbsp;[<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf) |
 | MiniCPM-Llama3-V 2.5 int4 | GPU | 8 GB | The int4 quantized version，lower GPU memory usage. |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-int4/) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-int4) |
 | MiniCPM-V 2.0 | GPU | 8 GB | Light version, balance the performance the computation cost.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-V-2) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-V-2) |
 | MiniCPM-V 1.0 | GPU | 7 GB | Lightest version, achieving the fastest inference. |   [🤗](https://huggingface.co/openbmb/MiniCPM-V) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-V) |
diff --git a/README_en.md b/README_en.md
index 6865558..b1997f2 100644
--- a/README_en.md
+++ b/README_en.md
@@ -492,7 +492,7 @@ pip install -r requirements.txt
 | Model           | Device | Memory    | &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp; Description       | Download |
 |:-----------|:--:|:-----------:|:-------------------|:---------------:|
 | MiniCPM-Llama3-V 2.5 | GPU | 19 GB | The lastest version, achieving state-of-the end-side multimodal performance.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5) |
-| MiniCPM-Llama3-V 2.5 gguf | CPU  | 5 GB | The gguf version, lower GPU memory and faster inference.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) &nbsp;&nbsp;[<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf) |
+| MiniCPM-Llama3-V 2.5 gguf | CPU  | 5 GB | The gguf version, lower memory usage and faster inference.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-gguf) &nbsp;&nbsp;[<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-gguf) |
 | MiniCPM-Llama3-V 2.5 int4 | GPU | 8 GB | The int4 quantized version，lower GPU memory usage. |  [🤗](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5-int4/) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-Llama3-V-2_5-int4) |
 | MiniCPM-V 2.0 | GPU | 8 GB | Light version, balance the performance the computation cost.   |  [🤗](https://huggingface.co/openbmb/MiniCPM-V-2) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-V-2) |
 | MiniCPM-V 1.0 | GPU | 7 GB | Lightest version, achieving the fastest inference. |   [🤗](https://huggingface.co/openbmb/MiniCPM-V) &nbsp;&nbsp; [<img src="./assets/modelscope_logo.png" width="20px"></img>](https://modelscope.cn/models/OpenBMB/MiniCPM-V) |
diff --git a/finetune/finetune.py b/finetune/finetune.py
index 2738555..c86916c 100644
--- a/finetune/finetune.py
+++ b/finetune/finetune.py
@@ -47,7 +47,7 @@ class TrainingArguments(transformers.TrainingArguments):
         },
     )
     tune_vision: Optional[bool] = field(default=True)
-    tune_llm: Optional[bool] = field(default=False)
+    tune_llm: Optional[bool] = field(default=True)
     llm_type: str = field(default="minicpm")
     use_lora: Optional[bool] = field(default=False)
 
@@ -252,12 +252,15 @@ def train():
             layers_to_transform=lora_args.lora_layers_to_transform,
             task_type="CAUSAL_LM",
         )
-        if training_args.gradient_checkpointing:
+        if not hasattr(model, 'get_input_embeddings'):
             def get_input_embeddings(self):
                 return self.llm.get_input_embeddings()
             model.get_input_embeddings = MethodType(get_input_embeddings, model)
         model = get_peft_model(model, lora_config)
         model.base_model.llm.model.embed_tokens.weight.requires_grad_(True)
+        if training_args.tune_vision:
+            model.base_model.vpm.requires_grad_(True)
+            model.base_model.resampler.requires_grad_(True)
         if training_args.gradient_checkpointing:
             model.enable_input_require_grads()
 
diff --git a/finetune/finetune_ds.sh b/finetune/finetune_ds.sh
index ecc6cd6..45c00fe 100644
--- a/finetune/finetune_ds.sh
+++ b/finetune/finetune_ds.sh
@@ -42,7 +42,7 @@ torchrun $DISTRIBUTED_ARGS finetune.py  \
     --output_dir output/output_minicpmv2 \
     --logging_dir output/output_minicpmv2 \
     --logging_strategy "steps" \
-    --per_device_train_batch_size 2 \
+    --per_device_train_batch_size 1 \
     --per_device_eval_batch_size 1 \
     --gradient_accumulation_steps 1 \
     --evaluation_strategy "steps" \
diff --git a/finetune/readme.md b/finetune/readme.md
index 6bf6d8d..f8ab49c 100644
--- a/finetune/readme.md
+++ b/finetune/readme.md
@@ -107,6 +107,31 @@ The following table presents the memory usage of the model when fine-tuning usin
 - **Out of memory**: Indicates that the memory was insufficient for full parameters fine-tuning under the current GPU configurations.
 
 ### Finetuning FAQs
+
+<details>
+<summary>Q: How do I fix the error raised when using `AutoPeftModelForCausalLM` to load a checkpoint that has undergone LoRA fine-tuning?</summary>
+
+A: The error described in [issue #168](https://github.com/OpenBMB/MiniCPM-V/issues/168) occurs because the model lacks the `get_input_embeddings` and `set_input_embeddings` methods. Follow these steps to resolve it:
+
+1. **Reload the Fine-Tuned Model:** Make sure you correctly load the checkpoint that was fine-tuned with LoRA. Use the following code example as a guide:
+   ```python
+   from peft import AutoPeftModelForCausalLM
+
+   model = AutoPeftModelForCausalLM.from_pretrained(
+       'path_to_your_fine_tuned_checkpoint',  # Path to your fine-tuned checkpoint directory
+       output='output/minicpmv2_lora',
+       device_map='auto',
+       trust_remote_code=True
+   ).eval()
+   ```
+2. **Update the `model_minicpmv.py` File:**
+   - **Verification:** Check that your `model_minicpmv.py` file is the latest version, and update it if it is not.
+   - **Update Hugging Face Library Code:** If the issue persists after updating the file, consider updating the related code in the Hugging Face library.
+   - **Direct File Copy:** For a quick resolution, directly download and copy the latest `model_minicpmv.py` file into your project. This file is available from the following sources:
+     - [MiniCPM-Llama3-V-2_5 on Hugging Face](https://huggingface.co/openbmb/MiniCPM-Llama3-V-2_5/tree/main)
+     - [MiniCPM-V-2 on Hugging Face](https://huggingface.co/openbmb/MiniCPM-V-2)
+</details>
+
 <details>
 <summary>Q: How do I use the `flash_attention_2` implementation when loading a pretrained model?</summary>
 
diff --git a/web_demo.py b/web_demo.py
index ad9c54a..a9a0727 100644
--- a/web_demo.py
+++ b/web_demo.py
@@ -154,7 +154,7 @@ def chat(img, msgs, ctx, params=None, vision_hidden_states=None):
         res = res.replace('</ref>', '')
         res = res.replace('<box>', '')
         answer = res.replace('</box>', '')
-        return -1, answer, None, None
+        return 0, answer, None, None
     except Exception as err:
         print(err)
         traceback.print_exc()
diff --git a/web_demo_2.5.py b/web_demo_2.5.py
index cc80a82..2076da3 100644
--- a/web_demo_2.5.py
+++ b/web_demo_2.5.py
@@ -151,7 +151,7 @@ def chat(img, msgs, ctx, params=None, vision_hidden_states=None):
         res = res.replace('</ref>', '')
         res = res.replace('<box>', '')
         answer = res.replace('</box>', '')
-        return -1, answer, None, None
+        return 0, answer, None, None
     except Exception as err:
         print(err)
         traceback.print_exc()