From 8befa86b5e8f6db07299369c0ac58e79957cca23 Mon Sep 17 00:00:00 2001 From: qianyu chen <38046403+qyc-98@users.noreply.github.com> Date: Thu, 15 Aug 2024 11:03:29 +0800 Subject: [PATCH] update maxlength --- finetune/dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/finetune/dataset.py b/finetune/dataset.py index 5345281..d03a325 100644 --- a/finetune/dataset.py +++ b/finetune/dataset.py @@ -33,7 +33,7 @@ class SupervisedDataset(Dataset): patch_size=14, query_nums=64, batch_vision=False, - max_length=None, + max_length=2048, ): super(SupervisedDataset, self).__init__() self.raw_data = raw_data @@ -122,7 +122,7 @@ def data_collator(examples, padding_value=0, max_length=2048): } -def conversation_to_ids(conversation, tokenizer, llm_type=None, new_schema=False, max_length=None): +def conversation_to_ids(conversation, tokenizer, llm_type=None, new_schema=False, max_length=2048): """ for single image multi-turn conversation conversation: [{'role': 'user', 'content': 'Describe this image'},