Update dataset.py

Fix image ID numbering: build the image placeholder from image_id_cnt instead of idx
This commit is contained in:
YuzaChongyi
2024-08-06 18:55:13 +08:00
committed by GitHub
parent 17d344e17c
commit 270261f9d2

View File

@@ -329,7 +329,7 @@ def preprocess(
for j in range(len(patches[0])):
images.append(patches[i][j])
if use_image_id:
-image_placeholder = f'{tokenizer.im_id_start}{idx}{tokenizer.im_id_end}' + image_placeholder
+image_placeholder = f'{tokenizer.im_id_start}{image_id_cnt}{tokenizer.im_id_end}' + image_placeholder
image_id_cnt += 1
image_placeholder += get_grid_placeholder(
tokenizer, best_grid, query_nums, new_schema = new_schema)
@@ -518,4 +518,4 @@ def reshape_by_patch(image_tensor, patch_size):
patches = patches.reshape(image_tensor.size(0), patch_size, patch_size, -1)
patches = patches.permute(0, 1, 3, 2).reshape(
image_tensor.size(0), patch_size, -1)
-return patches
+return patches