Modify eval_mm for MiniCPM-V 2.6

This commit is contained in:
Haoyu Li
2024-08-30 18:18:22 +00:00
parent ab1141ee45
commit 59224808a1
69 changed files with 8231 additions and 1818 deletions

View File

@@ -7,10 +7,53 @@ from uuid import uuid4
import os.path as osp
import base64
from PIL import Image
from .file import load, dump
import sys
Image.MAX_IMAGE_PIXELS = 1e9
def rescale_img(img, tgt=None):
    """Resize ``img`` to a target width or height, keeping its aspect ratio.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method.
        tgt: Tuple ``(target_width, target_height)`` that must contain ``-1``.
            If ``tgt[0]`` is not ``-1`` the width is fixed to it and the
            height is derived; otherwise, if ``tgt[1]`` is not ``-1`` the
            height is fixed and the width is derived. If both are ``-1`` the
            size is left unchanged.

    Returns:
        Whatever ``img.resize`` returns for the computed size.

    Raises:
        AssertionError: If ``tgt`` is not a tuple containing ``-1``.
    """
    assert isinstance(tgt, tuple) and -1 in tgt
    w, h = img.size
    if tgt[0] != -1:
        # Clamp the derived side so extreme aspect ratios never yield 0 px.
        new_w, new_h = tgt[0], max(1, int(tgt[0] / w * h))
    elif tgt[1] != -1:
        new_w, new_h = max(1, int(tgt[1] / h * w)), tgt[1]
    else:
        # No dimension constrained: previously this path left new_w/new_h
        # unassigned and crashed; keep the current size instead.
        new_w, new_h = w, h
    return img.resize((new_w, new_h))
def concat_images_vlmeval(images, target_size=-1, mode='h', return_image=False):
    """Concatenate several image files into one image, side by side or stacked.

    Args:
        images: Sequence of image file paths; each is opened with
            ``Image.open``.
        target_size: If not ``-1``, every image is first rescaled with
            ``rescale_img`` so that its height (``mode='h'``) or width
            (``mode='v'``) equals this value.
        mode: ``'h'`` to place images left to right, ``'v'`` to stack them
            top to bottom.
        return_image: If true, return the composed image object instead of
            saving it.

    Returns:
        The composed RGB image when ``return_image`` is true; otherwise the
        path of a ``.jpg`` file under ``/tmp`` whose name is the ``md5`` of
        the newline-joined input paths.

    Raises:
        ValueError: If ``mode`` is neither ``'h'`` nor ``'v'``.
    """
    if mode not in ('h', 'v'):
        # Previously an unknown mode fell through and failed later with an
        # unbound ``dst``; fail early with a clear message instead.
        raise ValueError(f"mode must be 'h' or 'v', got {mode!r}")

    ims = [Image.open(im) for im in images]
    if target_size != -1:
        tgt = (-1, target_size) if mode == 'h' else (target_size, -1)
        ims = [rescale_img(im, tgt) for im in ims]

    ws = [im.width for im in ims]
    hs = [im.height for im in ims]

    offset = 0
    if mode == 'h':
        dst = Image.new('RGB', (sum(ws), max(hs)))
        for im, w in zip(ims, ws):
            dst.paste(im, (offset, 0))
            offset += w
    else:
        dst = Image.new('RGB', (max(ws), sum(hs)))
        for im, h in zip(ims, hs):
            # Vertical stacking advances along y by the heights; the old code
            # passed a bare int built from the widths as the paste box.
            dst.paste(im, (0, offset))
            offset += h

    if return_image:
        return dst

    # Imported here because only the save-to-disk path needs it.
    from .file import md5
    tgt_path = osp.join('/tmp', md5('\n'.join(images)) + '.jpg')
    dst.save(tgt_path)
    return tgt_path
def mmqa_display(question, target_size=512):
question = {k.lower(): v for k, v in question.items()}
keys = list(question.keys())
@@ -41,14 +84,12 @@ def encode_image_to_base64(img, target_size=-1):
# else, will set the max_size to (target_size, target_size)
if img.mode in ('RGBA', 'P'):
img = img.convert('RGB')
tmp = osp.join('/tmp', str(uuid4()) + '.jpg')
if target_size > 0:
img.thumbnail((target_size, target_size))
img.save(tmp)
with open(tmp, 'rb') as image_file:
image_data = image_file.read()
img_buffer = io.BytesIO()
img.save(img_buffer, format='JPEG')
image_data = img_buffer.getvalue()
ret = base64.b64encode(image_data).decode('utf-8')
os.remove(tmp)
return ret
@@ -110,6 +151,7 @@ def circular_pred(df, extract_func=None):
extract_func = lambda x: x # noqa: E731
df = df.sort_values('index')
from vlmeval.utils import can_infer_option
shift = int(1e6)
choices = [extract_func(x) for x in df['prediction']]
@@ -118,9 +160,12 @@ def circular_pred(df, extract_func=None):
valid_map = {i: True for i in pred_map if i < 1e6}
for i in df['index']:
if i >= shift and pred_map[i] and pred_map[i - shift]:
if (
pred_map[i] not in list(string.ascii_uppercase) or # noqa: W504
pred_map[i - shift] not in list(string.ascii_uppercase)
if pred_map[i] not in list(
string.ascii_uppercase
) or pred_map[ # noqa: W504
i - shift
] not in list(
string.ascii_uppercase
):
valid_map[i % shift] = False