mirror of
https://github.com/OpenBMB/MiniCPM-V.git
synced 2026-02-05 18:29:18 +08:00
Modify eval_mm for MiniCPM-V 2.6
This commit is contained in:
@@ -7,10 +7,53 @@ from uuid import uuid4
|
||||
import os.path as osp
|
||||
import base64
|
||||
from PIL import Image
|
||||
from .file import load, dump
|
||||
import sys
|
||||
|
||||
# Raise Pillow's decompression-bomb pixel threshold so that very large
# images can be opened by the helpers in this module.
Image.MAX_IMAGE_PIXELS = 1e9
|
||||
|
||||
|
||||
def rescale_img(img, tgt=None):
    """Resize ``img`` to a target width or height, keeping its aspect ratio.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method.
        tgt: 2-tuple ``(width, height)`` in which ``-1`` marks the dimension
            to derive from the aspect ratio; e.g. ``(512, -1)`` fixes the
            width and ``(-1, 512)`` fixes the height.

    Returns:
        The result of ``img.resize``. When both entries of ``tgt`` are
        ``-1`` there is no constraint to apply and ``img`` is returned
        unchanged.

    Raises:
        AssertionError: If ``tgt`` is not a tuple containing ``-1``.
    """
    assert isinstance(tgt, tuple) and -1 in tgt
    w, h = img.size
    if tgt[0] != -1:
        # Width is fixed; scale the height by the same factor.
        new_w, new_h = tgt[0], int(tgt[0] / w * h)
    elif tgt[1] != -1:
        # Height is fixed; scale the width by the same factor.
        new_w, new_h = int(tgt[1] / h * w), tgt[1]
    else:
        # (-1, -1): previously fell through with new_w/new_h unbound.
        return img
    return img.resize((new_w, new_h))
|
||||
|
||||
|
||||
def concat_images_vlmeval(images, target_size=-1, mode='h', return_image=False):
    """Concatenate several image files into a single RGB image.

    Args:
        images: Sequence of image file paths; each is opened with
            ``Image.open``.
        target_size: When not ``-1``, every image is first rescaled with
            ``rescale_img`` so that its height (``mode='h'``) or its width
            (``mode='v'``) equals this value.
        mode: ``'h'`` lays the images out left to right, ``'v'`` stacks them
            top to bottom.
        return_image: If true, return the composed image object; otherwise
            save it as a JPEG under ``/tmp`` and return that file path.

    Returns:
        The composed image, or the path of the saved JPEG. The file name is
        the MD5 of the newline-joined input paths, so it does not vary with
        ``target_size`` or ``mode``.

    Raises:
        ValueError: If ``mode`` is neither ``'h'`` nor ``'v'``, or if
            ``images`` is empty.
    """
    from .file import md5

    if mode not in ('h', 'v'):
        raise ValueError(f"mode must be 'h' or 'v', got {mode!r}")
    horizontal = mode == 'h'

    ims = [Image.open(im) for im in images]
    if not ims:
        raise ValueError('images must contain at least one image path')

    if target_size != -1:
        # Normalise the dimension shared along the concatenation axis.
        tgt_shape = (-1, target_size) if horizontal else (target_size, -1)
        ims = [rescale_img(im, tgt_shape) for im in ims]

    ws = [im.width for im in ims]
    hs = [im.height for im in ims]
    if horizontal:
        dst = Image.new('RGB', (sum(ws), max(hs)))
        x_offset = 0
        for im in ims:
            dst.paste(im, (x_offset, 0))
            x_offset += im.width
    else:
        dst = Image.new('RGB', (max(ws), sum(hs)))
        y_offset = 0
        for im in ims:
            # Stack downwards: x stays 0, y advances by the heights so far.
            dst.paste(im, (0, y_offset))
            y_offset += im.height

    if return_image:
        return dst

    str_md5 = md5('\n'.join(images))
    tgt = osp.join('/tmp', str_md5 + '.jpg')
    dst.save(tgt)
    return tgt
|
||||
|
||||
|
||||
def mmqa_display(question, target_size=512):
|
||||
question = {k.lower(): v for k, v in question.items()}
|
||||
keys = list(question.keys())
|
||||
@@ -41,14 +84,12 @@ def encode_image_to_base64(img, target_size=-1):
|
||||
# else, will set the max_size to (target_size, target_size)
|
||||
if img.mode in ('RGBA', 'P'):
|
||||
img = img.convert('RGB')
|
||||
tmp = osp.join('/tmp', str(uuid4()) + '.jpg')
|
||||
if target_size > 0:
|
||||
img.thumbnail((target_size, target_size))
|
||||
img.save(tmp)
|
||||
with open(tmp, 'rb') as image_file:
|
||||
image_data = image_file.read()
|
||||
img_buffer = io.BytesIO()
|
||||
img.save(img_buffer, format='JPEG')
|
||||
image_data = img_buffer.getvalue()
|
||||
ret = base64.b64encode(image_data).decode('utf-8')
|
||||
os.remove(tmp)
|
||||
return ret
|
||||
|
||||
|
||||
@@ -110,6 +151,7 @@ def circular_pred(df, extract_func=None):
|
||||
extract_func = lambda x: x # noqa: E731
|
||||
df = df.sort_values('index')
|
||||
from vlmeval.utils import can_infer_option
|
||||
|
||||
shift = int(1e6)
|
||||
|
||||
choices = [extract_func(x) for x in df['prediction']]
|
||||
@@ -118,9 +160,12 @@ def circular_pred(df, extract_func=None):
|
||||
valid_map = {i: True for i in pred_map if i < 1e6}
|
||||
for i in df['index']:
|
||||
if i >= shift and pred_map[i] and pred_map[i - shift]:
|
||||
if (
|
||||
pred_map[i] not in list(string.ascii_uppercase) or # noqa: W504
|
||||
pred_map[i - shift] not in list(string.ascii_uppercase)
|
||||
if pred_map[i] not in list(
|
||||
string.ascii_uppercase
|
||||
) or pred_map[ # noqa: W504
|
||||
i - shift
|
||||
] not in list(
|
||||
string.ascii_uppercase
|
||||
):
|
||||
|
||||
valid_map[i % shift] = False
|
||||
|
||||
Reference in New Issue
Block a user