mirror of
https://github.com/OpenBMB/MiniCPM-V.git
synced 2026-02-04 09:49:20 +08:00
180 lines
5.1 KiB
Python
180 lines
5.1 KiB
Python
import os
|
|
import io
|
|
import pandas as pd
|
|
import numpy as np
|
|
import string
|
|
from uuid import uuid4
|
|
import os.path as osp
|
|
import base64
|
|
from PIL import Image
|
|
import sys
|
|
|
|
# Override Pillow's module-level pixel-count limit (the threshold used by its
# decompression-bomb check) so that very large images can still be opened.
Image.MAX_IMAGE_PIXELS = 1e9
|
|
|
|
|
|
def rescale_img(img, tgt=None):
    """Resize ``img`` to a target width or height, keeping its aspect ratio.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height))`` method.
        tgt: ``(width, height)`` tuple in which ``-1`` marks the side that is
            derived from the aspect ratio. If the width is given it takes
            precedence over the height.

    Returns:
        The resized image. When both entries of ``tgt`` are ``-1`` there is
        no constraint to apply and ``img`` itself is returned unchanged.

    Raises:
        AssertionError: if ``tgt`` is not a tuple containing ``-1``.
    """
    assert isinstance(tgt, tuple) and -1 in tgt
    w, h = img.size
    if tgt[0] != -1:
        new_w, new_h = tgt[0], int(tgt[0] / w * h)
    elif tgt[1] != -1:
        new_w, new_h = int(tgt[1] / h * w), tgt[1]
    else:
        # (-1, -1) passes the assertion above but names no target side;
        # previously this fell through to an unbound ``new_w``/``new_h``.
        return img
    return img.resize((new_w, new_h))
|
|
|
|
|
|
def concat_images_vlmeval(images, target_size=-1, mode='h', return_image=False):
    """Concatenate several image files into one RGB image.

    Args:
        images: List of image file paths, concatenated in the given order.
        target_size: When not ``-1``, every image is first rescaled (aspect
            ratio kept) so that its height (``mode='h'``) or its width
            (``mode='v'``) equals this value.
        mode: ``'h'`` to place images side by side, ``'v'`` to stack them
            top to bottom.
        return_image: If true, return the concatenated image object instead
            of writing it to disk.

    Returns:
        The concatenated image when ``return_image`` is true; otherwise the
        path of a ``.jpg`` file under ``/tmp`` whose name is the md5 of the
        newline-joined input paths.

    Raises:
        ValueError: if ``mode`` is neither ``'h'`` nor ``'v'``.
    """
    from .file import md5

    if mode not in ('h', 'v'):
        # Previously an unknown mode only failed later on an unbound ``dst``.
        raise ValueError(f"mode must be 'h' or 'v', got {mode!r}")
    horizontal = mode == 'h'

    opened = [Image.open(im) for im in images]
    try:
        ims = opened
        if target_size != -1:
            ims = [
                rescale_img(im, (-1, target_size) if horizontal else (target_size, -1))
                for im in ims
            ]

        ws, hs = [x.width for x in ims], [x.height for x in ims]
        if horizontal:
            new_w, new_h = sum(ws), max(hs)
        else:
            new_w, new_h = max(ws), sum(hs)
        dst = Image.new('RGB', (new_w, new_h))

        # Running offset along the concatenation axis. The vertical branch
        # used to pass a bare int computed from widths to ``paste``; the box
        # must be an (x, y) tuple and advance by image heights.
        offset = 0
        for im in ims:
            if horizontal:
                dst.paste(im, (offset, 0))
                offset += im.width
            else:
                dst.paste(im, (0, offset))
                offset += im.height
    finally:
        # The pixels have been copied into ``dst``; release the source files.
        for im in opened:
            im.close()

    if return_image:
        return dst

    _str = '\n'.join(images)
    str_md5 = md5(_str)
    tgt = osp.join('/tmp', str_md5 + '.jpg')
    dst.save(tgt)
    return tgt
|
|
|
|
|
|
def mmqa_display(question, target_size=512):
    """Show one question record: its index, its images, then its fields.

    Args:
        question: Mapping of field name to value. Keys are compared
            case-insensitively (they are lower-cased into a new dict). The
            ``image`` entry holds one base64 string or a list of them.
        target_size: Forwarded to ``decode_base64_to_image`` for each image.

    The index is printed first (``'XXX'`` when absent), each decoded image is
    passed to ``display``, and every remaining field whose value is not
    reported as missing by ``pd.isna`` is printed as ``KEY. value``.
    """
    record = {name.lower(): value for name, value in question.items()}
    field_names = [name for name in record if name not in ('index', 'image')]

    encoded_images = record['image']
    if isinstance(encoded_images, str):
        encoded_images = [encoded_images]

    idx = record.pop('index', 'XXX')
    print(f'INDEX: {idx}')

    for payload in encoded_images:
        decoded = decode_base64_to_image(payload, target_size=target_size)
        display(decoded)  # noqa: F821

    for name in field_names:
        value = record[name]
        try:
            if not pd.isna(value):
                print(f'{name.upper()}. {value}')
        except ValueError:
            # Array-like values have no single truth value; print the field
            # when at least one element is not missing.
            if False in pd.isna(value):
                print(f'{name.upper()}. {value}')
|
|
|
|
|
|
def encode_image_to_base64(img, target_size=-1, fmt='JPEG'):
    """Serialize an image to a base64 string.

    Args:
        img: Image object exposing ``mode``, ``convert``, ``copy``,
            ``thumbnail`` and ``save``. It is never modified by this call.
        target_size: If ``<= 0`` no resizing is done; otherwise the image is
            shrunk to fit inside ``(target_size, target_size)``.
        fmt: Format name passed to ``img.save``.

    Returns:
        The encoded image bytes as a UTF-8 base64 string.
    """
    # 'RGBA' and 'P' images are converted to 'RGB' before saving.
    converted = img.mode in ('RGBA', 'P')
    if converted:
        img = img.convert('RGB')
    if target_size > 0:
        # ``thumbnail`` resizes in place, so work on a copy unless ``convert``
        # already produced a fresh object; otherwise the caller's image would
        # be shrunk as a side effect.
        if not converted:
            img = img.copy()
        img.thumbnail((target_size, target_size))
    img_buffer = io.BytesIO()
    img.save(img_buffer, format=fmt)
    return base64.b64encode(img_buffer.getvalue()).decode('utf-8')
|
|
|
|
|
|
def encode_image_file_to_base64(image_path, target_size=-1):
    """Read an image file and return it as a base64 string.

    Args:
        image_path: Path of the image file to open.
        target_size: Forwarded to ``encode_image_to_base64``.

    Returns:
        The result of ``encode_image_to_base64`` for the opened image.
    """
    # The context manager closes the underlying file once encoding is done;
    # previously the handle was left open.
    with Image.open(image_path) as image:
        return encode_image_to_base64(image, target_size=target_size)
|
|
|
|
|
|
def decode_base64_to_image(base64_string, target_size=-1):
    """Decode a base64 string into an image object.

    Args:
        base64_string: Base64-encoded image bytes.
        target_size: If ``> 0`` the decoded image is shrunk in place to fit
            inside ``(target_size, target_size)``; otherwise it is left as is.

    Returns:
        The decoded image; 'RGBA' and 'P' images are converted to 'RGB'.
    """
    raw_bytes = base64.b64decode(base64_string)
    picture = Image.open(io.BytesIO(raw_bytes))
    needs_rgb = picture.mode in ('RGBA', 'P')
    if needs_rgb:
        picture = picture.convert('RGB')
    if target_size > 0:
        bounding_box = (target_size, target_size)
        picture.thumbnail(bounding_box)
    return picture
|
|
|
|
|
|
def decode_base64_to_image_file(base64_string, image_path, target_size=-1):
    """Decode a base64 image string and save the image to ``image_path``.

    Args:
        base64_string: Base64-encoded image bytes.
        image_path: Destination path passed to the image's ``save`` method.
        target_size: Forwarded to ``decode_base64_to_image``.
    """
    decode_base64_to_image(base64_string, target_size=target_size).save(image_path)
|
|
|
|
|
|
def build_option_str(option_dict):
    """Format answer options as a prompt fragment.

    Args:
        option_dict: Mapping of option label to option text. Entries whose
            text is reported as missing by ``pd.isna`` are left out.

    Returns:
        A header line followed by one ``'<label>. <text>'`` line per kept
        option, each terminated by a newline.
    """
    pieces = ['There are several options: \n']
    pieces.extend(
        f'{label}. {text}\n'
        for label, text in option_dict.items()
        if not pd.isna(text)
    )
    return ''.join(pieces)
|
|
|
|
|
|
def isimg(s):
    """Return True if ``s`` is an existing path or starts with ``'http'``."""
    if osp.exists(s):
        return True
    return s.startswith('http')
|
|
|
|
|
|
def read_ok(img_path):
    """Check that ``img_path`` exists and opens as an image of non-zero size.

    Args:
        img_path: Path of the image file to probe.

    Returns:
        True if the file exists, can be opened, and both of its dimensions
        are positive; False otherwise. Errors raised while opening the file
        are treated as "not readable" rather than propagated.
    """
    if not osp.exists(img_path):
        return False
    try:
        # Context manager releases the file handle after the probe.
        with Image.open(img_path) as im:
            size_ok = im.size[0] > 0 and im.size[1] > 0
    except Exception:
        # Best-effort probe: any failure means the image is not usable.
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
    # Explicit boolean instead of ``assert`` so the check survives ``-O``.
    return bool(size_ok)
|
|
|
|
|
|
def gpt_key_set():
    """Return True if ``OPENAI_API_KEY`` is set and starts with ``'sk-'``."""
    api_key = os.environ.get('OPENAI_API_KEY', None)
    if not isinstance(api_key, str):
        return False
    return api_key.startswith('sk-')
|
|
|
|
|
|
def apiok(wrapper):
    """Probe ``wrapper`` with a short prompt and report whether it answered.

    Args:
        wrapper: Object exposing ``generate(prompt)`` and a ``fail_msg``
            attribute.

    Returns:
        True if ``wrapper.fail_msg`` does not occur in the reply to
        ``'Hello!'``, False otherwise.
    """
    reply = wrapper.generate('Hello!')
    failed = wrapper.fail_msg in reply
    return not failed
|
|
|
|
|
|
def circular_pred(df, extract_func=None):
    """Score predictions for consistency across shifted copies of a question.

    Rows whose ``index`` is below ``1e6`` are base questions. A row with
    index ``i >= 1e6`` is compared with the row at ``i - 1e6``, and the
    outcome is recorded against the base index ``i % 1e6``.

    Args:
        df: DataFrame with ``index`` and ``prediction`` columns.
        extract_func: Optional callable mapping a raw prediction to a choice;
            defaults to the identity.

    Returns:
        The mean of the per-base-question flags (``np.mean``), counting only
        base questions that were not marked invalid. A question is invalid
        when a compared prediction is not a single uppercase ASCII letter.
        It is flagged False when a compared pair is not exactly one letter
        apart modulo 4.
    """
    if extract_func is None:
        extract_func = lambda x: x  # noqa: E731
    df = df.sort_values('index')

    shift = int(1e6)
    # Kept as a list so membership stays an equality test against single
    # letters (no hashing requirement, no substring matching).
    letters = list(string.ascii_uppercase)

    choices = [extract_func(x) for x in df['prediction']]
    pred_map = dict(zip(df['index'], choices))
    flag_map = {i: True for i in pred_map if i < shift}
    valid_map = {i: True for i in pred_map if i < shift}

    for i in df['index']:
        # Only shifted copies with a non-empty prediction on both sides are
        # compared; the lookups stay short-circuited in this order.
        if not (i >= shift and pred_map[i] and pred_map[i - shift]):
            continue
        cur, prev = pred_map[i], pred_map[i - shift]
        base = i % shift
        if cur not in letters or prev not in letters:
            valid_map[base] = False
            continue
        # NOTE(review): the modulus 4 presumably reflects four options rotated
        # by one position per copy -- confirm against the dataset builder.
        if (ord(cur) - ord(prev)) % 4 != 1:
            flag_map[base] = False

    flags = [ok for base, ok in flag_map.items() if valid_map[base]]
    return np.mean(flags)
|