Modify eval_mm for MiniCPM-o 2.6

This commit is contained in:
Poppy Xu
2025-01-21 15:34:54 +08:00
parent ec68cefc17
commit d8f382e157
82 changed files with 14279 additions and 843 deletions

View File

@@ -8,11 +8,14 @@ class VideoBaseDataset:
def __init__(self,
dataset='MMBench-Video',
pack=False):
pack=False,
nframe=0,
fps=-1):
try:
import decord
except:
warnings.warn('Please install decord via `pip install decord`.')
except Exception as e:
logging.critical(f'{type(e)}: {e}')
logging.critical('Please install decord via `pip install decord`.')
self.dataset_name = dataset
ret = self.prepare_dataset(dataset)
@@ -21,6 +24,7 @@ class VideoBaseDataset:
self.frame_root = osp.join(lmu_root, 'images', dataset)
os.makedirs(self.frame_root, exist_ok=True)
self.frame_tmpl = 'frame-{}-of-{}.jpg'
self.frame_tmpl_fps = 'frame-{}-of-{}-{}fps.jpg'
self.data_root = ret['root']
self.data_file = ret['data_file']
@@ -31,6 +35,12 @@ class VideoBaseDataset:
videos.sort()
self.videos = videos
self.pack = pack
self.nframe = nframe
self.fps = fps
if self.fps > 0 and self.nframe > 0:
raise ValueError('fps and nframe should not be set at the same time')
if self.fps <= 0 and self.nframe <= 0:
raise ValueError('fps and nframe should be set at least one valid value')
def __len__(self):
return len(self.videos) if self.pack else len(self.data)
@@ -44,31 +54,69 @@ class VideoBaseDataset:
assert idx < len(self.data)
return dict(self.data.iloc[idx])
def frame_paths(self, video, num_frames=8):
def frame_paths(self, video):
frame_root = osp.join(self.frame_root, video)
os.makedirs(frame_root, exist_ok=True)
return [osp.join(frame_root, self.frame_tmpl.format(i, num_frames)) for i in range(1, num_frames + 1)]
return [osp.join(frame_root, self.frame_tmpl.format(i, self.nframe)) for i in range(1, self.nframe + 1)]
def save_video_frames(self, video, num_frames=8):
frame_paths = self.frame_paths(video, num_frames)
flag = np.all([osp.exists(p) for p in frame_paths])
if flag:
def frame_paths_fps(self, video, num_frames):
frame_root = osp.join(self.frame_root, video)
os.makedirs(frame_root, exist_ok=True)
return [osp.join(frame_root,
self.frame_tmpl_fps.format(i, num_frames, self.fps)) for i in range(1, num_frames + 1)]
def save_video_frames(self, video):
if self.fps > 0:
vid_path = osp.join(self.data_root, video + '.mp4')
vid = decord.VideoReader(vid_path)
# 计算视频的总帧数和总时长
total_frames = len(vid)
video_fps = vid.get_avg_fps()
total_duration = total_frames / video_fps
# 计算需要提取的总帧数
required_frames = int(total_duration * self.fps)
# 计算提取帧的间隔
step_size = video_fps / self.fps
# 计算提取帧的索引
indices = [int(i * step_size) for i in range(required_frames)]
# 提取帧并保存
frame_paths = self.frame_paths_fps(video, len(indices))
flag = np.all([osp.exists(p) for p in frame_paths])
if flag:
return frame_paths
images = [vid[i].asnumpy() for i in indices]
images = [Image.fromarray(arr) for arr in images]
for im, pth in zip(images, frame_paths):
if not osp.exists(pth):
im.save(pth)
return frame_paths
else:
frame_paths = self.frame_paths(video)
flag = np.all([osp.exists(p) for p in frame_paths])
if flag:
return frame_paths
vid_path = osp.join(self.data_root, video + '.mp4')
vid = decord.VideoReader(vid_path)
step_size = len(vid) / (self.nframe + 1)
indices = [int(i * step_size) for i in range(1, self.nframe + 1)]
images = [vid[i].asnumpy() for i in indices]
images = [Image.fromarray(arr) for arr in images]
for im, pth in zip(images, frame_paths):
if not osp.exists(pth):
im.save(pth)
return frame_paths
vid_path = osp.join(self.data_root, video + '.mp4')
vid = decord.VideoReader(vid_path)
step_size = len(vid) / (num_frames + 1)
indices = [int(i * step_size) for i in range(1, num_frames + 1)]
images = [vid[i].numpy() for i in indices]
images = [Image.fromarray(arr) for arr in images]
for im, pth in zip(images, frame_paths):
if not osp.exists(pth):
im.save(pth)
return frame_paths
# Return a list of dataset names that are supported by this class, can override
@classmethod
def supported_datasets(cls):
return ['MMBench-Video', 'Video-MME', 'MVBench']
return ['MMBench-Video', 'Video-MME', 'MVBench', 'MVBench_MP4', 'LongVideoBench']
# Given the prediction file, return the evaluation results in the format of a dictionary or pandas dataframe
@abstractmethod
@@ -76,7 +124,7 @@ class VideoBaseDataset:
pass
@abstractmethod
def build_prompt(self, idx, num_frames=8):
def build_prompt(self, idx):
pass
@abstractmethod