Modify eval_mm for MiniCPM-o 2.6

This commit is contained in:
Poppy Xu
2025-01-21 15:34:54 +08:00
parent ec68cefc17
commit d8f382e157
82 changed files with 14279 additions and 843 deletions

View File

@@ -12,18 +12,20 @@ class ImageYORNDataset(ImageBaseDataset):
'MME': 'https://opencompass.openxlab.space/utils/VLMEval/MME.tsv',
'HallusionBench': 'https://opencompass.openxlab.space/utils/VLMEval/HallusionBench.tsv',
'POPE': 'https://opencompass.openxlab.space/utils/VLMEval/POPE.tsv',
'AMBER': 'https://huggingface.co/datasets/yifanzhang114/AMBER_base64/resolve/main/AMBER.tsv',
}
DATASET_MD5 = {
'MME': 'b36b43c3f09801f5d368627fb92187c3',
'HallusionBench': '0c23ac0dc9ef46832d7a24504f2a0c7c',
'POPE': 'c12f5acb142f2ef1f85a26ba2fbe41d5',
'AMBER': '970d94c0410916166e0a76ba75da7934',
}
# It returns a dataframe
def evaluate(self, eval_file, **judge_kwargs):
from .utils.yorn import YOrN_Extraction, YOrN_auxeval
from .utils.yorn import default_rating, MME_rating, Hallusion_rating, POPE_rating
from .utils.yorn import default_rating, MME_rating, Hallusion_rating, POPE_rating, AMBER_rating
dataset = self.dataset_name
data = load(eval_file)
@@ -71,7 +73,10 @@ class ImageYORNDataset(ImageBaseDataset):
dump(data, storage)
data = load(storage)
data['score'] = (data['answer'] == data['extracted'])
if listinstr(['AMBER'], dataset):
data['score'] = (data['answer'].str.lower() == data['extracted'].str.lower())
else:
data['score'] = (data['answer'] == data['extracted'])
dump(data, storage)
if dataset is not None and listinstr(['MME'], dataset):
@@ -80,6 +85,8 @@ class ImageYORNDataset(ImageBaseDataset):
score = Hallusion_rating(storage)
elif dataset is not None and listinstr(['POPE'], dataset):
score = POPE_rating(storage)
elif dataset is not None and listinstr(['AMBER'], dataset):
score = AMBER_rating(storage)
else:
score = default_rating(storage)