Mirror of https://github.com/OpenBMB/MiniCPM-V.git (synced 2026-02-05 18:29:18 +08:00)
Add eval_mm dir
eval_mm/vqaeval/datasets/__init__.py    (new file, 0 lines)
eval_mm/vqaeval/datasets/vqa_dataset.py (new file, 116 lines)
@@ -0,0 +1,116 @@
import json
import os
import re

from torch.utils.data import Dataset


def prompt_processor(prompt):
    # Recover the raw question from one of the three prompt templates
    # used at evaluation time, then lowercase it.
    if prompt.startswith('OCR tokens: '):
        pattern = r"Question: (.*?) Short answer:"
        match = re.search(pattern, prompt, re.DOTALL)
        question = match.group(1)
    elif 'Reference OCR token: ' in prompt and len(prompt.split('\n')) == 3:
        # Three-line prompt: the question is on the line after the
        # OCR-token line if that line comes first, otherwise on the first line.
        if prompt.startswith('Reference OCR token:'):
            question = prompt.split('\n')[1]
        else:
            question = prompt.split('\n')[0]
    elif len(prompt.split('\n')) == 2:
        # Two-line prompt: the question is the first line.
        question = prompt.split('\n')[0]
    else:
        assert False  # unrecognized prompt format

    return question.lower()


class textVQADataset(Dataset):
    def __init__(
        self,
        image_dir="./downloads/TextVQA/train_images",
        ann_path="./downloads/TextVQA/TextVQA_0.5.1_val.json",
    ):
        self.data = json.load(open(ann_path, "r"))["data"]
        self.image_dir = image_dir

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        question = self.data[idx]['question']
        answers = self.data[idx]['answers']
        img_id = self.data[idx]['image_id']
        qid = self.data[idx]['question_id']
        img_path = os.path.join(self.image_dir, f"{img_id}.jpg")

        item = {
            "question_id": qid,
            "image_path": img_path,
            "question": question,
            "gt_answers": answers
        }

        return item


class docVQADataset(Dataset):
    def __init__(
        self,
        image_dir="./downloads/DocVQA/spdocvqa_images",
        ann_path="./downloads/DocVQA/val_v1.0_withQT.json",
        ocr_token_path=None
    ):
        self.data = json.load(open(ann_path, "r"))["data"]
        self.image_dir = image_dir
        self.ann_path = ann_path
        if ocr_token_path:
            # Optional OCR tokens, keyed by image_id for fast lookup.
            self.ocr_token_data = {item['image_id']: item for item in json.load(open(ocr_token_path, "r"))["data"]}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        question_id = self.data[idx]['questionId']
        relative_img_path = self.data[idx]['image']
        # Map the annotation's "documents/" prefix onto the local "images/" directory.
        corrected_relative_img_path = relative_img_path.replace("documents", "images")
        img_path = os.path.join(self.image_dir, corrected_relative_img_path)
        question = self.data[idx]['question']
        answers = self.data[idx]['answers']

        question_type = self.data[idx]['question_types']

        return {
            "question_id": question_id,
            "image_path": img_path,
            "question": question,
            "gt_answers": answers,
            'question_type': question_type,
        }


class docVQATESTDataset(Dataset):
    def __init__(
        self,
        image_dir="./downloads/DocVQA/spdocvqa_images",
        ann_path="./downloads/DocVQA/test_v1.0.json",
        ocr_token_path=None
    ):
        self.data = json.load(open(ann_path, "r"))["data"]
        self.image_dir = image_dir
        self.ann_path = ann_path

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        question_id = self.data[idx]['questionId']
        relative_img_path = self.data[idx]['image']
        corrected_relative_img_path = relative_img_path.replace("documents", "images")
        img_path = os.path.join(self.image_dir, corrected_relative_img_path)
        question = self.data[idx]['question']

        # The test split ships without ground-truth answers or question types.
        return {
            "question_id": question_id,
            "image_path": img_path,
            "question": question,
            "gt_answers": "",
            'question_type': "",
        }
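For reference, a quick sketch of how prompt_processor behaves on the three prompt layouts it accepts. The prompt strings below are made up for illustration, and the import path assumes the repository root is on sys.path:

# Illustrative prompts only; not taken from the repository's data.
from eval_mm.vqaeval.datasets.vqa_dataset import prompt_processor

# Layout 1: OCR-token prefix with an embedded "Question: ... Short answer:" span.
prompt_processor("OCR tokens: stop, 25. Question: What does the sign say? Short answer:")
# -> 'what does the sign say?'

# Layout 2: three lines, one of which carries "Reference OCR token: ".
prompt_processor("Reference OCR token: stop, 25\nWhat does the sign say?\nShort answer:")
# -> 'what does the sign say?'

# Layout 3: two lines, question first.
prompt_processor("What does the sign say?\nShort answer:")
# -> 'what does the sign say?'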
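And a minimal usage sketch for the dataset classes, assuming the TextVQA images and annotations have already been fetched to the default ./downloads paths:

from eval_mm.vqaeval.datasets.vqa_dataset import textVQADataset

dataset = textVQADataset()  # default image_dir and ann_path
print(len(dataset))

# Each item is a plain dict: question_id, image_path, question, gt_answers.
sample = dataset[0]
print(sample["question"], sample["image_path"])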