diff --git a/README.md b/README.md index 453b024..026d648 100644 --- a/README.md +++ b/README.md @@ -322,7 +322,7 @@ pip install -r requirements.txt Please refer to the following codes to run `OmniLMM`.
- +
@@ -331,10 +331,10 @@ from chat import OmniLMMChat, img2base64 chat_model = OmniLMMChat('openbmb/OmniLMM-12B') # or 'openbmb/MiniCPM-V' -im_64 = img2base64('./assets/COCO_test2015_000000262144.jpg') +im_64 = img2base64('./assets/worldmap_ck.jpg') # First round chat -msgs = [{"role": "user", "content": "What are the people doing?"}] +msgs = [{"role": "user", "content": "What is interesting about this image?"}] inputs = {"image": im_64, "question": json.dumps(msgs)} answer = chat_model.process(inputs) @@ -343,7 +343,7 @@ print(answer) # Second round chat # pass history context of multi-turn conversation msgs.append({"role": "assistant", "content": answer}) -msgs.append({"role": "user", "content": "Describe the image"}) +msgs.append({"role": "user", "content": "Where is China in the image"}) inputs = {"image": im_64, "question": json.dumps(msgs)} answer = chat_model.process(inputs) @@ -352,9 +352,9 @@ print(answer) We can obtain the following results: ``` -"The people in the image are playing baseball. One person is pitching a ball, another one is swinging a bat to hit it, and there's also an umpire present who appears to be watching the game closely." +"The interesting aspect of this image is the shape of the chicken nuggets on the pan. The nuggets are shaped like the continents of the world, which is an unusual and creative way to present the food. It adds a fun and playful element to the meal, making it more visually appealing and engaging." -"The image depicts a baseball game in progress. A pitcher is throwing the ball, while another player is swinging his bat to hit it. An umpire can be seen observing the play closely." +"In the image, China is located on the right side of the pan. It is one of the nuggets shaped like the continents of the world, and its placement on the right side of the pan is consistent with its geographical location in the real world" ``` diff --git a/assets/COCO_test2015_000000262144.jpg b/assets/COCO_test2015_000000262144.jpg deleted file mode 100644 index 012f88d..0000000 Binary files a/assets/COCO_test2015_000000262144.jpg and /dev/null differ diff --git a/assets/worldmap_ck.jpg b/assets/worldmap_ck.jpg new file mode 100644 index 0000000..dce3c0a Binary files /dev/null and b/assets/worldmap_ck.jpg differ diff --git a/chat.py b/chat.py index 0481d16..e802887 100644 --- a/chat.py +++ b/chat.py @@ -177,17 +177,17 @@ if __name__ == '__main__': model_path = 'openbmb/OmniLMM-12B' chat_model = OmniLMMChat(model_path) - im_64 = img2base64('./assets/COCO_test2015_000000262144.jpg') + im_64 = img2base64('./assets/worldmap_ck.jpg') # first round chat - msgs = [{"role": "user", "content": "What are the people doing?"}] + msgs = [{"role": "user", "content": "What is interesting about this image?"}] input = {"image": im_64, "question": json.dumps(msgs, ensure_ascii=True)} answer = chat_model.chat(input) print(msgs[-1]["content"]+'\n', answer) # second round chat msgs.append({"role": "assistant", "content": answer}) - msgs.append({"role": "user", "content": "Describe the image"}) + msgs.append({"role": "user", "content": "Where is China in the image"}) input = {"image": im_64,"question": json.dumps(msgs, ensure_ascii=True)} answer = chat_model.chat(input) print(msgs[-1]["content"]+'\n', answer)