commit 2157f5d97a
parent 3194a8921a
Author: Hongji Zhu
Date:   2024-02-02 15:48:35 +08:00

    update readme

4 changed files with 9 additions and 9 deletions

README.md

@@ -322,7 +322,7 @@ pip install -r requirements.txt
 Please refer to the following codes to run `OmniLMM`.
 <div align="center">
-<img src="assets/COCO_test2015_000000262144.jpg" width="660px">
+<img src="assets/worldmap_ck.jpg" width="500px">
 </div>
@@ -331,10 +331,10 @@ from chat import OmniLMMChat, img2base64
 chat_model = OmniLMMChat('openbmb/OmniLMM-12B') # or 'openbmb/MiniCPM-V'
-im_64 = img2base64('./assets/COCO_test2015_000000262144.jpg')
+im_64 = img2base64('./assets/worldmap_ck.jpg')
 # First round chat
-msgs = [{"role": "user", "content": "What are the people doing?"}]
+msgs = [{"role": "user", "content": "What is interesting about this image?"}]
 inputs = {"image": im_64, "question": json.dumps(msgs)}
 answer = chat_model.process(inputs)
@@ -343,7 +343,7 @@ print(answer)
 # Second round chat
 # pass history context of multi-turn conversation
 msgs.append({"role": "assistant", "content": answer})
-msgs.append({"role": "user", "content": "Describe the image"})
+msgs.append({"role": "user", "content": "Where is China in the image"})
 inputs = {"image": im_64, "question": json.dumps(msgs)}
 answer = chat_model.process(inputs)
@@ -352,9 +352,9 @@ print(answer)
 We can obtain the following results:
 ```
-"The people in the image are playing baseball. One person is pitching a ball, another one is swinging a bat to hit it, and there's also an umpire present who appears to be watching the game closely."
-"The image depicts a baseball game in progress. A pitcher is throwing the ball, while another player is swinging his bat to hit it. An umpire can be seen observing the play closely."
+"The interesting aspect of this image is the shape of the chicken nuggets on the pan. The nuggets are shaped like the continents of the world, which is an unusual and creative way to present the food. It adds a fun and playful element to the meal, making it more visually appealing and engaging."
+"In the image, China is located on the right side of the pan. It is one of the nuggets shaped like the continents of the world, and its placement on the right side of the pan is consistent with its geographical location in the real world"
 ```
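
Read together, the hunks above swap the baseball example for the new world-map chicken-nugget image and a two-round conversation about it. The sketch below is the updated README example re-assembled into one runnable script; it assumes the `chat` module, the `OmniLMMChat.process` API, and the `assets/worldmap_ck.jpg` path behave exactly as they appear in this diff, and adds nothing beyond what the patch contains.

```python
import json
from chat import OmniLMMChat, img2base64

# Load the model named in the README (or 'openbmb/MiniCPM-V').
chat_model = OmniLMMChat('openbmb/OmniLMM-12B')

# Encode the example image added by this commit.
im_64 = img2base64('./assets/worldmap_ck.jpg')

# First round: open question about the image.
msgs = [{"role": "user", "content": "What is interesting about this image?"}]
inputs = {"image": im_64, "question": json.dumps(msgs)}
answer = chat_model.process(inputs)
print(answer)

# Second round: append the assistant reply so the model sees the history,
# then ask a follow-up that depends on the first answer.
msgs.append({"role": "assistant", "content": answer})
msgs.append({"role": "user", "content": "Where is China in the image"})
inputs = {"image": im_64, "question": json.dumps(msgs)}
answer = chat_model.process(inputs)
print(answer)
```

Note that the second call re-sends the full `msgs` history through `json.dumps`, which is how the example passes multi-turn context to the model.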

Binary image changed (Before: 188 KiB); file not shown in the diff view.

assets/worldmap_ck.jpg — new binary file (After: 78 KiB); file not shown in the diff view.

chat.py

@@ -177,17 +177,17 @@ if __name__ == '__main__':
     model_path = 'openbmb/OmniLMM-12B'
     chat_model = OmniLMMChat(model_path)
-    im_64 = img2base64('./assets/COCO_test2015_000000262144.jpg')
+    im_64 = img2base64('./assets/worldmap_ck.jpg')
     # first round chat
-    msgs = [{"role": "user", "content": "What are the people doing?"}]
+    msgs = [{"role": "user", "content": "What is interesting about this image?"}]
     input = {"image": im_64, "question": json.dumps(msgs, ensure_ascii=True)}
     answer = chat_model.chat(input)
     print(msgs[-1]["content"]+'\n', answer)
     # second round chat
     msgs.append({"role": "assistant", "content": answer})
-    msgs.append({"role": "user", "content": "Describe the image"})
+    msgs.append({"role": "user", "content": "Where is China in the image"})
     input = {"image": im_64,"question": json.dumps(msgs, ensure_ascii=True)}
     answer = chat_model.chat(input)
     print(msgs[-1]["content"]+'\n', answer)