mirror of
https://github.com/OpenBMB/MiniCPM-V.git
synced 2026-02-05 18:29:18 +08:00
Update web_demo_streamlit-minicpmv2_6.py
1. Avoid using 'None' string when `user_text` is empty. 2. Added `st.spinner` to display a loading message during AI content generation.
This commit is contained in:
@@ -173,91 +173,97 @@ def encode_video(video_path):
|
|||||||
return frames
|
return frames
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# User input box
|
# User input box
|
||||||
user_text = st.chat_input("Enter your question")
|
user_text = st.chat_input("Enter your question")
|
||||||
if user_text:
|
if user_text is not None:
|
||||||
# Display user input and save it to session history
|
if user_text.strip() is "":
|
||||||
with st.chat_message(U_NAME, avatar="user"):
|
st.warning('Input message could not be empty!', icon="⚠️")
|
||||||
st.session_state.chat_history.append({
|
else:
|
||||||
"role": "user",
|
# Display user input and save it to session history
|
||||||
"content": user_text,
|
with st.chat_message(U_NAME, avatar="user"):
|
||||||
"image": None,
|
st.session_state.chat_history.append({
|
||||||
"video": None
|
"role": "user",
|
||||||
})
|
"content": user_text,
|
||||||
st.markdown(f"{U_NAME}: {user_text}")
|
"image": None,
|
||||||
|
"video": None
|
||||||
|
})
|
||||||
|
st.markdown(f"{U_NAME}: {user_text}")
|
||||||
|
|
||||||
# Generate responses using the model
|
# Generate responses using the model
|
||||||
model = st.session_state.model
|
model = st.session_state.model
|
||||||
tokenizer = st.session_state.tokenizer
|
tokenizer = st.session_state.tokenizer
|
||||||
content_list = [] # Store the content (text or image) that will be passed into the model
|
content_list = [] # Store the content (text or image) that will be passed into the model
|
||||||
imageFile = None
|
imageFile = None
|
||||||
|
|
||||||
with st.chat_message(A_NAME, avatar="assistant"):
|
with st.chat_message(A_NAME, avatar="assistant"):
|
||||||
# Handle different inputs depending on the mode selected by the user
|
# Handle different inputs depending on the mode selected by the user
|
||||||
if selected_mode == "Single Image":
|
if selected_mode == "Single Image":
|
||||||
# Single image mode: pass in the last uploaded image
|
# Single image mode: pass in the last uploaded image
|
||||||
print("Single Images mode in use")
|
print("Single Images mode in use")
|
||||||
if len(st.session_state.chat_history) > 1 and len(st.session_state.uploaded_image_list) >= 1:
|
if len(st.session_state.chat_history) > 1 and len(st.session_state.uploaded_image_list) >= 1:
|
||||||
uploaded_image = st.session_state.uploaded_image_list[-1]
|
uploaded_image = st.session_state.uploaded_image_list[-1]
|
||||||
if uploaded_image:
|
if uploaded_image:
|
||||||
imageFile = Image.open(uploaded_image).convert('RGB')
|
imageFile = Image.open(uploaded_image).convert('RGB')
|
||||||
content_list.append(imageFile)
|
content_list.append(imageFile)
|
||||||
|
else:
|
||||||
|
print("Single Images mode: No image found")
|
||||||
|
|
||||||
|
elif selected_mode == "Multiple Images":
|
||||||
|
# Multi-image mode: pass in all the images uploaded last time
|
||||||
|
print("Multiple Images mode in use")
|
||||||
|
if len(st.session_state.chat_history) > 1 and st.session_state.uploaded_image_num >= 1:
|
||||||
|
for uploaded_image in st.session_state.uploaded_image_list:
|
||||||
|
imageFile = Image.open(uploaded_image).convert('RGB')
|
||||||
|
content_list.append(imageFile)
|
||||||
|
else:
|
||||||
|
print("Multiple Images mode: No image found")
|
||||||
|
|
||||||
|
elif selected_mode == "Video":
|
||||||
|
# Video mode: pass in slice frames of uploaded video
|
||||||
|
print("Video mode in use")
|
||||||
|
if len(st.session_state.chat_history) > 1 and st.session_state.uploaded_video_num == 1:
|
||||||
|
uploaded_video_path = st.session_state.uploaded_video_list[-1]
|
||||||
|
if uploaded_video_path:
|
||||||
|
frames = encode_video(uploaded_video_path)
|
||||||
|
else:
|
||||||
|
print("Video Mode: No video found")
|
||||||
|
|
||||||
|
# Defining model parameters
|
||||||
|
params = {
|
||||||
|
'sampling': True,
|
||||||
|
'top_p': top_p,
|
||||||
|
'top_k': top_k,
|
||||||
|
'temperature': temperature,
|
||||||
|
'repetition_penalty': repetition_penalty,
|
||||||
|
"max_new_tokens": max_length,
|
||||||
|
"stream": True
|
||||||
|
}
|
||||||
|
|
||||||
|
# Set different input parameters depending on whether to upload a video
|
||||||
|
if st.session_state.uploaded_video_num == 1 and selected_mode == "Video":
|
||||||
|
msgs = [{"role": "user", "content": frames + [user_text]}]
|
||||||
|
# Set decode params for video
|
||||||
|
params["max_inp_length"] = 4352 # Set the maximum input length of the video mode
|
||||||
|
params["use_image_id"] = False # Do not use image_id
|
||||||
|
params["max_slice_nums"] = 1 # # use 1 if cuda OOM and video resolution > 448*448
|
||||||
else:
|
else:
|
||||||
print("Single Images mode: No image found")
|
content_list.append(user_text)
|
||||||
|
msgs = [{"role": "user", "content": content_list}]
|
||||||
|
|
||||||
elif selected_mode == "Multiple Images":
|
print("content_list:", content_list) # debug
|
||||||
# Multi-image mode: pass in all the images uploaded last time
|
print("params:", params) # debug
|
||||||
print("Multiple Images mode in use")
|
|
||||||
if len(st.session_state.chat_history) > 1 and st.session_state.uploaded_image_num >= 1:
|
|
||||||
for uploaded_image in st.session_state.uploaded_image_list:
|
|
||||||
imageFile = Image.open(uploaded_image).convert('RGB')
|
|
||||||
content_list.append(imageFile)
|
|
||||||
else:
|
|
||||||
print("Multiple Images mode: No image found")
|
|
||||||
|
|
||||||
elif selected_mode == "Video":
|
# Generate and display the model's responses
|
||||||
# Video mode: pass in slice frames of uploaded video
|
with st.spinner('AI is thinking...'):
|
||||||
print("Video mode in use")
|
response = model.chat(image=None, msgs=msgs, context=None, tokenizer=tokenizer, **params)
|
||||||
if len(st.session_state.chat_history) > 1 and st.session_state.uploaded_video_num == 1:
|
st.session_state.response = st.write_stream(response)
|
||||||
uploaded_video_path = st.session_state.uploaded_video_list[-1]
|
st.session_state.chat_history.append({
|
||||||
if uploaded_video_path:
|
"role": "model",
|
||||||
frames = encode_video(uploaded_video_path)
|
"content": st.session_state.response,
|
||||||
else:
|
"image": None,
|
||||||
print("Video Mode: No video found")
|
"video": None
|
||||||
|
})
|
||||||
|
|
||||||
# Defining model parameters
|
st.divider() # Add separators to the interface
|
||||||
params = {
|
|
||||||
'sampling': True,
|
|
||||||
'top_p': top_p,
|
|
||||||
'top_k': top_k,
|
|
||||||
'temperature': temperature,
|
|
||||||
'repetition_penalty': repetition_penalty,
|
|
||||||
"max_new_tokens": max_length,
|
|
||||||
"stream": True
|
|
||||||
}
|
|
||||||
|
|
||||||
# Set different input parameters depending on whether to upload a video
|
|
||||||
if st.session_state.uploaded_video_num == 1 and selected_mode == "Video":
|
|
||||||
msgs = [{"role": "user", "content": frames + [user_text]}]
|
|
||||||
# Set decode params for video
|
|
||||||
params["max_inp_length"] = 4352 # Set the maximum input length of the video mode
|
|
||||||
params["use_image_id"] = False # Do not use image_id
|
|
||||||
params["max_slice_nums"] = 1 # # use 1 if cuda OOM and video resolution > 448*448
|
|
||||||
else:
|
|
||||||
content_list.append(user_text)
|
|
||||||
msgs = [{"role": "user", "content": content_list}]
|
|
||||||
|
|
||||||
print("content_list:", content_list) # debug
|
|
||||||
print("params:", params) # debug
|
|
||||||
|
|
||||||
# Generate and display the model's responses
|
|
||||||
response = model.chat(image=None, msgs=msgs, context=None, tokenizer=tokenizer, **params)
|
|
||||||
st.session_state.response = st.write_stream(response)
|
|
||||||
st.session_state.chat_history.append({
|
|
||||||
"role": "model",
|
|
||||||
"content": st.session_state.response,
|
|
||||||
"image": None,
|
|
||||||
"video": None
|
|
||||||
})
|
|
||||||
|
|
||||||
st.divider() # Add separators to the interface
|
|
||||||
|
|||||||
Reference in New Issue
Block a user