diff --git a/web_demo_streamlit-2_5.py b/web_demo_streamlit-2_5.py
index bf100ce..4cee58c 100644
--- a/web_demo_streamlit-2_5.py
+++ b/web_demo_streamlit-2_5.py
@@ -88,6 +88,7 @@ if user_text:
     # Generate reply using the model
     model = st.session_state.model
    tokenizer = st.session_state.tokenizer
+    imagefile = None
 
     with st.chat_message(A_NAME, avatar="assistant"):
         # If the previous message contains an image, pass the image to the model
diff --git a/web_demo_streamlit-minicpmv2_6.py b/web_demo_streamlit-minicpmv2_6.py
new file mode 100644
index 0000000..0ad8be5
--- /dev/null
+++ b/web_demo_streamlit-minicpmv2_6.py
@@ -0,0 +1,271 @@
+import os.path
+
+import streamlit as st
+import torch
+from PIL import Image
+from decord import VideoReader, cpu
+import numpy as np
+from transformers import AutoModel, AutoTokenizer
+
+# Model path
+model_path = "openbmb/MiniCPM-V-2_6"
+upload_path = "./uploads"  # forward slashes keep the path portable across platforms
+os.makedirs(upload_path, exist_ok=True)  # make sure the upload directory exists
+
+# User and assistant names
+U_NAME = "User"
+A_NAME = "Assistant"
+
+# Set page configuration
+st.set_page_config(
+    page_title="MiniCPM-V-2_6 Streamlit",
+    page_icon=":robot:",
+    layout="wide"
+)
+
+
+# Load model and tokenizer
+@st.cache_resource
+def load_model_and_tokenizer():
+    print(f"load_model_and_tokenizer from {model_path}")
+    model = (AutoModel.from_pretrained(model_path, trust_remote_code=True, attn_implementation='sdpa')
+             .to(dtype=torch.bfloat16))
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    return model, tokenizer
+
+
+# Initialize the model and tokenizer in session state
+if 'model' not in st.session_state:
+    st.session_state.model, st.session_state.tokenizer = load_model_and_tokenizer()
+    st.session_state.model.eval().cuda()
+    print("Model and tokenizer loaded successfully!")
+
+# Initialize the chat state
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+    st.session_state.uploaded_image_list = []
+    st.session_state.uploaded_image_num = 0
+    st.session_state.uploaded_video_list = []
+    st.session_state.uploaded_video_num = 0
+    st.session_state.response = ""
+
+# Sidebar settings
+sidebar_name = st.sidebar.title("MiniCPM-V-2_6 Streamlit")
+max_length = st.sidebar.slider("max_length", 0, 4096, 2048, step=2)
+repetition_penalty = st.sidebar.slider("repetition_penalty", 0.0, 2.0, 1.05, step=0.01)
+top_k = st.sidebar.slider("top_k", 0, 100, 100, step=1)
+top_p = st.sidebar.slider("top_p", 0.0, 1.0, 0.8, step=0.01)
+temperature = st.sidebar.slider("temperature", 0.0, 1.0, 0.7, step=0.01)
+
+# Button to clear the session history
+buttonClean = st.sidebar.button("Clear session history", key="clean")
+if buttonClean:
+    # Reset the session state history and uploaded file lists
+    st.session_state.chat_history = []
+    st.session_state.uploaded_image_list = []
+    st.session_state.uploaded_image_num = 0
+    st.session_state.uploaded_video_list = []
+    st.session_state.uploaded_video_num = 0
+    st.session_state.response = ""
+
+    # If using a GPU, clear the CUDA cache to free up memory
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
+    # Rerun to refresh the interface
+    st.rerun()
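+
+# Each chat_history entry is a dict of the form
+#   {"role": "user" | "model", "content": str | None,
+#    "image": UploadedFile | None, "video": UploadedFile | None},
+# so the display loop below only needs to check which field is populated.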
+
+# Display chat history
+for i, message in enumerate(st.session_state.chat_history):
+    if message["role"] == "user":
+        with st.chat_message(name="user", avatar="user"):
+            if message["image"] is not None:
+                st.image(message["image"], caption='User uploaded image', width=512, use_column_width=False)
+            elif message["video"] is not None:
+                st.video(message["video"], format="video/mp4", loop=False, autoplay=False, muted=True)
+            elif message["content"] is not None:
+                st.markdown(message["content"])
+    else:
+        with st.chat_message(name="model", avatar="assistant"):
+            st.markdown(message["content"])
+
+# Select mode
+selected_mode = st.sidebar.selectbox("Select Mode", ["Text", "Single Image", "Multiple Images", "Video"])
+
+# Supported image file extensions
+image_type = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp']
+
+if selected_mode == "Single Image":
+    # Single Image mode
+    uploaded_image = st.sidebar.file_uploader("Upload a Single Image", key=1, type=image_type,
+                                              accept_multiple_files=False)
+    if uploaded_image is not None:
+        st.image(uploaded_image, caption='User Uploaded Image', width=512, use_column_width=False)
+        # Add the uploaded image to the chat history
+        st.session_state.chat_history.append({"role": "user", "content": None, "image": uploaded_image, "video": None})
+        st.session_state.uploaded_image_list = [uploaded_image]
+        st.session_state.uploaded_image_num = 1
+
+if selected_mode == "Multiple Images":
+    # Multiple Images mode
+    uploaded_image_list = st.sidebar.file_uploader("Upload Multiple Images", key=2, type=image_type,
+                                                   accept_multiple_files=True)
+    uploaded_image_num = len(uploaded_image_list)
+
+    if uploaded_image_list is not None and uploaded_image_num > 0:
+        for img in uploaded_image_list:
+            st.image(img, caption='User Uploaded Image', width=512, use_column_width=False)
+            # Add each uploaded image to the chat history
+            st.session_state.chat_history.append({"role": "user", "content": None, "image": img, "video": None})
+        # Update the uploaded image list and count in st.session_state
+        st.session_state.uploaded_image_list = uploaded_image_list
+        st.session_state.uploaded_image_num = uploaded_image_num
+
+# Supported video file extensions
+video_type = ['.mp4', '.mkv', '.mov', '.avi', '.flv', '.wmv', '.webm', '.m4v']
+
+# Tip: launch the app with
+#   streamlit run ./web_demo_streamlit-minicpmv2_6.py --server.maxUploadSize 1024
+# to raise the maximum upload size to 1024 MB (or larger) for bigger files.
+# The default 200 MB limit of Streamlit's file_uploader component may be
+# insufficient for video-based interactions. Adjust the size to your GPU memory budget.
+
+if selected_mode == "Video":
+    # Single Video mode
+    uploaded_video = st.sidebar.file_uploader("Upload a single video file", key=3, type=video_type,
+                                              accept_multiple_files=False)
+    if uploaded_video is not None:
+        st.video(uploaded_video, format="video/mp4", loop=False, autoplay=False, muted=True)
+        st.session_state.chat_history.append({"role": "user", "content": None, "image": None, "video": uploaded_video})
+
+        # Save the upload to disk so that a real file path can be handed to decord below
+        uploaded_video_path = os.path.join(upload_path, uploaded_video.name)
+        with open(uploaded_video_path, "wb") as vf:
+            vf.write(uploaded_video.getvalue())
+        st.session_state.uploaded_video_list = [uploaded_video_path]
+        st.session_state.uploaded_video_num = 1
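+
+# Rough frame budget (illustrative): every sampled frame is passed to the model
+# as one image, so a 10-minute clip sampled at 1 FPS yields ~600 candidate frames,
+# which the cap below then thins down to at most MAX_NUM_FRAMES.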
+MAX_NUM_FRAMES = 64  # if CUDA OOM occurs, set a smaller number
+
+
+# Encode a video by sampling frames at roughly 1 FPS and converting them to PIL images.
+def encode_video(video_path):
+    def uniform_sample(frame_indices, num_samples):
+        # Calculate the sampling interval and uniformly sample frame indices
+        gap = len(frame_indices) / num_samples
+        sampled_idxs = np.linspace(gap / 2, len(frame_indices) - gap / 2, num_samples, dtype=int)
+        return [frame_indices[i] for i in sampled_idxs]
+
+    # Read the video, decoding on the CPU
+    vr = VideoReader(video_path, ctx=cpu(0))
+
+    # Step through the frames so that roughly one frame per second is kept
+    sample_fps = round(vr.get_avg_fps())  # use an integer frame step
+    frame_idx = list(range(0, len(vr), sample_fps))
+
+    # If the number of sampled frames exceeds the maximum limit, uniformly subsample them
+    if len(frame_idx) > MAX_NUM_FRAMES:
+        frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+
+    # Retrieve the sampled frames and convert them to PIL images
+    frames = vr.get_batch(frame_idx).asnumpy()
+    frames = [Image.fromarray(frame.astype('uint8')) for frame in frames]
+
+    print('Number of frames:', len(frames))
+    return frames
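+
+# Worked example (illustrative): a 30 FPS, 2-minute video holds 3600 frames;
+# stepping by sample_fps=30 leaves 120 indices, and uniform_sample() then picks
+# 64 of them at evenly spaced positions via np.linspace over the index range.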
+
+
+# User input box
+user_text = st.chat_input("Enter your question")
+if user_text is not None:
+    if user_text.strip() == "":
+        st.warning('Input message cannot be empty!', icon="⚠️")
+    else:
+        # Display the user input and save it to the session history
+        with st.chat_message(U_NAME, avatar="user"):
+            st.session_state.chat_history.append({
+                "role": "user",
+                "content": user_text,
+                "image": None,
+                "video": None
+            })
+            st.markdown(f"{U_NAME}: {user_text}")
+
+        # Generate a response using the model
+        model = st.session_state.model
+        tokenizer = st.session_state.tokenizer
+        content_list = []  # Content (text or images) that will be passed to the model
+        imageFile = None
+        frames = []  # Video frames; only filled in Video mode
+
+        with st.chat_message(A_NAME, avatar="assistant"):
+            # Handle the inputs differently depending on the selected mode
+            if selected_mode == "Single Image":
+                # Single Image mode: pass in the last uploaded image
+                print("Single Image mode in use")
+                if len(st.session_state.chat_history) > 1 and len(st.session_state.uploaded_image_list) >= 1:
+                    uploaded_image = st.session_state.uploaded_image_list[-1]
+                    if uploaded_image:
+                        imageFile = Image.open(uploaded_image).convert('RGB')
+                        content_list.append(imageFile)
+                else:
+                    print("Single Image mode: No image found")
+
+            elif selected_mode == "Multiple Images":
+                # Multiple Images mode: pass in all of the most recently uploaded images
+                print("Multiple Images mode in use")
+                if len(st.session_state.chat_history) > 1 and st.session_state.uploaded_image_num >= 1:
+                    for uploaded_image in st.session_state.uploaded_image_list:
+                        imageFile = Image.open(uploaded_image).convert('RGB')
+                        content_list.append(imageFile)
+                else:
+                    print("Multiple Images mode: No image found")
+
+            elif selected_mode == "Video":
+                # Video mode: pass in sampled frames of the uploaded video
+                print("Video mode in use")
+                if len(st.session_state.chat_history) > 1 and st.session_state.uploaded_video_num == 1:
+                    uploaded_video_path = st.session_state.uploaded_video_list[-1]
+                    if uploaded_video_path:
+                        with st.spinner('Encoding your video, please wait...'):
+                            frames = encode_video(uploaded_video_path)
+                else:
+                    print("Video mode: No video found")
+
+            # Define the model parameters
+            params = {
+                'sampling': True,
+                'top_p': top_p,
+                'top_k': top_k,
+                'temperature': temperature,
+                'repetition_penalty': repetition_penalty,
+                'max_new_tokens': max_length,
+                'stream': True
+            }
+
+            # Set different input parameters depending on whether a video was uploaded
+            if st.session_state.uploaded_video_num == 1 and selected_mode == "Video":
+                msgs = [{"role": "user", "content": frames + [user_text]}]
+                # Set decode params for video
+                params["max_inp_length"] = 4352  # maximum input length in video mode
+                params["use_image_id"] = False  # do not use image_id
+                params["max_slice_nums"] = 1  # use 1 if CUDA OOM occurs and video resolution > 448*448
+            else:
+                content_list.append(user_text)
+                msgs = [{"role": "user", "content": content_list}]
+
+            print("content_list:", content_list)  # debug
+            print("params:", params)  # debug
+
+            # Generate and display the model's response
+            with st.spinner('AI is thinking...'):
+                response = model.chat(image=None, msgs=msgs, context=None, tokenizer=tokenizer, **params)
+                st.session_state.response = st.write_stream(response)
+            st.session_state.chat_history.append({
+                "role": "model",
+                "content": st.session_state.response,
+                "image": None,
+                "video": None
+            })
+
+        st.divider()  # Add a separator to the interface
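+
+# Note on streaming: with 'stream': True, model.chat() returns a generator of text
+# chunks; st.write_stream() renders them incrementally and returns the full
+# concatenated string, which is what gets stored in chat_history above.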