File size: 2,963 Bytes
d0e6f56
 
cebbfb7
d0e6f56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cebbfb7
d0e6f56
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# app.py

import streamlit as st
from PIL import Image
import io

import torch
from transformers import AutoModel, AutoTokenizer

@st.cache_resource(show_spinner=True)
def load_model():
    """Load MiniCPM-V and its tokenizer once per server process.

    Returns:
        tuple: (model, tokenizer) — the model is in eval mode, placed on
        GPU in bfloat16 when CUDA is available, otherwise on CPU in
        float32.
    """
    # bfloat16 halves GPU memory, but many CPU kernels do not support it
    # (the original code kept bf16 weights on CPU, which is slow or fails
    # outright for some ops) — so choose the dtype per device up front.
    use_cuda = torch.cuda.is_available()
    dtype = torch.bfloat16 if use_cuda else torch.float32

    model = AutoModel.from_pretrained(
        "openbmb/MiniCPM-V", trust_remote_code=True, torch_dtype=dtype
    )
    tokenizer = AutoTokenizer.from_pretrained(
        "openbmb/MiniCPM-V", trust_remote_code=True
    )

    # Only the device move is needed here; the dtype was already set at
    # load time (the original redundantly re-passed dtype to .to()).
    model = model.to("cuda" if use_cuda else "cpu")
    model.eval()
    return model, tokenizer

# Streamlit requires st.set_page_config to be the FIRST Streamlit command
# of a script run. Calling load_model() first (as the original did) renders
# the cached-resource spinner — a Streamlit element — before page config,
# which raises StreamlitAPIException. Configure the page first.
st.set_page_config(page_title="MiniCPM-V Chat", layout="wide")
st.title("📄 MiniCPM-V Chat — Image/Text → Markdown / Chat")

# Load (and cache) the model and tokenizer once per server process.
model, tokenizer = load_model()

# Conversation history lives in session state so it survives reruns.
if "history" not in st.session_state:
    st.session_state.history = []

# Sidebar: upload or text input
with st.sidebar:
    uploaded_file = st.file_uploader("Upload image / pdf-page (image) or enter text:", type=["jpg","jpeg","png","pdf","txt"])
    text_input = st.text_area("Or paste text here:")

# Replay the prior conversation turns in the main chat area.
for msg in st.session_state.history:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

def run_minicpm_v(input_image=None, input_text=None, history=None):
    """Send one multimodal turn to MiniCPM-V and return its reply.

    Args:
        input_image: PIL.Image or None — image attached to this turn.
        input_text: str or None — text for this turn.
        history: list of prior {"role", "content"} messages, or None.

    Returns:
        str: the model's reply text.
    """
    # Work on a copy so the caller's history list is never mutated.
    msgs = list(history) if history else []

    # Compose the user turn: an image marker (when present) then the text.
    parts = []
    if input_image is not None:
        parts.append("[Image Uploaded]\n")  # or some marker + optional prompt
    if input_text:
        parts.append(input_text)
    msgs.append({"role": "user", "content": "".join(parts)})

    # model.chat returns (reply, updated_context, _); only the reply is used.
    reply, _context, _ = model.chat(
        image=input_image,
        msgs=msgs,
        context=None,
        tokenizer=tokenizer,
        sampling=True,
        temperature=0.7,
    )
    return reply

if uploaded_file is not None or text_input:
    # Echo the user's turn in the chat UI.
    with st.chat_message("user"):
        if uploaded_file is not None:
            st.image(uploaded_file, caption="Uploaded")
        if text_input:
            st.markdown(text_input)

    # Normalize the raw widget values for the model call.
    input_image = None
    input_text = text_input or None
    if uploaded_file is not None:
        # try open as image
        try:
            input_image = Image.open(uploaded_file).convert("RGB")
        except Exception as e:
            # Surface the actual failure rather than swallowing `e`.
            st.error(f"Could not open uploaded file as image: {e}")

    with st.spinner("Thinking..."):
        reply = run_minicpm_v(input_image=input_image, input_text=input_text, history=st.session_state.history)

    # BUG FIX: the user turn was never stored, so every later call passed a
    # history containing only assistant messages — the model lost the user
    # side of the conversation. Record it (mirroring the content that
    # run_minicpm_v composes) before the assistant reply.
    user_content = ""
    if input_image is not None:
        user_content = "[Image Uploaded]\n"
    if input_text:
        user_content += input_text
    st.session_state.history.append({"role": "user", "content": user_content})

    st.session_state.history.append({"role": "assistant", "content": reply})
    with st.chat_message("assistant"):
        st.markdown(reply)

st.chat_input(placeholder="Send more text or upload another file…")  # optional extra prompt