Spaces:

detect-tech
/

Qwen-VL-Instruct-Backend

Paused

App Files Files Community

Lemorra commited on May 26

Commit

083d486

1 Parent(s): 26e3b58

🎉 Pushed initial codebase

Browse files

Files changed (14) hide show

.gitignore +16 -0
Dockerfile +16 -0
README.md +5 -5
requirements.txt +11 -0
src/__pycache__/app.cpython-310.pyc +0 -0
src/app.py +32 -0
src/utils/__init__.py +0 -0
src/utils/__pycache__/__init__.cpython-310.pyc +0 -0
src/utils/__pycache__/authentication.cpython-310.pyc +0 -0
src/utils/__pycache__/payload_model.cpython-310.pyc +0 -0
src/utils/__pycache__/qwen_inference.cpython-310.pyc +0 -0
src/utils/authentication.py +20 -0
src/utils/payload_model.py +9 -0
src/utils/qwen_inference.py +134 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,16 @@

+# Ignores Python cache directories:
+# __pycache__/ - ignores cache directories at any depth (the pattern has no leading or middle slash)
+# */__pycache__/ - ignores cache directories one level deep
+# **/__pycache__/ - ignores cache directories at any depth
+__pycache__/
+*/__pycache__/
+**/__pycache__/
+# Python bytecode files:
+# *.pyc - compiled Python files
+# *.pyo - optimized Python files
+# *.pyd - Python DLL files
+*.py[cod]
+# Python implementation-specific bytecode
+*$py.class

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+# Create a non-root user with UID 1000 and run every following step as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Presumably needed because pip, run as this user, places console scripts
+# (such as uvicorn) under ~/.local/bin.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Copy and install the requirements before the rest of the sources so the
+# dependency layer does not have to be rebuilt when only application code changes.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the `app` object from src/app.py on all interfaces, port 7860.
+CMD ["uvicorn", "src.app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
-title: Qwen VL Instruct Backend
-emoji: 😻
-colorFrom: blue
-colorTo: pink
 sdk: docker
 pinned: false
 license: mit
-short_description: QwenVL models; Single/Multi Image support
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Qwen2.5 VL 3B Instruct Backend API
+emoji: 📚
+colorFrom: red
+colorTo: yellow
 sdk: docker
 pinned: false
 license: mit
+short_description: A Qwen2.5-VL-3B-Instruct backend for testing
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+fastapi
+uvicorn[standard]
+transformers
+accelerate
+qwen-vl-utils[decord]==0.0.8
+python-dotenv
+PyJWT
+pydantic
+torch
+torchvision
+hf_xet

src/__pycache__/app.cpython-310.pyc ADDED Viewed

Binary file (1.19 kB). View file

src/app.py ADDED Viewed

	@@ -0,0 +1,32 @@

+from pyexpat import model
+from typing import Annotated
+from fastapi import FastAPI, Depends
+from .utils.authentication import verify_token
+from .utils.payload_model import SingleInferencePayload, VideoInferencePayload
+from .utils.qwen_inference import Qwen2_5
+import os
+from dotenv import load_dotenv
+load_dotenv()
+model_path = os.getenv("MODEL_PATH")
+model_object = Qwen2_5(model_path)
+app = FastAPI()
+@app.get("/")
+def greet_json():
+    return {
+        "message": "Welcome! The backend API for Qwen2.5-VL-3B-Instruct model is running.",
+        "status": "active"
+    }
+@app.post("/single_inference")
+def single_inference(payload: SingleInferencePayload, _token: Annotated[dict, Depends(verify_token)]):
+    return model_object.get_single_inference(payload)
+@app.post("/video_inference")
+def video_inference(payload: VideoInferencePayload, _token: Annotated[dict, Depends(verify_token)]):
+    return model_object.get_video_inference(payload)

src/utils/__init__.py ADDED Viewed

File without changes

src/utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (236 Bytes). View file

src/utils/__pycache__/authentication.cpython-310.pyc ADDED Viewed

Binary file (1.02 kB). View file

src/utils/__pycache__/payload_model.cpython-310.pyc ADDED Viewed

Binary file (728 Bytes). View file

src/utils/__pycache__/qwen_inference.cpython-310.pyc ADDED Viewed

Binary file (712 Bytes). View file

src/utils/authentication.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from fastapi import HTTPException, Header
+import jwt
+from dotenv import load_dotenv
+import os
+load_dotenv()
+def get_secret_key():
+    return os.getenv("SECRET_KEY")
+async def verify_token(authorization: str = Header(...)):
+    try:
+        token_type, token = authorization.split()
+        if token_type.lower() != "bearer":
+            raise HTTPException(status_code=401, detail="Invalid token type")
+        return jwt.decode(token, get_secret_key(), algorithms=["HS256"])
+    except jwt.ExpiredSignatureError:
+        raise HTTPException(status_code=401, detail="Token has expired")
+    except (jwt.InvalidTokenError, IndexError):
+        raise HTTPException(status_code=401, detail="Invalid token")

src/utils/payload_model.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from pydantic import BaseModel
+class SingleInferencePayload(BaseModel):
+    """Request body for the /single_inference endpoint."""
+    # Despite the name, Qwen2_5.prepare_single_inference embeds this value
+    # directly after "data:image;base64,", so it is treated as base64 image
+    # data rather than as a filesystem path.
+    image_path: str
+    # Question about the image; sent to the model as the text part of the user message.
+    question: str
+class VideoInferencePayload(BaseModel):
+    video_path: str
+    question: list[str]

src/utils/qwen_inference.py ADDED Viewed

	@@ -0,0 +1,134 @@

+from .payload_model import SingleInferencePayload, VideoInferencePayload
+from transformers import AutoModelForVision2Seq, AutoTokenizer, AutoProcessor
+from qwen_vl_utils import process_vision_info
+from pydantic import BaseModel
+from typing import Optional
+class Qwen2_5(BaseModel):
+    model: Optional[AutoModelForVision2Seq] = None
+    tokenizer: Optional[AutoTokenizer] = None
+    processor: Optional[AutoProcessor] = None
+    model_config = {
+        "arbitrary_types_allowed": True,
+        "from_attributes": True
+    }
+    def __init__(self, model_path: str):
+        super().__init__()
+        self.model = AutoModelForVision2Seq.from_pretrained(
+            model_path, torch_dtype="auto", device_map="auto"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
+        self.processor = AutoProcessor.from_pretrained(model_path)
+    def prepare_single_inference(self, image: str, question: str):
+        image = f"data:image;base64,{image}"
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "text",
+                        "image": image,
+                    },
+                    {
+                        "type": "text",
+                        "text": question
+                    },
+                ],
+            }
+        ]
+        text = self.processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = self.processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to("cuda")
+        return inputs
+    def prepare_video_inference(self, video: list[str], question: str):
+        base64_videos = []
+        for frame in video:
+            base64_videos.append(f"data:image;base64,{frame}")
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "video",
+                        "video": base64_videos,
+                    },
+                    {
+                        "type": "text",
+                        "text": question
+                    },
+                ],
+            }
+        ]
+        text = self.processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True)
+        inputs = self.processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            fps=1.0,
+            padding=True,
+            return_tensors="pt",
+            **video_kwargs,
+        )
+        inputs = inputs.to("cuda")
+        return inputs
+    def get_single_inference(self, payload: SingleInferencePayload):
+        try:
+            processed_inputs = self.prepare_single_inference(payload.image_path, payload.question)
+            generated_ids = self.model.generate(**processed_inputs, max_new_tokens=128)
+            generated_ids_trimmed = [
+                out_ids[len(in_ids) :] for in_ids, out_ids in zip(processed_inputs.input_ids, generated_ids)
+            ]
+            output_text = self.processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )
+            print(f"Model generated text: {output_text}")
+            return {
+                "message": output_text,
+                "status": 200
+            }
+        except Exception as e:
+            return {
+                "message": str(e),
+                "status": 500
+            }
+    def get_video_inference(self, payload: VideoInferencePayload):
+        try:
+            processed_inputs = self.prepare_video_inference(payload.video_path, payload.question)
+            generated_ids = self.model.generate(**processed_inputs, max_new_tokens=128)
+            generated_ids_trimmed = [
+                out_ids[len(in_ids) :] for in_ids, out_ids in zip(processed_inputs.input_ids, generated_ids)
+            ]
+            output_text = self.processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )
+            print(f"Model generated text: {output_text}")
+            return {
+                "message": output_text,
+                "status": 200
+            }
+        except Exception as e:
+            return {
+                "message": str(e),
+                "status": 500
+            }