"""Gradio app that classifies fire/smoke in uploaded videos frame by frame."""
import torch
import torchvision
from torchvision import transforms
import gradio as gr
import os
import cv2
from PIL import Image

from model import create_model

# Build the 3-class network and load CPU-mapped weights so the app also
# runs on machines without a GPU; eval() disables dropout/batch-norm updates.
model, transform = create_model(num_of_classes=3)
model.load_state_dict(
    torch.load("fire_smoke_weights.pth", map_location=torch.device("cpu"))
)
model.eval()

# Index i of this list is the label for model output class i.
CLASS_NAMES = ['DEFAULT', 'FIRE Spotted', 'SMOKE Spotted']


def classify_video(video):
    """Run the classifier on every frame of *video* and summarise the result.

    Args:
        video: Filesystem path to the video file handed over by the Gradio
            ``Video`` component.

    Returns:
        str: ``"Fire and Smoke Spotted"`` when both fire and smoke are each
        detected in more than 5 frames; otherwise the label of the highest
        class index observed; or an explanatory message when no frame could
        be decoded.
    """
    cap = cv2.VideoCapture(video)
    predictions = []
    fire_frames = 0
    smoke_frames = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:  # end of stream or decode failure
                break
            # OpenCV decodes to BGR; the torchvision transform expects RGB.
            img_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            img_tensor = transform(img_pil).unsqueeze(0)
            with torch.no_grad():
                pred = model(img_tensor).argmax().item()
            predictions.append(pred)
            if pred == 1:
                fire_frames += 1
            elif pred == 2:
                smoke_frames += 1
    finally:
        # BUGFIX: release the capture even if inference raises (was leaked).
        cap.release()

    # BUGFIX: an unreadable/empty video used to crash on max([]) with
    # ValueError; report it to the user instead.
    if not predictions:
        return "Could not read any frames from the video"
    if fire_frames > 5 and smoke_frames > 5:
        return "Fire and Smoke Spotted"
    # NOTE(review): max() picks the highest class *index* seen across all
    # frames, not the most frequent class — preserved from the original logic.
    return CLASS_NAMES[max(predictions)]


DESCRIPTION = "An MobileNET model trained to classify Fire and Smoke through Videos"
ARTICLE = "Created at jupyter NoteBook with GPU NVIDIA_GeForce_MX350"

if __name__ == "__main__":
    gr.Interface(
        fn=classify_video,
        inputs=gr.Video(streaming=True),
        outputs="text",
        title="Fire and Smoke Classifier",
        description=DESCRIPTION,
        article=ARTICLE,
        live=True,  # BUGFIX: was the string "True"; Gradio expects a bool
    ).launch()