import gradio as gr
import joblib

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Download the NLTK resources used for tokenization, stop-word removal and lemmatization.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')


# Load the pre-trained sentiment analysis pipeline serialized with joblib.
pipe2 = joblib.load("sentiment_analysis_model.joblib")
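
# Note: the training script for "sentiment_analysis_model.joblib" is not included in this file.
# The function below is only an illustrative sketch of how such a pipeline *might* have been
# built and saved, assuming a TF-IDF + MultinomialNB scikit-learn pipeline and a hypothetical
# CSV with "text" and "sentiment" columns; it is never called by this app.
def _train_and_save_pipeline_sketch(csv_path="training_data.csv"):
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.preprocessing import LabelEncoder
    from sklearn.pipeline import Pipeline
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.metrics import accuracy_score

    df = pd.read_csv(csv_path)
    # Apply the same NLTK preprocessing that is used at inference time (defined below).
    X = df["text"].astype(str).apply(preprocess_nltk)
    # Encode string labels to integers; LabelEncoder orders classes alphabetically,
    # so the `classes` list used in prediction() must follow the same order.
    le = LabelEncoder()
    y = le.fit_transform(df["sentiment"])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    pipeline = Pipeline([
        ("tfidf", TfidfVectorizer()),
        ("clf", MultinomialNB()),
    ])
    pipeline.fit(X_train, y_train)
    print("Test accuracy:", accuracy_score(y_test, pipeline.predict(X_test)))

    joblib.dump(pipeline, "sentiment_analysis_model.joblib")
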

def preprocess_nltk(text):
    """Lowercase, tokenize, drop stop words and punctuation, and lemmatize the text."""
    lemmatizer = WordNetLemmatizer()
    stop_words = set(stopwords.words("english"))
    tokens = word_tokenize(text.lower())
    # Keep alphanumeric tokens that are not stop words, lemmatizing each one.
    filtered_tokens = [lemmatizer.lemmatize(token) for token in tokens if token.isalnum() and token not in stop_words]
    return " ".join(filtered_tokens)




def prediction(text):
    """Preprocess the input text and return the model's predicted sentiment class."""
    text_processed = preprocess_nltk(text)
    ans = pipe2.predict([text_processed])
    # Map the model's integer prediction back to a class name; this order must match
    # the label encoding used when the model was trained.
    classes = ['Irrelevant', 'Neutral', 'Negative', 'Positive']
    predicted_label = ans[0]
    return(f"The above text is:{classes[predicted_label]}" )

# A minimal Gradio interface: free-text input, predicted sentiment shown in a text box.
pre = gr.Interface(
    fn=prediction,
    inputs=["text"],
    outputs=[gr.Textbox(label="Prediction", lines=3)],
)

# Start the Gradio app (pass share=True to also create a temporary public link).
pre.launch()