Update app.py
app.py CHANGED
@@ -1,22 +1,21 @@
 from sklearn.feature_extraction.text import TfidfVectorizer
-from TurkishStemmer import TurkishStemmer
 import string
 # import for loading python objects (scikit-learn models)
 import pickle
 import nltk
 from nltk.data import load
+from nltk.stem import PorterStemmer
 import streamlit as st
 import sklearn
 
 nltk.download('punkt')
-trans_table = {ord(c): None for c in string.punctuation + string.digits}
 
 def custom_tokenizer_with_Turkish_stemmer(text):
     # tokenize text
     # tokens = text.split(" ")
-    tokens = [word for word in nltk.word_tokenize(text
+    tokens = [word for word in nltk.word_tokenize(text)]
     print(tokens)
-    stems = [
+    stems = [stemmerEN.stem(item.lower()) for item in tokens]
     return stems
 
 def predictSMSdata(test_text):
@@ -24,13 +23,13 @@ def predictSMSdata(test_text):
     categories.sort()
 
     # load model
-    filename1 = "
+    filename1 = "LinearSVC_SMS_spam_EN.pickle"
     file_handle1 = open(filename1, "rb")
     classifier = pickle.load(file_handle1)
     file_handle1.close()
 
     # load tfidf_vectorizer for transforming test text data
-    filename2 = "
+    filename2 = "tfidf_vectorizer_EN.pickle"
     file_handle2 = open(filename2, "rb")
     tfidf_vectorizer = pickle.load(file_handle2)
     file_handle2.close()
@@ -41,10 +40,11 @@ def predictSMSdata(test_text):
     print(categories[predicted[0]])
     return categories[predicted[0]]
 
-
+# Porter Stemmer for English
+stemmerEN = PorterStemmer()
 
 # adding the text that will show in the text box
-default_value = "
+default_value = "ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE MINS. INDIA CUST SERVs SED YES. L8ER GOT MEGA BILL. 3 DONT GIV A SHIT. BAILIFF DUE IN DAYS. I O £250 3 WANT £800"
 text = st.text_area("enter some text!", default_value)
 if text:
     out = predictSMSdata(text)
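For reference, the tokenizer path introduced by this commit (the function keeps its custom_tokenizer_with_Turkish_stemmer name even though it now uses NLTK's English PorterStemmer) can be exercised on its own. The snippet below is a minimal sketch that only re-assembles the added lines from the diff into a self-contained script; it is not part of the committed app.py, and the sample message and printed stems are illustrative.

# Minimal, self-contained sketch of the tokenizer after this change (not part of app.py).
import nltk
from nltk.stem import PorterStemmer

nltk.download('punkt')

# Porter Stemmer for English, as added at the bottom of the updated app.py
stemmerEN = PorterStemmer()

def custom_tokenizer_with_Turkish_stemmer(text):
    # tokenize the text with NLTK, then lowercase and stem each token
    tokens = [word for word in nltk.word_tokenize(text)]
    stems = [stemmerEN.stem(item.lower()) for item in tokens]
    return stems

# Illustrative message (not from the app); prints a list of lowercased stems,
# e.g. ['congratul', ',', 'you', 'won', 'free', 'ticket', '!']
print(custom_tokenizer_with_Turkish_stemmer("Congratulations, you won free tickets!"))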