SMS-spam-English-sklearn

Runtime error

akuysal committed on Mar 18, 2023

Commit

2f60bb1

1 Parent(s): 29ec108

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,12 @@ import sklearn
 nltk.download('punkt')
 def predictSMSdata(test_text):
     categories = ["legitimate", "spam"]
     categories.sort()

 nltk.download('punkt')
+def custom_tokenizer_with_English_stemmer(text):
+    # Split the text into tokens with nltk.word_tokenize, then lowercase each token and stem it with stemmerEN. Note: no `translate`-based character stripping is applied here, so punctuation tokens are not filtered out.
+    tokens = [word for word in nltk.word_tokenize(text)]
+    stems = [stemmerEN.stem(item.lower()) for item in tokens]
+    return stems
 def predictSMSdata(test_text):
     categories = ["legitimate", "spam"]
     categories.sort()