Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,6 +10,12 @@ import sklearn
|
|
| 10 |
|
| 11 |
# Download the NLTK 'punkt' resource when this module is loaded.
nltk.download('punkt')
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
def predictSMSdata(test_text):
|
| 14 |
categories = ["legitimate", "spam"]
|
| 15 |
categories.sort()
|
|
|
|
| 10 |
|
| 11 |
# Download the NLTK 'punkt' resource when this module is loaded.
nltk.download('punkt')
|
| 12 |
|
| 13 |
+
def custom_tokenizer_with_English_stemmer(text):
    """Tokenize *text* and return the lower-cased stem of every token.

    Args:
        text: The string to split into tokens with ``nltk.word_tokenize``.

    Returns:
        A list with one entry per token, in token order: the result of
        ``stemmerEN.stem`` applied to the lower-cased token.
    """
    # Every token the tokenizer produces is stemmed; nothing is filtered
    # out or character-translated here.
    # NOTE(review): `stemmerEN` is not created in this function and must
    # already exist at module level when this is called.
    return [
        stemmerEN.stem(token.lower())
        for token in nltk.word_tokenize(text)
    ]
|
| 18 |
+
|
| 19 |
def predictSMSdata(test_text):
|
| 20 |
categories = ["legitimate", "spam"]
|
| 21 |
categories.sort()
|