Spaces:

k-code
/

spam-LSTM-model

Sleeping

App Files Files Community

k-code committed on Mar 27, 2025

Commit

87476a9

1 Parent(s): ea57aca

init

Browse files

Files changed (5) hide show

.gitignore +1 -0
.gradio/certificate.pem +31 -0
app.py +135 -0
best_model.pth +3 -0
requirements.txt +70 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .venv

.gradio/certificate.pem ADDED Viewed

	@@ -0,0 +1,31 @@

+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----

app.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import re
+import gradio as gr
+from datasets import load_dataset
+import torch
+from torch.utils.data import random_split
+from collections import Counter
+import torch.nn as nn
+class LSTMClassifier(nn.Module):
+    def __init__(self, vocab_size, embedding_dim=200, hidden_dim=256):
+        super(LSTMClassifier, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
+        self.lstm = nn.LSTM(
+            embedding_dim,
+            hidden_dim,
+            num_layers=2,
+            batch_first=True,
+            bidirectional=True,
+            dropout=0.3,
+        )
+        # Dropout layer
+        self.dropout = nn.Dropout(0.4)
+        # Additional dense layers
+        self.fc1 = nn.Linear(hidden_dim * 2, hidden_dim)
+        self.fc2 = nn.Linear(hidden_dim, 2)
+    def forward(self, x):
+        embedded = self.embedding(x)
+        lstm_out, (hidden, cell) = self.lstm(embedded)
+        # Concatenate forward and backward hidden states
+        hidden = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
+        hidden = self.dropout(hidden)
+        # Additional layer with ReLU activation
+        hidden = torch.relu(self.fc1(hidden))
+        hidden = self.dropout(hidden)
+        # Final classification layer
+        out = self.fc2(hidden)
+        return out
+def create_vocabulary(ds, max_words=10000):
+    word2idx = {
+        "<PAD>": 0,
+        "<UNK>": 1,
+    }
+    words = []
+    for example in ds:
+        text = example["sms"]
+        text = text.lower()
+        text = re.sub(r"[^\w\s]", "", text)
+        words.extend(text.split())
+    word_counts = Counter(words)
+    common_words = word_counts.most_common(max_words - 2)
+    for word, _ in common_words:
+        word2idx[word] = len(word2idx)
+    return word2idx
+def create_splits(ds):
+    # 80/20 split
+    full_dataset = ds['train']
+    train_size = int(0.8 * len(full_dataset))
+    test_size = len(full_dataset) - train_size
+    train_dataset, test_dataset = random_split(
+        full_dataset,
+        [train_size, test_size],
+        generator=torch.Generator().manual_seed(42),
+    )
+    return train_dataset, test_dataset
+ds = load_dataset("ucirvine/sms_spam")
+train_dataset, test_dataset = create_splits(ds)
+vocab = create_vocabulary(train_dataset)
+# First recreate the model architecture
+model = LSTMClassifier(len(vocab), 100)
+# Load the saved state dict
+model.load_state_dict(torch.load('best_model.pth'))
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model = model.to(device)
+def predict_text(model, text, word2idx, device, max_length=50):
+    # Set model to evaluation mode
+    model.eval()
+    # Preprocess the text (same as training)
+    text = text.lower()
+    words = text.split()
+    # Convert words to indices
+    indices = [word2idx.get(word, word2idx['<UNK>']) for word in words]
+    # Pad or truncate
+    if len(indices) < max_length:
+        indices += [word2idx['<PAD>']] * (max_length - len(indices))
+    else:
+        indices = indices[:max_length]
+    # Convert to tensor
+    with torch.no_grad():
+        input_tensor = torch.tensor(indices).unsqueeze(
+            0).to(device)  # Add batch dimension
+        outputs = model(input_tensor)
+        probabilities = torch.softmax(outputs, dim=1)
+        prediction = torch.argmax(outputs, dim=1)
+    return {
+        'prediction': 'spam' if prediction.item() == 1 else 'ham',
+        'confidence': probabilities[0][prediction].item()
+    }
+interface = gr.Interface(
+    fn=lambda text: predict_text(model, text, vocab, device),
+    inputs=gr.Textbox(lines=2, placeholder="Enter your text here..."),
+    outputs=gr.Textbox(),
+    title="SMS Spam Classifier",
+    description="Enter a text message to predict if it's spam or ham.",
+)
+interface.launch(share=True)

best_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29dad210d08207edb98d9c2052f9822968170e10121292a578f787d438430e2f
+size 13158306

requirements.txt ADDED Viewed

	@@ -0,0 +1,70 @@

+aiofiles==23.2.1
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.14
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+datasets==3.4.1
+dill==0.3.8
+fastapi==0.115.12
+ffmpy==0.5.0
+filelock==3.18.0
+frozenlist==1.5.0
+fsspec==2024.12.0
+gradio==5.23.1
+gradio_client==1.8.0
+groovy==0.1.2
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.29.3
+idna==3.10
+Jinja2==3.1.6
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.2.0
+multiprocess==0.70.16
+networkx==3.4.2
+numpy==2.2.4
+orjson==3.10.16
+packaging==24.2
+pandas==2.2.3
+pillow==11.1.0
+propcache==0.3.1
+pyarrow==19.0.1
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.2
+PyYAML==6.0.2
+requests==2.32.3
+rich==13.9.4
+ruff==0.11.2
+safehttpx==0.1.6
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+starlette==0.46.1
+sympy==1.13.1
+tomlkit==0.13.2
+torch==2.6.0
+torchvision==0.21.0
+tqdm==4.67.1
+typer==0.15.2
+typing_extensions==4.13.0
+tzdata==2025.2
+urllib3==2.3.0
+uvicorn==0.34.0
+websockets==15.0.1
+xxhash==3.5.0
+yarl==1.18.3