Spaces:

devtitan
/

phishing-email-detector

Sleeping

App Files Files Community

devtitan committed on Jul 19

Commit

3368ea3

verified ·

1 Parent(s): 91bed75

Upload 9 files

Browse files

Files changed (10) hide show

.devcontainer/devcontainer.json +31 -0
.devcontainer/icon.svg +1 -0
.gitattributes +1 -0
.gitignore +1 -0
README.md +24 -12
app.py +117 -0
requirements.txt +3 -0
static/Octocat.png +3 -0
static/main.css +45 -0
templates/index.html +65 -0

.devcontainer/devcontainer.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "image": "mcr.microsoft.com/devcontainers/universal:2",
+  "hostRequirements": {
+    "cpus": 4
+  },
+  "waitFor": "onCreateCommand",
+  "updateContentCommand": "pip install -r requirements.txt",
+  "postCreateCommand": "",
+  "postAttachCommand": {
+    "server": "flask --debug run"
+  },
+  "portsAttributes": {
+    "5000": {
+      "label": "Application",
+      "onAutoForward": "openPreview"
+    }
+  },
+  "customizations": {
+    "codespaces": {
+      "openFiles": [
+        "templates/index.html"
+      ]
+    },
+    "vscode": {
+      "extensions": [
+        "ms-python.python"
+      ]
+    }
+  },
+  "forwardPorts": [5000]
+}

.devcontainer/icon.svg ADDED Viewed

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+static/Octocat.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__/

README.md CHANGED Viewed

@@ -1,12 +1,24 @@
----
-title: Phishing Email Detector
-emoji: 💻
-colorFrom: green
-colorTo: indigo
-sdk: docker
-pinned: false
-license: apache-2.0
-short_description: Easy to use phishing email detector
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# Phishing Email Detector
+## Description
+This project is a web application built with Flask that analyzes email text content to detect potential phishing attempts. It uses a pre-trained machine learning model from the Hugging Face Transformers library to classify emails.
+## Features
+* Provides a simple web interface to paste and analyze email text.
+* Uses the `cybersectony/phishing-email-detection-distilbert_v2.4.1` model for classification.
+* Displays the most likely classification (e.g., "Likely Legitimate", "Suspicious / Phishing Link Likely").
+* Shows the confidence score for the top prediction.
+* Provides detailed probabilities for all classification categories considered by the model.
+## Dependencies
+* Python 3.x
+* Flask
+* Hugging Face Transformers (`transformers`)
+* PyTorch (`torch`)
+You can install the Python dependencies using pip:
+```bash
+pip install Flask transformers torch

app.py ADDED Viewed

	@@ -0,0 +1,117 @@

+# app.py
+from flask import Flask, request, render_template
+# Import necessary classes from transformers and torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import operator # To find max value in dictionary if needed (alternative to user's lambda)
+# Initialize Flask app
+app = Flask(__name__)
+# --- Load Tokenizer and Model ---
+# Loaded once at import time so every request reuses the same objects.
+model_name = "cybersectony/phishing-email-detection-distilbert_v2.4.1"
+try:
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    # Switch the model to evaluation mode for inference.
+    model.eval()
+    print(f"Tokenizer and Model '{model_name}' loaded successfully.")
+    # Optionally print the label mapping from the model config, if present:
+    # print(f"Model config labels (if available): {model.config.id2label}")
+except Exception as e:
+    # Best-effort: keep the module importable; the route checks for None and reports the failure.
+    print(f"Error loading tokenizer or model '{model_name}': {e}")
+    tokenizer = None
+    model = None # None signals to the rest of the module that loading failed
+# --- Prediction Function (Your provided code) ---
+def predict_email(email_text):
+    if not tokenizer or not model:
+        raise RuntimeError("Tokenizer or Model not loaded.") # Should not happen if initial check passes
+    # Preprocess and tokenize
+    inputs = tokenizer(
+        email_text,
+        return_tensors="pt", # PyTorch tensors
+        truncation=True,     # Truncate long emails
+        max_length=512       # Max sequence length for the model
+    )
+    # Get prediction - no need to track gradients for inference
+    with torch.no_grad():
+        outputs = model(**inputs)
+        # Apply softmax to logits to get probabilities
+        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+    # Get probabilities for each class (index matters!)
+    probs = predictions[0].tolist() # Get the probabilities for the first (only) input
+    # --- Create labels dictionary ---
+    # IMPORTANT: This assumes the model's output logits correspond to these labels IN THIS ORDER.
+    # Verify this order based on the model card or model.config.id2label if possible.
+    labels = {
+        "Legitimate Email": probs[0],
+        "Phishing Link Detected": probs[1], # Assuming 'phishing_url' means a bad link found
+        "Legitimate Link Detected": probs[2], # Assuming 'legitimate_url' means a good link found
+        "Phishing Link Detected (Alt)": probs[3] # Assuming 'phishing_url_alt' is also bad
+    }
+    # Determine the most likely classification based on highest probability
+    # Using operator.itemgetter is slightly more standard than lambda for this case
+    max_label_item = max(labels.items(), key=operator.itemgetter(1))
+    return {
+        "prediction": max_label_item[0],  # The label name with the highest probability
+        "confidence": max_label_item[1],  # The highest probability value
+        "all_probabilities": labels       # Dictionary of all labels and their probabilities
+    }
+# --- Flask Routes ---
+@app.route('/', methods=['GET', 'POST'])
+def index():
+    prediction_result = None
+    email_text_input = ""
+    error_message = None
+    friendly_label_display = None # For simple display (e.g., Phishing/Legitimate)
+    result_details = None # To hold the full dictionary from predict_email
+    # Check if model loaded correctly at startup
+    if not tokenizer or not model:
+         error_message = "Phishing detection model could not be loaded. Please check the server logs."
+         # Pass the error immediately to the template
+         return render_template('index.html', error=error_message)
+    if request.method == 'POST':
+        email_text_input = request.form['text']
+        if email_text_input:
+            try:
+                # Perform classification using your function
+                result_details = predict_email(email_text_input)
+                prediction_result = result_details['prediction'] # Get the top prediction label
+                print(f"Input: '{email_text_input[:100]}...', Result: {result_details}") # Log detailed result
+                # --- Determine a simple display label ---
+                # Customize this logic based on how you want to interpret the model's specific labels
+                if "Phishing Link" in prediction_result:
+                     friendly_label_display = "Suspicious / Phishing Link Likely"
+                elif "Legitimate" in prediction_result:
+                     friendly_label_display = "Likely Legitimate"
+                else:
+                     friendly_label_display = prediction_result # Fallback to the raw label name
+            except Exception as e:
+                 print(f"Error during prediction: {e}")
+                 error_message = f"An error occurred during analysis: {e}"
+    # Render the HTML template
+    return render_template(
+        'index.html',
+        result=result_details, # Pass the whole result dictionary
+        friendly_label=friendly_label_display, # Pass the simplified label
+        text=email_text_input,
+        error=error_message
+    )
+# Run the Flask app
+if __name__ == '__main__':
+    app.run(debug=True) # Set debug=False for production

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+Flask
+transformers
+torch

static/Octocat.png ADDED Viewed

Git LFS Details

SHA256: ff7ce735496cfc747d717041ce95e6a36f27332f69f4cf8d9d7c8348926b6dc9
Pointer size: 132 Bytes
Size of remote file: 2.13 MB

static/main.css ADDED Viewed

	@@ -0,0 +1,45 @@

+body {
+    margin: 0;
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen",
+        "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue",
+        sans-serif;
+    -webkit-font-smoothing: antialiased;
+    -moz-osx-font-smoothing: grayscale;
+}
+code {
+    font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New",
+        monospace;
+}
+.App {
+    text-align: center;
+}
+.App-logo {
+    height: 40vmin;
+    pointer-events: none;
+}
+.App-header {
+    background-color: #282c34;
+    min-height: 100vh;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    justify-content: center;
+    font-size: calc(10px + 2vmin);
+    color: white;
+}
+.App-link {
+    color: #61dafb;
+}
+.heart {
+    color: #ff0000;
+}
+.small {
+    font-size: 0.75rem;
+}

templates/index.html ADDED Viewed

	@@ -0,0 +1,65 @@

+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>Phishing Email Detector</title>
+    <style>
+      body { font-family: sans-serif; max-width: 700px; margin: 50px auto; padding: 20px; border: 1px solid #eee; border-radius: 8px; }
+      textarea { width: 100%; min-height: 150px; margin-bottom: 10px; }
+      button { padding: 10px 15px; cursor: pointer; }
+      .result-summary { margin-top: 20px; padding: 15px; border-radius: 5px; border: 1px solid; }
+      .result-details { margin-top: 10px; padding: 10px; background-color: #f8f9fa; border: 1px dashed #ccc; border-radius: 4px; font-size: 0.9em; }
+      .result-details ul { padding-left: 20px; margin: 5px 0;}
+      /* Update CSS classes based on simplified label */
+      .suspicious { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; }
+      .legitimate { background-color: #d4edda; border-color: #c3e6cb; color: #155724; }
+      .unknown { background-color: #e2e3e5; border-color: #d6d8db; color: #383d41; }
+      .error { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; margin-top: 15px; padding: 10px;}
+      .disclaimer { font-size: 0.8em; color: #666; margin-top: 30px; border-top: 1px solid #eee; padding-top: 10px;}
+    </style>
+  </head>
+  <body>
+    <h1>Phishing Email Detector</h1>
+    <p>Enter the full text content of an email below to analyze it.</p>
+    <p><strong>Disclaimer:</strong> This tool (using model <code>{{ model_name }}</code>) is for educational purposes and may not be accurate. Do not rely solely on this for security decisions.</p>
+    {% if error and not result %}
+        <div class="error">
+            <strong>Error:</strong> {{ error }}
+        </div>
+    {% endif %}
+    <form method="post">
+      <textarea name="text" placeholder="Paste email body here...">{{ text }}</textarea><br>
+      <button type="submit">Analyze Email</button>
+    </form>
+    {% if result and friendly_label %}
+      <div class="result-summary {% if 'Suspicious' in friendly_label %}suspicious{% elif 'Legitimate' in friendly_label %}legitimate{% else %}unknown{% endif %}">
+        <h2>Analysis Result Summary</h2>
+        <p><strong>Overall Assessment:</strong> {{ friendly_label }}</p>
+        <p><strong>Top Prediction:</strong> {{ result.prediction }}</p> <p><strong>Confidence Score:</strong> {{ "%.4f"|format(result.confidence) }}</p>
+      </div>
+      <div class="result-details">
+          <strong>Detailed Probabilities:</strong>
+          <ul>
+              {% for label, probability in result.all_probabilities.items() %}
+                  <li>{{ label }}: {{ "%.4f"|format(probability) }}</li>
+              {% endfor %}
+          </ul>
+      </div>
+    {# The banner above the form already shows errors when there is no result; only report here when it did not, so the message is never rendered twice. #}
+    {% elif error and result %}
+         <div class="error">
+            <strong>Error during analysis:</strong> {{ error }}
+        </div>
+    {% endif %}
+     <div class="disclaimer">
+        <strong>Reminder:</strong> Phishing detection is complex. This model provides probabilities for specific categories based on the text. Always look for other signs like sender addresses, urgent requests, unexpected attachments, and suspicious links.
+    </div>
+  </body>
+</html>