Spaces:
Sleeping
Sleeping
Upload 9 files
Browse files- .devcontainer/devcontainer.json +31 -0
- .devcontainer/icon.svg +1 -0
- .gitattributes +1 -0
- .gitignore +1 -0
- README.md +24 -12
- app.py +117 -0
- requirements.txt +3 -0
- static/Octocat.png +3 -0
- static/main.css +45 -0
- templates/index.html +65 -0
.devcontainer/devcontainer.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image": "mcr.microsoft.com/devcontainers/universal:2",
|
| 3 |
+
"hostRequirements": {
|
| 4 |
+
"cpus": 4
|
| 5 |
+
},
|
| 6 |
+
"waitFor": "onCreateCommand",
|
| 7 |
+
"updateContentCommand": "pip install -r requirements.txt",
|
| 8 |
+
"postCreateCommand": "",
|
| 9 |
+
"postAttachCommand": {
|
| 10 |
+
"server": "flask --debug run"
|
| 11 |
+
},
|
| 12 |
+
"portsAttributes": {
|
| 13 |
+
"5000": {
|
| 14 |
+
"label": "Application",
|
| 15 |
+
"onAutoForward": "openPreview"
|
| 16 |
+
}
|
| 17 |
+
},
|
| 18 |
+
"customizations": {
|
| 19 |
+
"codespaces": {
|
| 20 |
+
"openFiles": [
|
| 21 |
+
"templates/index.html"
|
| 22 |
+
]
|
| 23 |
+
},
|
| 24 |
+
"vscode": {
|
| 25 |
+
"extensions": [
|
| 26 |
+
"ms-python.python"
|
| 27 |
+
]
|
| 28 |
+
}
|
| 29 |
+
},
|
| 30 |
+
"forwardPorts": [5000]
|
| 31 |
+
}
|
.devcontainer/icon.svg
ADDED
|
|
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
static/Octocat.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
README.md
CHANGED
|
@@ -1,12 +1,24 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Phishing Email Detector
|
| 2 |
+
|
| 3 |
+
## Description
|
| 4 |
+
|
| 5 |
+
This project is a web application built with Flask that analyzes email text content to detect potential phishing attempts. It uses a pre-trained machine learning model from the Hugging Face Transformers library to classify emails.
|
| 6 |
+
|
| 7 |
+
## Features
|
| 8 |
+
|
| 9 |
+
* Provides a simple web interface to paste and analyze email text.
|
| 10 |
+
* Uses the `cybersectony/phishing-email-detection-distilbert_v2.4.1` model for classification.
|
| 11 |
+
* Displays the most likely classification (e.g., "Likely Legitimate", "Suspicious / Phishing Link Likely").
|
| 12 |
+
* Shows the confidence score for the top prediction.
|
| 13 |
+
* Provides detailed probabilities for all classification categories considered by the model.
|
| 14 |
+
|
| 15 |
+
## Dependencies
|
| 16 |
+
|
| 17 |
+
* Python 3.x
|
| 18 |
+
* Flask
|
| 19 |
+
* Hugging Face Transformers (`transformers`)
|
| 20 |
+
* PyTorch (`torch`)
|
| 21 |
+
|
| 22 |
+
You can install the Python dependencies using pip:
|
| 23 |
+
```bash
|
| 24 |
+
pip install Flask transformers torch
```
|
app.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py
|
| 2 |
+
from flask import Flask, request, render_template
|
| 3 |
+
# Import necessary classes from transformers and torch
|
| 4 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 5 |
+
import torch
|
| 6 |
+
import operator # To find max value in dictionary if needed (alternative to user's lambda)
|
| 7 |
+
|
| 8 |
+
# Initialize Flask app
|
| 9 |
+
app = Flask(__name__)
|
| 10 |
+
|
| 11 |
+
# --- Load Tokenizer and Model (Using your provided code) ---
|
| 12 |
+
# Load them globally when the app starts
|
| 13 |
+
# Hugging Face Hub identifier of the classifier served by this app.
model_name = "cybersectony/phishing-email-detection-distilbert_v2.4.1"

# Begin in the "not loaded" state; both globals are rebound only while loading
# succeeds and are reset again if any step below fails.
tokenizer = None
model = None
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # Switch the model to evaluation mode, as required for inference.
    model.eval()
    print(f"Tokenizer and Model '{model_name}' loaded successfully.")
    # Tip: model.config.id2label can be printed here to inspect the model's
    # own label names, if the config provides them.
except Exception as e:
    # Best-effort startup: report the problem and leave both globals as None
    # so the request handlers can detect that loading failed.
    print(f"Error loading tokenizer or model '{model_name}': {e}")
    tokenizer = None
    model = None
|
| 26 |
+
|
| 27 |
+
# --- Prediction Function (Your provided code) ---
|
| 28 |
+
def predict_email(email_text):
    """Classify email text with the globally loaded tokenizer and model.

    Args:
        email_text: Text of the email to analyze. The tokenizer call truncates
            it to at most 512 tokens.

    Returns:
        A dict with three keys:
            "prediction": display name of the highest-probability class,
            "confidence": that class's softmax probability,
            "all_probabilities": dict mapping every display name to its
                probability, in model output order.

    Raises:
        RuntimeError: If the tokenizer or model was not loaded, or if the
            model emits a different number of classes than the display names
            defined below.
    """
    if tokenizer is None or model is None:
        raise RuntimeError("Tokenizer or Model not loaded.")  # Should not happen if initial check passes

    # Display names, positionally matched to the model's output logits.
    # NOTE(review): this order is an assumption (legitimate email, phishing
    # URL, legitimate URL, alternate phishing URL) -- verify it against the
    # model card or model.config.id2label.
    label_names = (
        "Legitimate Email",
        "Phishing Link Detected",
        "Legitimate Link Detected",
        "Phishing Link Detected (Alt)",
    )

    # Tokenize into PyTorch tensors, truncating long emails to 512 tokens.
    inputs = tokenizer(
        email_text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    # Inference only: gradients are not needed.
    with torch.no_grad():
        outputs = model(**inputs)
        # Softmax over the class dimension turns logits into probabilities.
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

    # Probabilities for the first (and only) input in the batch.
    probs = predictions[0].tolist()

    # Fail with a clear message instead of a bare IndexError (too few classes)
    # or silently dropping classes (too many).
    if len(probs) != len(label_names):
        raise RuntimeError(
            f"Model returned {len(probs)} class probabilities, "
            f"expected {len(label_names)}."
        )

    labels = dict(zip(label_names, probs))

    # Highest-probability (name, probability) pair.
    max_label_item = max(labels.items(), key=operator.itemgetter(1))

    return {
        "prediction": max_label_item[0],
        "confidence": max_label_item[1],
        "all_probabilities": labels,
    }
|
| 68 |
+
|
| 69 |
+
# --- Flask Routes ---
|
| 70 |
+
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the analysis form and, on POST, the classification of the submitted text.

    On GET the empty form is rendered. On POST the "text" form field is
    classified with predict_email() and the result, a simplified label and
    the submitted text are passed to the template. Any exception raised during
    prediction is reported on the page instead of propagating.

    Returns:
        The rendered ``index.html`` template.
    """
    # If loading failed at startup there is nothing to analyze with; show the
    # error right away.
    if tokenizer is None or model is None:
        error_message = "Phishing detection model could not be loaded. Please check the server logs."
        # model_name is passed because the template's disclaimer displays it.
        return render_template('index.html', error=error_message, model_name=model_name)

    email_text_input = ""
    error_message = None
    friendly_label_display = None  # Simplified label (e.g. suspicious / legitimate)
    result_details = None  # Full dictionary returned by predict_email

    if request.method == 'POST':
        # .get() so a POST without the "text" field re-renders the form
        # instead of aborting the request with a 400 error.
        email_text_input = request.form.get('text', '')
        # Skip blank or whitespace-only submissions; there is nothing to classify.
        if email_text_input.strip():
            try:
                result_details = predict_email(email_text_input)
                prediction_result = result_details['prediction']
                print(f"Input: '{email_text_input[:100]}...', Result: {result_details}")  # Log detailed result

                # Collapse the model-specific label into a simple verdict.
                # "Phishing Link" is tested first so a phishing label is never
                # reported as legitimate.
                if "Phishing Link" in prediction_result:
                    friendly_label_display = "Suspicious / Phishing Link Likely"
                elif "Legitimate" in prediction_result:
                    friendly_label_display = "Likely Legitimate"
                else:
                    friendly_label_display = prediction_result  # Fallback to the raw label name

            except Exception as e:
                # Request boundary: report the failure on the page rather than
                # returning a 500.
                print(f"Error during prediction: {e}")
                error_message = f"An error occurred during analysis: {e}"

    return render_template(
        'index.html',
        result=result_details,
        friendly_label=friendly_label_display,
        text=email_text_input,
        error=error_message,
        model_name=model_name,  # shown in the template's disclaimer
    )
|
| 114 |
+
|
| 115 |
+
# Run the Flask app
|
| 116 |
+
if __name__ == '__main__':
    import os  # local import: only this script entry point needs it

    # Debug mode turns on the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it off unless it
    # is explicitly requested through the FLASK_DEBUG environment variable.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
    app.run(debug=debug_enabled)
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Flask
|
| 2 |
+
transformers
|
| 3 |
+
torch
|
static/Octocat.png
ADDED
|
Git LFS Details
|
static/main.css
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
body {
|
| 2 |
+
margin: 0;
|
| 3 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen",
|
| 4 |
+
"Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue",
|
| 5 |
+
sans-serif;
|
| 6 |
+
-webkit-font-smoothing: antialiased;
|
| 7 |
+
-moz-osx-font-smoothing: grayscale;
|
| 8 |
+
}
|
| 9 |
+
|
| 10 |
+
code {
|
| 11 |
+
font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New",
|
| 12 |
+
monospace;
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
.App {
|
| 16 |
+
text-align: center;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
.App-logo {
|
| 20 |
+
height: 40vmin;
|
| 21 |
+
pointer-events: none;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
.App-header {
|
| 25 |
+
background-color: #282c34;
|
| 26 |
+
min-height: 100vh;
|
| 27 |
+
display: flex;
|
| 28 |
+
flex-direction: column;
|
| 29 |
+
align-items: center;
|
| 30 |
+
justify-content: center;
|
| 31 |
+
font-size: calc(10px + 2vmin);
|
| 32 |
+
color: white;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.App-link {
|
| 36 |
+
color: #61dafb;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
.heart {
|
| 40 |
+
color: #ff0000;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
.small {
|
| 44 |
+
font-size: 0.75rem;
|
| 45 |
+
}
|
templates/index.html
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!doctype html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="utf-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 6 |
+
<title>Phishing Email Detector</title>
|
| 7 |
+
<style>
|
| 8 |
+
body { font-family: sans-serif; max-width: 700px; margin: 50px auto; padding: 20px; border: 1px solid #eee; border-radius: 8px; }
|
| 9 |
+
textarea { width: 100%; min-height: 150px; margin-bottom: 10px; }
|
| 10 |
+
button { padding: 10px 15px; cursor: pointer; }
|
| 11 |
+
.result-summary { margin-top: 20px; padding: 15px; border-radius: 5px; border: 1px solid; }
|
| 12 |
+
.result-details { margin-top: 10px; padding: 10px; background-color: #f8f9fa; border: 1px dashed #ccc; border-radius: 4px; font-size: 0.9em; }
|
| 13 |
+
.result-details ul { padding-left: 20px; margin: 5px 0;}
|
| 14 |
+
/* Update CSS classes based on simplified label */
|
| 15 |
+
.suspicious { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; }
|
| 16 |
+
.legitimate { background-color: #d4edda; border-color: #c3e6cb; color: #155724; }
|
| 17 |
+
.unknown { background-color: #e2e3e5; border-color: #d6d8db; color: #383d41; }
|
| 18 |
+
.error { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; margin-top: 15px; padding: 10px;}
|
| 19 |
+
.disclaimer { font-size: 0.8em; color: #666; margin-top: 30px; border-top: 1px solid #eee; padding-top: 10px;}
|
| 20 |
+
</style>
|
| 21 |
+
</head>
|
| 22 |
+
<body>
|
| 23 |
+
<h1>Phishing Email Detector</h1>
|
| 24 |
+
<p>Enter the full text content of an email below to analyze it.</p>
|
| 25 |
+
<p><strong>Disclaimer:</strong> This tool (using model <code>{{ model_name }}</code>) is for educational purposes and may not be accurate. Do not rely solely on this for security decisions.</p>
|
| 26 |
+
|
| 27 |
+
{% if error and not result %}
|
| 28 |
+
<div class="error">
|
| 29 |
+
<strong>Error:</strong> {{ error }}
|
| 30 |
+
</div>
|
| 31 |
+
{% endif %}
|
| 32 |
+
|
| 33 |
+
<form method="post">
|
| 34 |
+
<textarea name="text" placeholder="Paste email body here...">{{ text }}</textarea><br>
|
| 35 |
+
<button type="submit">Analyze Email</button>
|
| 36 |
+
</form>
|
| 37 |
+
|
| 38 |
+
{% if result and friendly_label %}
|
| 39 |
+
<div class="result-summary {% if 'Suspicious' in friendly_label %}suspicious{% elif 'Legitimate' in friendly_label %}legitimate{% else %}unknown{% endif %}">
|
| 40 |
+
<h2>Analysis Result Summary</h2>
|
| 41 |
+
<p><strong>Overall Assessment:</strong> {{ friendly_label }}</p>
|
| 42 |
+
<p><strong>Top Prediction:</strong> {{ result.prediction }}</p> <p><strong>Confidence Score:</strong> {{ "%.4f"|format(result.confidence) }}</p>
|
| 43 |
+
</div>
|
| 44 |
+
|
| 45 |
+
<div class="result-details">
|
| 46 |
+
<strong>Detailed Probabilities:</strong>
|
| 47 |
+
<ul>
|
| 48 |
+
{% for label, probability in result.all_probabilities.items() %}
|
| 49 |
+
<li>{{ label }}: {{ "%.4f"|format(probability) }}</li>
|
| 50 |
+
{% endfor %}
|
| 51 |
+
</ul>
|
| 52 |
+
</div>
|
| 53 |
+
|
| 54 |
+
{% elif error %}
|
| 55 |
+
<div class="error">
|
| 56 |
+
<strong>Error during analysis:</strong> {{ error }}
|
| 57 |
+
</div>
|
| 58 |
+
{% endif %}
|
| 59 |
+
|
| 60 |
+
<div class="disclaimer">
|
| 61 |
+
<strong>Reminder:</strong> Phishing detection is complex. This model provides probabilities for specific categories based on the text. Always look for other signs like sender addresses, urgent requests, unexpected attachments, and suspicious links.
|
| 62 |
+
</div>
|
| 63 |
+
|
| 64 |
+
</body>
|
| 65 |
+
</html>
|