devtitan commited on
Commit
3368ea3
·
verified ·
1 Parent(s): 91bed75

Upload 9 files

Browse files
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image": "mcr.microsoft.com/devcontainers/universal:2",
3
+ "hostRequirements": {
4
+ "cpus": 4
5
+ },
6
+ "waitFor": "onCreateCommand",
7
+ "updateContentCommand": "pip install -r requirements.txt",
8
+ "postCreateCommand": "",
9
+ "postAttachCommand": {
10
+ "server": "flask --debug run"
11
+ },
12
+ "portsAttributes": {
13
+ "5000": {
14
+ "label": "Application",
15
+ "onAutoForward": "openPreview"
16
+ }
17
+ },
18
+ "customizations": {
19
+ "codespaces": {
20
+ "openFiles": [
21
+ "templates/index.html"
22
+ ]
23
+ },
24
+ "vscode": {
25
+ "extensions": [
26
+ "ms-python.python"
27
+ ]
28
+ }
29
+ },
30
+ "forwardPorts": [5000]
31
+ }
.devcontainer/icon.svg ADDED
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ static/Octocat.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
README.md CHANGED
@@ -1,12 +1,24 @@
1
- ---
2
- title: Phishing Email Detector
3
- emoji: 💻
4
- colorFrom: green
5
- colorTo: indigo
6
- sdk: docker
7
- pinned: false
8
- license: apache-2.0
9
- short_description: Easy to use phishing email detector
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Phishing Email Detector
2
+
3
+ ## Description
4
+
5
+ This project is a web application built with Flask that analyzes email text content to detect potential phishing attempts. It utilizes a pre-trained machine learning model from the Hugging Face Transformers library to classify emails.
6
+
7
+ ## Features
8
+
9
+ * Provides a simple web interface to paste and analyze email text.
10
+ * Uses the `cybersectony/phishing-email-detection-distilbert_v2.4.1` model for classification.
11
+ * Displays the most likely classification (e.g., "Likely Legitimate", "Suspicious / Phishing Link Likely").
12
+ * Shows the confidence score for the top prediction.
13
+ * Provides detailed probabilities for all classification categories considered by the model.
14
+
15
+ ## Dependencies
16
+
17
+ * Python 3.x
18
+ * Flask
19
+ * Hugging Face Transformers (`transformers`)
20
+ * PyTorch (`torch`)
21
+
22
+ You can install the Python dependencies using pip:
23
+ ```bash
24
+ pip install Flask transformers torch
25
+ ```
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# app.py
# Flask web app that classifies pasted email text as phishing/legitimate
# using a pre-trained DistilBERT model from the Hugging Face Hub.
from flask import Flask, request, render_template
# Import necessary classes from transformers and torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import operator  # itemgetter is used below to pick the max-probability label

# Initialize Flask app
app = Flask(__name__)

# --- Load Tokenizer and Model ---
# Loaded once globally at startup so every request reuses the same instances.
# NOTE: from_pretrained downloads the model on first run (network access required).
model_name = "cybersectony/phishing-email-detection-distilbert_v2.4.1"
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # Set the model to evaluation mode (disables dropout etc. — important for inference)
    model.eval()
    print(f"Tokenizer and Model '{model_name}' loaded successfully.")
    # NOTE(review): model.config.id2label could be printed here to verify the
    # label order assumed in predict_email() — confirm against the model card.
    # print(f"Model config labels (if available): {model.config.id2label}")
except Exception as e:
    # Loading can fail (no network, bad model id). Keep the app importable and
    # signal the failure to the request handlers via None sentinels.
    print(f"Error loading tokenizer or model '{model_name}': {e}")
    tokenizer = None
    model = None  # Flag that loading failed
# --- Prediction Function ---
def predict_email(email_text):
    """Classify email text with the globally loaded model.

    Returns a dict with keys:
      - "prediction": label name with the highest probability
      - "confidence": that highest probability (float)
      - "all_probabilities": mapping of every label to its probability

    Raises RuntimeError if the model/tokenizer failed to load at startup.
    """
    if not tokenizer or not model:
        raise RuntimeError("Tokenizer or Model not loaded.")  # Should not happen if initial check passes

    # Tokenize the raw text into model inputs (truncated to the model's limit).
    encoded = tokenizer(
        email_text,
        return_tensors="pt",   # PyTorch tensors
        truncation=True,       # Truncate long emails
        max_length=512,        # Max sequence length for the model
    )

    # Inference only — no gradient tracking needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        scores = torch.nn.functional.softmax(logits, dim=-1)

    # Probabilities for the single input in the batch.
    class_probs = scores[0].tolist()

    # IMPORTANT: this assumes the model's output logits correspond to these
    # labels IN THIS ORDER — verify against the model card / model.config.id2label.
    labels = {
        "Legitimate Email": class_probs[0],
        "Phishing Link Detected": class_probs[1],       # assuming 'phishing_url' means a bad link found
        "Legitimate Link Detected": class_probs[2],     # assuming 'legitimate_url' means a good link found
        "Phishing Link Detected (Alt)": class_probs[3]  # assuming 'phishing_url_alt' is also bad
    }

    # Pick the label with the highest probability.
    top_label = max(labels, key=labels.get)

    return {
        "prediction": top_label,            # label name with the highest probability
        "confidence": labels[top_label],    # the highest probability value
        "all_probabilities": labels,        # all labels and their probabilities
    }
# --- Flask Routes ---
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the analysis form; on POST, classify the submitted email text.

    Template context: result (full prediction dict), friendly_label (simple
    assessment string), text (echoed input), error (message or None), and
    model_name (referenced by the template's disclaimer).
    """
    prediction_result = None
    email_text_input = ""
    error_message = None
    friendly_label_display = None  # simple display label (Phishing/Legitimate)
    result_details = None          # full dictionary from predict_email

    # Fail fast if the model could not be loaded at startup.
    if not tokenizer or not model:
        error_message = "Phishing detection model could not be loaded. Please check the server logs."
        # model_name is passed so the template disclaimer does not render empty.
        return render_template('index.html', error=error_message, model_name=model_name)

    if request.method == 'POST':
        # .get() avoids a 400/KeyError when the 'text' field is missing from the form.
        email_text_input = request.form.get('text', '')
        # Skip analysis for empty or whitespace-only submissions.
        if email_text_input.strip():
            try:
                # Perform classification
                result_details = predict_email(email_text_input)
                prediction_result = result_details['prediction']  # top prediction label
                print(f"Input: '{email_text_input[:100]}...', Result: {result_details}")  # Log detailed result

                # Map the raw model label to a simple display label.
                if "Phishing Link" in prediction_result:
                    friendly_label_display = "Suspicious / Phishing Link Likely"
                elif "Legitimate" in prediction_result:
                    friendly_label_display = "Likely Legitimate"
                else:
                    friendly_label_display = prediction_result  # fallback to the raw label name
            except Exception as e:
                print(f"Error during prediction: {e}")
                error_message = f"An error occurred during analysis: {e}"

    # Render the HTML template
    return render_template(
        'index.html',
        result=result_details,                 # whole result dictionary
        friendly_label=friendly_label_display, # simplified label
        text=email_text_input,
        error=error_message,
        model_name=model_name,                 # template disclaimer shows {{ model_name }}
    )
114
+
# Run the Flask development server when the module is executed directly
# (a production deployment should use a WSGI server and debug=False).
if __name__ == '__main__':
    app.run(debug=True)  # Set debug=False for production
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Flask
2
+ transformers
3
+ torch
static/Octocat.png ADDED

Git LFS Details

  • SHA256: ff7ce735496cfc747d717041ce95e6a36f27332f69f4cf8d9d7c8348926b6dc9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.13 MB
static/main.css ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ margin: 0;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", "Roboto", "Oxygen",
4
+ "Ubuntu", "Cantarell", "Fira Sans", "Droid Sans", "Helvetica Neue",
5
+ sans-serif;
6
+ -webkit-font-smoothing: antialiased;
7
+ -moz-osx-font-smoothing: grayscale;
8
+ }
9
+
10
+ code {
11
+ font-family: source-code-pro, Menlo, Monaco, Consolas, "Courier New",
12
+ monospace;
13
+ }
14
+
15
+ .App {
16
+ text-align: center;
17
+ }
18
+
19
+ .App-logo {
20
+ height: 40vmin;
21
+ pointer-events: none;
22
+ }
23
+
24
+ .App-header {
25
+ background-color: #282c34;
26
+ min-height: 100vh;
27
+ display: flex;
28
+ flex-direction: column;
29
+ align-items: center;
30
+ justify-content: center;
31
+ font-size: calc(10px + 2vmin);
32
+ color: white;
33
+ }
34
+
35
+ .App-link {
36
+ color: #61dafb;
37
+ }
38
+
39
+ .heart {
40
+ color: #ff0000;
41
+ }
42
+
43
+ .small {
44
+ font-size: 0.75rem;
45
+ }
templates/index.html ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Phishing Email Detector</title>
7
+ <style>
8
+ body { font-family: sans-serif; max-width: 700px; margin: 50px auto; padding: 20px; border: 1px solid #eee; border-radius: 8px; }
9
+ textarea { width: 100%; min-height: 150px; margin-bottom: 10px; }
10
+ button { padding: 10px 15px; cursor: pointer; }
11
+ .result-summary { margin-top: 20px; padding: 15px; border-radius: 5px; border: 1px solid; }
12
+ .result-details { margin-top: 10px; padding: 10px; background-color: #f8f9fa; border: 1px dashed #ccc; border-radius: 4px; font-size: 0.9em; }
13
+ .result-details ul { padding-left: 20px; margin: 5px 0;}
14
+ /* Update CSS classes based on simplified label */
15
+ .suspicious { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; }
16
+ .legitimate { background-color: #d4edda; border-color: #c3e6cb; color: #155724; }
17
+ .unknown { background-color: #e2e3e5; border-color: #d6d8db; color: #383d41; }
18
+ .error { background-color: #f8d7da; border-color: #f5c6cb; color: #721c24; margin-top: 15px; padding: 10px;}
19
+ .disclaimer { font-size: 0.8em; color: #666; margin-top: 30px; border-top: 1px solid #eee; padding-top: 10px;}
20
+ </style>
21
+ </head>
22
+ <body>
23
+ <h1>Phishing Email Detector</h1>
24
+ <p>Enter the full text content of an email below to analyze it.</p>
25
+ <p><strong>Disclaimer:</strong> This tool (using model <code>{{ model_name }}</code>) is for educational purposes and may not be accurate. Do not rely solely on this for security decisions.</p>
26
+
27
+ {% if error and not result %}
28
+ <div class="error">
29
+ <strong>Error:</strong> {{ error }}
30
+ </div>
31
+ {% endif %}
32
+
33
+ <form method="post">
34
+ <textarea name="text" placeholder="Paste email body here...">{{ text }}</textarea><br>
35
+ <button type="submit">Analyze Email</button>
36
+ </form>
37
+
38
+ {% if result and friendly_label %}
39
+ <div class="result-summary {% if 'Suspicious' in friendly_label %}suspicious{% elif 'Legitimate' in friendly_label %}legitimate{% else %}unknown{% endif %}">
40
+ <h2>Analysis Result Summary</h2>
41
+ <p><strong>Overall Assessment:</strong> {{ friendly_label }}</p>
42
+ <p><strong>Top Prediction:</strong> {{ result.prediction }}</p> <p><strong>Confidence Score:</strong> {{ "%.4f"|format(result.confidence) }}</p>
43
+ </div>
44
+
45
+ <div class="result-details">
46
+ <strong>Detailed Probabilities:</strong>
47
+ <ul>
48
+ {% for label, probability in result.all_probabilities.items() %}
49
+ <li>{{ label }}: {{ "%.4f"|format(probability) }}</li>
50
+ {% endfor %}
51
+ </ul>
52
+ </div>
53
+
54
+ {% elif error %}
55
+ <div class="error">
56
+ <strong>Error during analysis:</strong> {{ error }}
57
+ </div>
58
+ {% endif %}
59
+
60
+ <div class="disclaimer">
61
+ <strong>Reminder:</strong> Phishing detection is complex. This model provides probabilities for specific categories based on the text. Always look for other signs like sender addresses, urgent requests, unexpected attachments, and suspicious links.
62
+ </div>
63
+
64
+ </body>
65
+ </html>