Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
| 3 |
+
from rdkit import Chem
|
| 4 |
+
from rdkit.Chem import AllChem
|
| 5 |
+
import py3Dmol
|
| 6 |
+
import random
|
| 7 |
+
import torch
|
| 8 |
+
|
| 9 |
+
# Load models
|
| 10 |
+
# All models below are loaded once, at import time, and shared by every request.

# Text-generation pipeline; run_pipeline uses it to produce the literature text.
bio_gpt = pipeline("text-generation", model="microsoft/BioGPT-Large")
# Tokenizer + model pair; run_pipeline feeds a SMILES string through them and
# averages the output logits into a single score.
# NOTE(review): the checkpoint is loaded through the causal-LM auto class;
# ChemBERTa appears to be a masked-LM checkpoint -- confirm this class is intended.
chemberta_tokenizer = AutoTokenizer.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
chemberta_model = AutoModelForCausalLM.from_pretrained("seyonec/ChemBERTa-zinc-base-v1")
# Question-answering pipeline; run_pipeline uses it for the compliance answer.
# NOTE(review): this looks like a base checkpoint without a fine-tuned QA head;
# if so the extracted answers would not be reliable -- verify against the model card.
compliance_qa = pipeline("question-answering", model="nlpaueb/legal-bert-base-uncased")
|
| 14 |
+
|
| 15 |
+
# Pick a random SMILES string from a fixed list of sample molecules
|
| 16 |
+
def generate_smiles():
    """Return one SMILES string drawn uniformly from a small built-in sample set."""
    # Nothing is generated by a model here; the candidates are hard-coded.
    candidates = (
        "CCO",
        "CCN",
        "C1=CC=CC=C1",
        "C(C(=O)O)N",
        "CC(C)CC",
    )
    picked = random.choice(candidates)
    return picked
|
| 19 |
+
|
| 20 |
+
# Convert SMILES to 3D molecule HTML using py3Dmol
|
| 21 |
+
def mol_to_3d_html(smiles):
    """Build py3Dmol viewer markup showing a 3D conformer of a molecule.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        HTML/JavaScript markup (str) for a 400x400 stick-style 3D viewer.

    Raises:
        ValueError: if RDKit cannot parse ``smiles`` or cannot embed 3D
            coordinates for it.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles signals a parse failure by returning None, not raising.
        raise ValueError(f"Invalid SMILES string: {smiles!r}")

    # Explicit hydrogens are needed for a sensible 3D geometry.
    mol = Chem.AddHs(mol)
    # EmbedMolecule returns the new conformer id, or -1 when embedding fails.
    if AllChem.EmbedMolecule(mol, AllChem.ETKDG()) < 0:
        raise ValueError(f"Could not generate 3D coordinates for: {smiles!r}")
    AllChem.UFFOptimizeMolecule(mol)
    block = Chem.MolToMolBlock(mol)

    view = py3Dmol.view(width=400, height=400)
    view.addModel(block, "mol")
    view.setStyle({"stick": {}})
    view.zoomTo()

    # py3Dmol turns unknown attribute access into queued JavaScript calls, so
    # `view.render().data` yields a proxy function rather than markup.
    # _make_html() is what actually serialises the viewer to an HTML string.
    # NOTE(review): gr.HTML may not execute embedded <script> tags; if the
    # viewer stays blank, wrap this markup in an <iframe srcdoc="..."> -- confirm.
    return view._make_html()
|
| 33 |
+
|
| 34 |
+
# Main pipeline
|
| 35 |
+
def run_pipeline(disease, symptoms):
    """Run every stage of the demo for one disease/symptom query.

    Each stage is best-effort: a failure is printed and reported in that
    stage's output slot instead of aborting the whole request.

    Args:
        disease: Disease name entered by the user.
        symptoms: Free-text symptom description entered by the user.

    Returns:
        A 5-tuple ``(insights, smiles, html_3d, score, compliance)``:
        the generated literature text, the sampled SMILES string (``"N/A"``
        if none could be produced), HTML for the 3D viewer, the ChemBERTa
        score (a float rounded to 3 places, or a message string), and the
        compliance answer.
    """
    import html  # function-scope import: only needed to escape error text

    print(f"Received input: disease={disease}, symptoms={symptoms}")

    # Literature generation
    try:
        prompt = f"Recent treatment and drug research for {disease} with symptoms: {symptoms}"
        insights = bio_gpt(prompt, max_length=200, do_sample=True)[0]['generated_text']
    except Exception as e:
        print("BioGPT Error:", e)
        insights = f"β Error generating literature: {e}"

    # 3D molecule from a random SMILES. `smiles` is assigned before rendering
    # so that a rendering failure does not discard an otherwise valid string.
    smiles = None
    try:
        smiles = generate_smiles()
        html_3d = mol_to_3d_html(smiles)
    except Exception as e:
        print("3D Molecule Error:", e)
        # Exception text may contain '<' or '&'; escape it before embedding.
        html_3d = f"<p>β Error rendering molecule: {html.escape(str(e))}</p>"

    # ChemBERTa score: the mean of all output logits for the tokenised SMILES.
    if smiles is None:
        # No molecule was produced, so there is nothing meaningful to score.
        smiles = "N/A"
        score = "N/A"
    else:
        try:
            inputs = chemberta_tokenizer(smiles, return_tensors="pt")
            with torch.no_grad():
                outputs = chemberta_model(**inputs)
            score = round(outputs.logits.mean().item(), 3)
        except Exception as e:
            print("ChemBERTa Error:", e)
            score = f"β Error: {e}"

    # LegalBERT compliance: fixed question over a fixed context, so the answer
    # does not depend on the user's input.
    try:
        compliance = compliance_qa(
            question="What does FDA require for drug testing?",
            context="FDA requires extensive testing for new drug candidates including Phase I, II, and III clinical trials."
        )["answer"]
    except Exception as e:
        print("LegalBERT Error:", e)
        compliance = f"β Error: {e}"

    return insights, smiles, html_3d, score, compliance
|
| 76 |
+
|
| 77 |
+
# Gradio Interface
|
| 78 |
+
# Input widgets, in the positional order run_pipeline expects its arguments.
_pipeline_inputs = [
    gr.Textbox(label="π¦ Disease", placeholder="e.g., Lung Cancer"),
    gr.Textbox(label="π©Ί Symptoms", placeholder="e.g., fatigue, breathlessness"),
]

# Output widgets, one per element of the tuple run_pipeline returns.
_pipeline_outputs = [
    gr.Textbox(label="π Literature Insights"),
    gr.Textbox(label="π§ͺ Generated SMILES"),
    gr.HTML(label="𧬠3D Molecule"),
    gr.Textbox(label="π Molecular Property Score (ChemBERTa)"),
    gr.Textbox(label="βοΈ Legal Compliance (FDA)"),
]

iface = gr.Interface(
    fn=run_pipeline,
    inputs=_pipeline_inputs,
    outputs=_pipeline_outputs,
    title="π Drug Discovery using LLMs",
    description="Enter disease & symptoms to explore literature, generate molecule structure, and simulate compliance.",
)

# Start the web UI as soon as the module is executed.
iface.launch()
|