File size: 1,314 Bytes
76b88ad
 
63497f0
4972136
338da22
ef39b90
bb7495c
a7a92b0
d5836ce
 
1a42539
7d770e8
 
4a5db62
 
 
 
 
 
c71c098
 
7be3400
dca06d9
 
 
 
 
 
5f7b93c
4a5db62
5f7b93c
38f627c
338da22
5f7b93c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import streamlit as st

# Page header: title, decorative image, and a short description of the tool.
st.markdown("### Article Classifier")
# The image is injected as a raw <img> tag, hence unsafe_allow_html=True.
st.markdown("<img width=200px src='https://media.istockphoto.com/photos/funny-cat-is-studying-chemistry-picture-id526831620'>", unsafe_allow_html=True)
# A single "\n" is only a soft break in Markdown and renders as a space;
# a blank line ("\n\n") is needed to put the second sentence on its own line.
st.markdown(
    "This is a tool for classifying article category by its title and summary."
    "\n\nFollow the instructions below"
)
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import torch
import numpy as np
# Streamlit re-executes this whole script on every widget interaction, so an
# unguarded from_pretrained() would rebuild the tokenizer and model on each
# rerun. Use Streamlit's resource cache when the installed version provides
# one (st.cache_resource, or the older st.experimental_singleton); otherwise
# fall back to an identity decorator, which loads directly as before.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


@_cache_resource
def _load_classifier(checkpoint="Wi/arxiv-topics-distilbert-base-cased"):
    """Load the tokenizer and sequence-classification model for ``checkpoint``.

    Returns:
        A ``(tokenizer, model)`` tuple. When a Streamlit resource cache is
        available the same objects are reused across reruns, so callers must
        treat them as read-only.
    """
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )


tokenizer, model = _load_classifier()

# Labels are listed, most probable first, until their cumulative probability
# reaches this threshold.
CUMULATIVE_PROBABILITY_THRESHOLD = 0.95

title = st.text_area("Put the title here")
abstract = st.text_area("Put the abstract here")
if st.button('Press when ready'):
    if not title.strip() and not abstract.strip():
        # Nothing to classify; avoid presenting predictions for empty input.
        st.warning("Please enter a title or an abstract first.")
    else:
        text = 'Title:' + title + '\n' + 'Abstract:' + abstract

        # Truncate to the model's positional limit: without this, a long
        # abstract yields more tokens than the model has position embeddings
        # for and the forward pass raises instead of returning a prediction.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=model.config.max_position_embeddings,
        )
        with torch.no_grad():
            logits = model(**inputs).logits

        # Single input, so flatten the (1, num_labels) output into a plain
        # list of per-class probabilities.
        probs = logits.softmax(dim=-1).cpu().flatten().tolist()
        # Class indices sorted from most to least probable.
        order = np.argsort(probs)[::-1]

        predicted_class_ids = []
        cumulative = 0.0
        # Iterating over `order` bounds the loop by the number of classes,
        # so it cannot index past the end even if rounding keeps the total
        # just under the threshold.
        for class_id in order:
            if cumulative >= CUMULATIVE_PROBABILITY_THRESHOLD:
                break
            # Convert the NumPy integer to a plain int for the id2label lookup.
            predicted_class_ids.append(int(class_id))
            cumulative += probs[class_id]

        for class_id in predicted_class_ids:
            st.markdown(model.config.id2label[class_id])