added model
- app.py +106 -4
- model/merges.txt +0 -0
- model/pytorch_roberta_sentiment.bin +3 -0
- model/vocab.json +0 -0
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,14 +1,116 @@
 import streamlit as st
+import torch
+import pandas as pd
+import numpy as np
+import torch
+import transformers
+import json
+from torch.utils.data import Dataset, DataLoader
+from transformers import RobertaModel, RobertaTokenizer
+import transformers
+
+
+idx_to_tag = {0: 'cs',
+              1: 'stat',
+              2: 'physics',
+              3: 'math',
+              4: 'cond-mat',
+              5: 'q-bio',
+              6: 'eess',
+              7: 'quant-ph',
+              8: 'astro-ph',
+              9: 'nlin',
+              10: 'q-fin',
+              11: 'gr-qc',
+              12: 'hep-th',
+              13: 'hep-ex',
+              14: 'econ',
+              15: 'hep-ph',
+              16: 'nucl-th',
+              17: 'hep-lat',
+              18: 'math-ph',
+              19: 'nucl-ex'}
+
+
+tag_to_idx = {'cs': 0,
+              'stat': 1,
+              'physics': 2,
+              'math': 3,
+              'cond-mat': 4,
+              'q-bio': 5,
+              'eess': 6,
+              'quant-ph': 7,
+              'astro-ph': 8,
+              'nlin': 9,
+              'q-fin': 10,
+              'gr-qc': 11,
+              'hep-th': 12,
+              'hep-ex': 13,
+              'econ': 14,
+              'hep-ph': 15,
+              'nucl-th': 16,
+              'hep-lat': 17,
+              'math-ph': 18,
+              'nucl-ex': 19}
+
+class RobertaClass(torch.nn.Module):
+    def __init__(self):
+        super(RobertaClass, self).__init__()
+        self.l1 = RobertaModel.from_pretrained("roberta-base")
+        self.pre_classifier = torch.nn.Linear(768, 768)
+        self.dropout = torch.nn.Dropout(0.3)
+        self.classifier = torch.nn.Linear(768, 5)
+
+    def forward(self, input_ids, attention_mask, token_type_ids):
+        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
+        hidden_state = output_1[0]
+        pooler = hidden_state[:, 0]
+        pooler = self.pre_classifier(pooler)
+        pooler = torch.nn.ReLU()(pooler)
+        pooler = self.dropout(pooler)
+        output = self.classifier(pooler)
+        return output
+
+tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True,
+                                             vocab_file='model/vocab.json',
+                                             merges_file='model/merges.txt')
+
+
+
+model = torch.load('model/pytorch_roberta_sentiment.bin', map_location=torch.device('cpu'))
+
 
 st.markdown("### Hello, world!")
-st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
+# st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
 # ^-- you can show the user text, images, and a limited subset of html - everything works just like in jupyter
 
-[removed line: content not rendered in this view]
-[removed line: content not rendered in this view]
+title = st.text_area("Title HERE")
+abstract = st.text_area("Abstract HERE")
+
+
+
+text = title + " : " + abstract
+
+inputs = tokenizer.encode_plus(
+    text,
+    None,
+    add_special_tokens=True,
+    max_length=256,
+    pad_to_max_length=True,
+    return_token_type_ids=True
+)
+
+
+ids = torch.Tensor(inputs['input_ids']).long()
+mask = torch.Tensor(inputs['attention_mask']).long()
+token_type_ids = torch.Tensor(inputs['token_type_ids']).long()
+
+ans = model(ids.unsqueeze(0), mask.unsqueeze(0), token_type_ids.unsqueeze(0))
+
 
 from transformers import pipeline
 # here is the huggingface.transformers code you already know -- it can be replaced with anything from fairseq to catboost
 
-[removed line: content not rendered in this view]
+idx = torch.nn.functional.softmax(ans[0], dim=0).argmax().item()
+st.markdown(f'{idx_to_tag[idx]}')
 # show the model's output in a text field, for the user's amusement
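
A note on the loading pattern above: `torch.load` on a fully pickled module only succeeds if the class definition (`RobertaClass` here) is importable when the checkpoint is opened, which is why the class is redefined inside `app.py`. As committed, the script also reloads the ~500 MB checkpoint and re-runs the forward pass in training mode on every Streamlit rerun. Below is a minimal sketch of a hardened variant; the `load_model`/`predict` helpers and the `st.cache_resource` decorator are illustrative assumptions, not part of this commit.

import streamlit as st
import torch

# Sketch only (assumed helpers, not part of the commit): cache the heavy objects
# so the ~500 MB checkpoint is not reloaded on every Streamlit rerun, and run
# inference in eval mode without gradient tracking. st.cache_resource needs a
# recent Streamlit; older releases used st.cache(allow_output_mutation=True).
@st.cache_resource
def load_model():
    m = torch.load('model/pytorch_roberta_sentiment.bin',
                   map_location=torch.device('cpu'))
    m.eval()
    return m

def predict(model, tokenizer, text, idx_to_tag):
    # encode_plus with return_tensors='pt' already adds the batch dimension,
    # so no unsqueeze is needed; padding='max_length' replaces the deprecated
    # pad_to_max_length=True used in the committed code.
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=256,
        truncation=True,
        padding='max_length',
        return_token_type_ids=True,
        return_tensors='pt',
    )
    with torch.no_grad():
        logits = model(inputs['input_ids'],
                       inputs['attention_mask'],
                       inputs['token_type_ids'])
    return idx_to_tag[logits[0].softmax(dim=0).argmax().item()]

With helpers like these, the Streamlit body reduces to reading the two text areas and calling predict(model, tokenizer, title + " : " + abstract, idx_to_tag).
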
model/merges.txt
ADDED
The diff for this file is too large to render.
model/pytorch_roberta_sentiment.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d981012dade5ff2425eff3ccfb9bdbdc2938b1785009fa969acca60916a75ff0
+size 501514997
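
The three added lines above are a Git LFS pointer rather than the weights themselves: the repository stores only the object hash and size (about 500 MB), and the binary is pulled from LFS storage when the Space is cloned or built. If the checkpoint is needed outside the Space, one option is `huggingface_hub`; this is a sketch, and the repo id below is a placeholder since the Space's name is not shown in this commit.

from huggingface_hub import hf_hub_download

# Sketch only: fetch the LFS-backed checkpoint from the Space repository.
# "user/space-name" is a placeholder -- the actual repo id is not part of this commit.
checkpoint_path = hf_hub_download(
    repo_id="user/space-name",
    repo_type="space",
    filename="model/pytorch_roberta_sentiment.bin",
)
print(checkpoint_path)  # local cache path of the downloaded ~500 MB file
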
model/vocab.json
ADDED
The diff for this file is too large to render.
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
 torch
 streamlit
-transformers
+transformers
+pandas
+numpy