Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import torch | |
| import pandas as pd | |
| import numpy as np | |
| import torch | |
| import transformers | |
| import json | |
| from torch.utils.data import Dataset, DataLoader | |
| from transformers import RobertaModel, RobertaTokenizer | |
| import transformers | |
# Class index -> arXiv category label. The position of each label in the
# tuple below is the class index used by the classifier's output layer.
idx_to_tag = dict(enumerate((
    'cs',
    'stat',
    'physics',
    'math',
    'q-bio',
    'eess',
    'economics, finances',
    'gr-qc',
    'hep-ex',
    'hep-lat',
)))

# Inverse lookup (label -> class index), derived from the mapping above.
tag_to_idx = {tag: idx for idx, tag in idx_to_tag.items()}
class RobertaClass(torch.nn.Module):
    """RoBERTa encoder with a small feed-forward classification head.

    The head maps the 768-dimensional hidden state at sequence position 0
    through Linear -> ReLU -> Dropout -> Linear and emits 10 raw scores
    (logits), one per class.

    NOTE(review): the attribute names (``l1``, ``pre_classifier``,
    ``dropout``, ``classifier``) are presumably baked into the saved
    checkpoint that the app loads with ``torch.load`` -- do not rename them
    without re-exporting the model.
    """

    def __init__(self):
        super().__init__()
        hidden_size, num_classes = 768, 10
        self.l1 = RobertaModel.from_pretrained("roberta-base")
        self.pre_classifier = torch.nn.Linear(hidden_size, hidden_size)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, input_ids, attention_mask, token_type_ids):
        """Return class logits for a batch of tokenized inputs.

        All three arguments are forwarded unchanged, by keyword, to the
        wrapped RoBERTa encoder.
        """
        encoder_out = self.l1(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )
        # First element of the encoder output is the per-token hidden state;
        # keep only the vector at sequence position 0 for every batch item.
        first_token = encoder_out[0][:, 0]
        features = torch.nn.functional.relu(self.pre_classifier(first_token))
        return self.classifier(self.dropout(features))
def load_model():
    """Load the fine-tuned classifier and its tokenizer from ``model/``.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the object stored in
        ``model/pytorch_roberta_sentiment.bin`` (mapped to CPU and switched
        to evaluation mode) and ``tokenizer`` is a ``RobertaTokenizer`` built
        from the local vocabulary and merges files.
    """
    tokenizer = RobertaTokenizer.from_pretrained(
        'roberta-base',
        truncation=True,
        do_lower_case=True,
        vocab_file='model/vocab.json',
        merges_file='model/merges.txt',
    )
    # NOTE: torch.load unpickles the file, which can execute arbitrary code;
    # only ever point this at a trusted checkpoint. The model's class must
    # also be importable here for unpickling to succeed.
    model = torch.load('model/pytorch_roberta_sentiment.bin', map_location=torch.device('cpu'))
    # A pickled module keeps whatever train/eval flag it was saved with.
    # Force evaluation mode so dropout is disabled and the same input always
    # yields the same prediction.
    model.eval()
    # NOTE(review): Streamlit re-executes the script on every interaction, so
    # this function runs again on each rerun -- consider wrapping it in the
    # Streamlit resource cache available in the deployed version.
    return model, tokenizer
# Maximum number of tokens fed to the model; longer inputs are truncated and
# shorter ones are padded to exactly this length.
_MAX_TOKENS = 256
# The "top" table lists categories, most probable first, until their summed
# probability reaches this share.
_TOP_PROB_MASS = 0.95

model, tokenizer = load_model()


def _predict_logits(title, abstract):
    """Return the model's raw class scores for one paper as a 1-D tensor.

    Assumes the model returns a ``(1, num_classes)`` tensor for a batch of
    one, which is how the result is indexed below.
    """
    text = title + " : " + abstract
    inputs = tokenizer.encode_plus(
        text,
        None,
        add_special_tokens=True,
        max_length=_MAX_TOKENS,
        # Explicit padding/truncation replaces the deprecated
        # ``pad_to_max_length`` flag and guarantees a fixed-size input even
        # for abstracts longer than the limit.
        padding='max_length',
        truncation=True,
        return_token_type_ids=True,
    )
    ids = torch.tensor(inputs['input_ids'], dtype=torch.long)
    mask = torch.tensor(inputs['attention_mask'], dtype=torch.long)
    token_type_ids = torch.tensor(inputs['token_type_ids'], dtype=torch.long)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(ids.unsqueeze(0), mask.unsqueeze(0), token_type_ids.unsqueeze(0))
    return logits[0]


def _top_tags(logits, mass=_TOP_PROB_MASS):
    """Return (labels, probabilities) of the most likely classes covering ``mass``."""
    probs = logits.softmax(dim=0).detach().numpy()
    ranked = logits.argsort(descending=True).tolist()
    tags, tag_probs = [], []
    covered = 0
    for class_idx in ranked:
        if covered >= mass:
            break
        tags.append(idx_to_tag[class_idx])
        tag_probs.append(probs[class_idx])
        covered += probs[class_idx]
    return tags, tag_probs


st.markdown("### Угадыватель")
title = st.text_area("Title здесь")
abstract = st.text_area("Abstract здесь")

# Streamlit reruns the whole script on each click and only the clicked button
# evaluates to True in that run, so each branch computes its own prediction
# instead of relying on a result left over from the other branch.
if st.button('Предположить'):
    if not title or not abstract:
        st.write("Вы ничего не ввели =(")
    else:
        logits = _predict_logits(title, abstract)
        # Softmax is monotonic, so the arg-max of the logits is the most
        # probable class.
        st.markdown(f'{idx_to_tag[logits.argmax().item()]}')

if st.button("Посмотреть топ"):
    # Validate before touching the model so empty input costs nothing.
    if not title or not abstract:
        st.write("Вы ничего не ввели =(")
    else:
        top_tags, top_probs = _top_tags(_predict_logits(title, abstract))
        st.write(pd.DataFrame({
            'Направление': top_tags,
            'Вероятность': top_probs,
        }))