Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -25,6 +25,7 @@ def make_spans(text, results):
|
|
| 25 |
summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
|
| 26 |
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
|
| 27 |
fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
|
|
|
|
| 28 |
|
| 29 |
def summarize_text(text):
|
| 30 |
resp = summarizer(text)
|
|
@@ -81,6 +82,7 @@ def get_sheet_names(file):
|
|
| 81 |
xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
|
| 82 |
return gr.update(choices=xls.sheet_names)
|
| 83 |
|
|
|
|
| 84 |
def process_and_compare(file1, sheet1, file2, sheet2):
|
| 85 |
def process_file(file_path, sheet_name):
|
| 86 |
# Extract year from file name
|
|
@@ -106,20 +108,15 @@ def process_and_compare(file1, sheet1, file2, sheet2):
|
|
| 106 |
df.columns = new_columns
|
| 107 |
else:
|
| 108 |
raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
|
| 109 |
-
|
| 110 |
-
return df
|
| 111 |
|
| 112 |
# Process both files
|
| 113 |
-
|
| 114 |
-
|
|
|
|
| 115 |
year1 = int(re.search(r'(\d{4})', file1).group(1))
|
| 116 |
year2 = int(re.search(r'(\d{4})', file2).group(1))
|
| 117 |
-
# Calculate the differences
|
| 118 |
-
# historical_col1 = f'Historical {int(year1) - 1}'
|
| 119 |
-
# historical_col2 = f'Historical {int(year2) - 1}'
|
| 120 |
-
|
| 121 |
-
# df1['Historical vs Adverse'] = df1[historical_col1] - df1['Adverse Cumulative']
|
| 122 |
-
# df2['Historical vs Adverse'] = df2[historical_col2] - df2['Adverse Cumulative']
|
| 123 |
|
| 124 |
# Merge dataframes on 'Country'
|
| 125 |
merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
|
|
@@ -197,10 +194,48 @@ def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
|
|
| 197 |
result_pdf2 = fin_ext_bis(text_pdf2)
|
| 198 |
|
| 199 |
return result_pdf1, result_pdf2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
stored_paragraphs_1 = []
|
| 202 |
stored_paragraphs_2 = []
|
| 203 |
|
|
|
|
|
|
|
|
|
|
| 204 |
with gr.Blocks() as demo:
|
| 205 |
with gr.Tab("Financial Report Text Analysis"):
|
| 206 |
gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
|
|
@@ -283,6 +318,11 @@ with gr.Blocks() as demo:
|
|
| 283 |
with gr.Row():
|
| 284 |
with gr.Column():
|
| 285 |
sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
with gr.Column():
|
| 287 |
sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
|
| 288 |
|
|
|
|
| 25 |
summarizer = pipeline("summarization", model="human-centered-summarization/financial-summarization-pegasus")
|
| 26 |
fin_model = pipeline("sentiment-analysis", model='yiyanghkust/finbert-tone', tokenizer='yiyanghkust/finbert-tone')
|
| 27 |
fin_model_bis = pipeline("sentiment-analysis", model='ProsusAI/finbert', tokenizer='ProsusAI/finbert')
|
| 28 |
+
table_to_text = pipeline('text2text-generation', model='google/flan-t5-large')
|
| 29 |
|
| 30 |
def summarize_text(text):
|
| 31 |
resp = summarizer(text)
|
|
|
|
| 82 |
xls = pd.ExcelFile(os.path.join(PDF_FOLDER, file))
|
| 83 |
return gr.update(choices=xls.sheet_names)
|
| 84 |
|
| 85 |
+
|
| 86 |
def process_and_compare(file1, sheet1, file2, sheet2):
|
| 87 |
def process_file(file_path, sheet_name):
|
| 88 |
# Extract year from file name
|
|
|
|
| 108 |
df.columns = new_columns
|
| 109 |
else:
|
| 110 |
raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
|
| 111 |
+
columns = ['Country', f'Adverse {year}', f'Adverse {year+1}', f'Adverse {year+2}', 'Adverse Cumulative']
|
| 112 |
+
return df, df[columns]
|
| 113 |
|
| 114 |
# Process both files
|
| 115 |
+
global stored_df1, stored_df2
|
| 116 |
+
df1, stored_df1 = process_file(file1, sheet1)
|
| 117 |
+
df2, stored_df2 = process_file(file2, sheet2)
|
| 118 |
year1 = int(re.search(r'(\d{4})', file1).group(1))
|
| 119 |
year2 = int(re.search(r'(\d{4})', file2).group(1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
# Merge dataframes on 'Country'
|
| 122 |
merged_df = pd.merge(df2, df1, on='Country', suffixes=(f'_{year1}', f'_{year2}'))
|
|
|
|
| 194 |
result_pdf2 = fin_ext_bis(text_pdf2)
|
| 195 |
|
| 196 |
return result_pdf1, result_pdf2
|
| 197 |
+
def change_choices(df):
    """Return a Gradio update that sets a dropdown's choices to the countries in ``df``.

    Args:
        df: DataFrame with a ``'Country'`` column.

    Returns:
        The value of ``gr.update(choices=...)`` carrying the ``'Country'``
        values as a plain list, in row order.
    """
    # Use the generic gr.update helper, as get_sheet_names does; the
    # per-component ``gr.Dropdown.update`` classmethod is not available in
    # newer Gradio releases.
    return gr.update(choices=df['Country'].tolist())
|
| 199 |
+
|
| 200 |
+
def generate_text(df, country, theme):
    """Generate a short description of one country's adverse-scenario figures.

    Selects the first row of ``df`` whose ``'Country'`` value equals
    ``country``, renders it with ``Series.to_string``, embeds it in a one-shot
    prompt (a worked example for France) and returns the text produced by the
    module-level ``table_to_text`` pipeline.

    Args:
        df: DataFrame with a ``'Country'`` column; the selected row is pasted
            into the prompt verbatim.
        country: Value to look up in the ``'Country'`` column.
        theme: Name of the indicator being described; inserted into the
            prompt text as-is.

    Returns:
        The ``'generated_text'`` field of the first pipeline result.

    Raises:
        ValueError: If no row of ``df`` matches ``country``.
    """
    # Filter the dataframe based on the country
    matches = df[df['Country'] == country]
    if matches.empty:
        # Fail with an explicit message instead of the opaque IndexError that
        # ``.iloc[0]`` raises on an empty selection.
        raise ValueError(f"Country {country!r} not found in the data.")
    row = matches.iloc[0]

    # Convert the row to a string format for prompt
    row_str = row.to_string(index=True)

    # Create the prompt. The worked example must quote exactly the years and
    # values shown in its own table; otherwise the model is given a pattern
    # that mislabels years and alters figures.
    prompt = f"""
Here is an example:
A table from France country:
Country France
Adverse 2020 -0.427975
Adverse 2021 -1.987167
Adverse 2022 -1.195906
Adverse Cumulative -3.573762

The theme is GDP

The output:
In adverse scenario, the growth for GDP in France is -0.427975% in 2020 and isn't getting better in 2021 with -1.987167% and -1.195906% in 2022.

Here is another table:
{row_str}

Summarize the adverse scenario growth for {theme} in {country} based on the data above, following a similar pattern to the example for France.
"""

    # Generate the descriptive text using the model
    result = table_to_text(prompt, max_length=200)[0]['generated_text']

    return result
|
| 232 |
+
# Global variables
|
| 233 |
stored_paragraphs_1 = []
|
| 234 |
stored_paragraphs_2 = []
|
| 235 |
|
| 236 |
+
stored_df1 = []
|
| 237 |
+
stored_df2 = []
|
| 238 |
+
|
| 239 |
with gr.Blocks() as demo:
|
| 240 |
with gr.Tab("Financial Report Text Analysis"):
|
| 241 |
gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
|
|
|
|
| 318 |
with gr.Row():
|
| 319 |
with gr.Column():
|
| 320 |
sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
|
| 321 |
+
country_1_dropdown = gr.Dropdown(label="Select Country from Excel File 1")
|
| 322 |
+
country_1_dropdown.change(fn =change_choices, inputs= stored_df1, outputs= paragraph_1_dropdown)
|
| 323 |
+
summarize_btn1_country = gr.Button("Summary for the selected country")
|
| 324 |
+
text_result_df1 = gr.Textbox(label="Sentence for excel file 1", lines=2)
|
| 325 |
+
summarize_btn1_country.click(fn= generate_text, inputs = [stored_df1, country_1_dropdown, sheet], outputs = text_result_df1)
|
| 326 |
with gr.Column():
|
| 327 |
sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
|
| 328 |
|