Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -169,39 +169,34 @@ def find_sentences_with_keywords(text, keywords):
|
|
| 169 |
|
| 170 |
|
| 171 |
# Main function to process both PDFs based on the Excel file names and the sheet name
|
| 172 |
-
def
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
pdf_text1,pdf_text2 = extract_and_paragraph(pdf_file1, pdf_file2, False)
|
| 185 |
-
|
| 186 |
-
# Find sentences that match the sheet names (used as keywords)
|
| 187 |
-
matched_sentences1 = find_sentences_with_keywords(pdf_text1, set[sheet])
|
| 188 |
-
matched_sentences2 = find_sentences_with_keywords(pdf_text2, set[sheet])
|
| 189 |
-
|
| 190 |
-
# Format the results for output
|
| 191 |
-
result = {
|
| 192 |
-
"PDF 1": {
|
| 193 |
-
"File": pdf_file1,
|
| 194 |
-
"Keyword": set[sheet],
|
| 195 |
-
"Sentences": matched_sentences1
|
| 196 |
-
},
|
| 197 |
-
"PDF 2": {
|
| 198 |
-
"File": pdf_file2,
|
| 199 |
-
"Keyword": set[sheet],
|
| 200 |
-
"Sentences": matched_sentences2
|
| 201 |
-
}
|
| 202 |
}
|
|
|
|
| 203 |
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
|
| 206 |
stored_paragraphs_1 = []
|
| 207 |
stored_paragraphs_2 = []
|
|
@@ -288,8 +283,13 @@ with gr.Blocks() as demo:
|
|
| 288 |
b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
|
| 289 |
with gr.Row():
|
| 290 |
with gr.Column():
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
b2 = gr.Button("Extract text information")
|
| 293 |
-
b2.click(fn=
|
|
|
|
| 294 |
|
| 295 |
demo.launch()
|
|
|
|
| 169 |
|
| 170 |
|
| 171 |
# Main function to process both PDFs based on the Excel file names and the sheet name
|
| 172 |
+
def process_pdfs_and_analyze_sentiment(file1, file2, sheet):
    """Run keyword-filtered sentiment analysis on two PDFs.

    For each of the two inputs the text is obtained with
    ``extract_text_from_pdf``, reduced to the sentences returned by
    ``find_sentences_with_keywords`` for the keywords mapped to ``sheet``,
    joined with single spaces and passed to ``fin_ext_bis``.

    Args:
        file1: First PDF, forwarded unchanged to ``extract_text_from_pdf``.
        file2: Second PDF, forwarded unchanged to ``extract_text_from_pdf``.
        sheet: Sheet name used to look up the keyword list.

    Returns:
        A 2-tuple ``(result_for_file1, result_for_file2)`` holding whatever
        ``fin_ext_bis`` returns for each joined text.
    """
    # Stage 1: pull the text out of both PDFs (file1 first, then file2).
    raw_texts = [extract_text_from_pdf(pdf) for pdf in (file1, file2)]

    # Sheet name -> search terms handed to the sentence finder.
    terms_by_sheet = {
        'GDP': ['GDP'],
        'HICP': ['HICP'],
        'RRE prices': ['RRE', 'residential'],
        'CRE prices': ['CRE', 'commercial'],
        'Unemployment': ['unemployment'],
    }
    # NOTE(review): a sheet name missing from the table yields an empty
    # keyword list rather than an error -- confirm the helpers downstream
    # cope with that.
    search_terms = terms_by_sheet.get(sheet, [])

    # Stage 2: keep only the sentences that match the selected terms.
    matched = [
        find_sentences_with_keywords(raw, search_terms) for raw in raw_texts
    ]

    # Stage 3: collapse each PDF's sentences into one space-separated string.
    joined_texts = [" ".join(found) for found in matched]

    # Stage 4: sentiment analysis, evaluated for PDF 1 and then PDF 2.
    first_result, second_result = (
        fin_ext_bis(joined) for joined in joined_texts
    )
    return first_result, second_result
|
| 200 |
|
| 201 |
stored_paragraphs_1 = []
|
| 202 |
stored_paragraphs_2 = []
|
|
|
|
| 283 |
b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
|
| 284 |
with gr.Row():
|
| 285 |
with gr.Column():
|
| 286 |
+
sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
|
| 287 |
+
with gr.Column():
|
| 288 |
+
sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
|
| 289 |
+
|
| 290 |
+
# Button to extract text from PDFs and perform sentiment analysis
|
| 291 |
b2 = gr.Button("Extract text information")
|
| 292 |
+
b2.click(fn=process_pdfs_and_analyze_sentiment, inputs=[file1, file2, sheet], outputs=[sentiment_results_pdf1, sentiment_results_pdf2])
|
| 293 |
+
|
| 294 |
|
| 295 |
demo.launch()
|