absa_hcmus / app.py
pat25bk's picture
add data analyze
a9200d2
raw
history blame
2.13 kB
import pandas as pd
import json
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
# Streamlit app: upload a JSON file of aspect/sentiment records and show
# summary tables and charts for it.

# Column names the uploaded records are expected to provide.
ASPECT_COLUMN = "A"
SENTIMENT_COLUMN = "S"


def _summarize_aspects(df):
    """Return per-aspect mention counts, split by sentiment label.

    The result has one row per distinct value of column "A" with the columns
    ``Aspect``, ``Freq`` (number of non-null "S" values) and ``Great`` /
    ``Ok`` / ``Bad`` (number of rows whose "S" equals 'great' / 'ok' / 'bad').
    """
    sentiments = df.groupby(ASPECT_COLUMN)[SENTIMENT_COLUMN]
    # Named aggregation gives the output columns explicit names instead of
    # relying on pandas' auto-generated "<lambda_N>" labels.
    summary = sentiments.agg(
        Freq="count",
        Great=lambda s: (s == "great").sum(),
        Ok=lambda s: (s == "ok").sum(),
        Bad=lambda s: (s == "bad").sum(),
    )
    return summary.reset_index().rename(columns={ASPECT_COLUMN: "Aspect"})


def _render_sentiment_charts(df):
    """Draw a percentage pie chart next to a per-sentiment count plot."""
    sentiment_distribution = df[SENTIMENT_COLUMN].value_counts(normalize=True) * 100
    if sentiment_distribution.empty:
        # Every sentiment value is null, so there is nothing to plot.
        st.info("No sentiment values available to plot.")
        return

    fig, (pie_ax, count_ax) = plt.subplots(1, 2, figsize=(10, 6))
    pie_ax.pie(
        sentiment_distribution,
        labels=sentiment_distribution.index,
        autopct='%1.1f%%',
        startangle=140,
    )
    pie_ax.axis('equal')
    pie_ax.set_title("Sentiment Distribution %")

    # NOTE(review): newer seaborn releases warn when `palette` is passed
    # without `hue` -- confirm against the seaborn version this app pins.
    sns.countplot(x="S", data=df, palette=sns.color_palette('bright'), ax=count_ax)
    count_ax.set_title("Sentiment Distribution Counts")

    st.pyplot(fig)
    # Close the figure so it does not accumulate across script reruns.
    plt.close(fig)


def _render_word_cloud(df):
    """Draw a word cloud built from the text of the aspect column."""
    st.subheader("Word Cloud for Most Mentioned Aspects")

    # Drop nulls and coerce to str so str.join cannot fail on non-text cells.
    aspect_terms = " ".join(df[ASPECT_COLUMN].dropna().astype(str))
    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            max_words=100,
            colormap='inferno',
            contour_width=3,
            contour_color='red',
        ).generate(aspect_terms)
    except ValueError:
        # WordCloud raises ValueError when the text yields no usable words.
        st.info("No aspect terms available to build a word cloud.")
        return

    # Use an explicit figure rather than the implicit global pyplot figure.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.set_title("Most mentioned aspect terms")
    ax.axis("off")
    st.pyplot(fig)
    plt.close(fig)


def _render_report(uploaded_file):
    """Parse the uploaded JSON file and render every section of the report."""
    try:
        loaded_dict = json.load(uploaded_file)
        df = pd.DataFrame(loaded_dict)
    except ValueError as err:
        # Covers invalid JSON, undecodable bytes and payloads that pandas
        # cannot shape into a table.
        st.error(f"Could not read the uploaded file as tabular JSON: {err}")
        return

    st.subheader("Dataframe (df)")
    st.write(df)

    missing = [
        column
        for column in (ASPECT_COLUMN, SENTIMENT_COLUMN)
        if column not in df.columns
    ]
    if missing:
        st.error(f"The uploaded data is missing required column(s): {', '.join(missing)}")
        return
    if df.empty:
        st.warning("The uploaded file contains no records to analyze.")
        return

    # Group by aspect and aggregate sentiment counts.
    st.subheader("Top Aspects by Frequency")
    st.write(_summarize_aspects(df).sort_values(by="Freq", ascending=False).head(5))

    # Sentiment distribution charts.
    st.subheader("Sentiment Distribution")
    _render_sentiment_charts(df)

    # Word cloud of aspect terms.
    _render_word_cloud(df)


# Define the Streamlit app
st.title("Data Analysis and Visualization")

# File upload and processing
uploaded_file = st.file_uploader("Upload JSON File", type=["json"])
if uploaded_file:
    _render_report(uploaded_file)

st.sidebar.markdown("**Upload a JSON file to get started.**")