Spaces:
Sleeping
Sleeping
Commit
·
9fc7514
1
Parent(s):
08620e1
Added TopicData objects
Browse files
- Dockerfile +3 -3
- keynmf_data.joblib +3 -0
- main.py +3 -26
- top2vec_data.joblib +3 -0
Dockerfile
CHANGED
|
@@ -8,8 +8,7 @@ RUN apt install -y git
|
|
| 8 |
|
| 9 |
RUN pip install gunicorn==20.1.0
|
| 10 |
RUN pip install typing-extensions
|
| 11 |
-
RUN pip install topic-wizard
|
| 12 |
-
RUN pip install "turftopic>=0.13.0"
|
| 13 |
|
| 14 |
RUN useradd -m -u 1000 user
|
| 15 |
# Switch to the "user" user
|
|
@@ -32,6 +31,7 @@ RUN git clone https://github.com/x-tabdeveloping/topicwizard
|
|
| 32 |
WORKDIR $HOME/app/topicwizard
|
| 33 |
RUN git checkout topic-arena
|
| 34 |
RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
|
| 35 |
-
RUN cp $HOME/app/
|
|
|
|
| 36 |
EXPOSE 7860
|
| 37 |
CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
|
|
|
|
| 8 |
|
| 9 |
RUN pip install gunicorn==20.1.0
|
| 10 |
RUN pip install typing-extensions
|
| 11 |
+
RUN pip install "turftopic[topic-wizard]"
|
|
|
|
| 12 |
|
| 13 |
RUN useradd -m -u 1000 user
|
| 14 |
# Switch to the "user" user
|
|
|
|
| 31 |
WORKDIR $HOME/app/topicwizard
|
| 32 |
RUN git checkout topic-arena
|
| 33 |
RUN cp $HOME/app/main.py $HOME/app/topicwizard/main.py
|
| 34 |
+
RUN cp $HOME/app/keynmf_data.joblib $HOME/app/topicwizard/keynmf_data.joblib
|
| 35 |
+
RUN cp $HOME/app/top2vec_data.joblib $HOME/app/topicwizard/top2vec_data.joblib
|
| 36 |
EXPOSE 7860
|
| 37 |
CMD gunicorn --timeout 0 -b 0.0.0.0:7860 --workers=2 --threads=4 --worker-class=gthread main:server
|
keynmf_data.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08ffa99e4c964c6874e7f7ecb75dadb3caf63ea3a6f8a639c117c7df9864c07f
|
| 3 |
+
size 147687747
|
main.py
CHANGED
|
@@ -1,14 +1,10 @@
|
|
| 1 |
import dash_mantine_components as dmc
|
| 2 |
-
import joblib
|
| 3 |
-
import numpy as np
|
| 4 |
from dash_extensions.enrich import (Dash, DashBlueprint, Input, Output, State,
|
| 5 |
dcc, exceptions, html)
|
| 6 |
-
from sentence_transformers import SentenceTransformer
|
| 7 |
-
from sklearn.datasets import fetch_20newsgroups
|
| 8 |
from topicwizard.widgets import (ConceptClusters, DocumentClusters,
|
| 9 |
TopicBrowser, TopicHierarchy,
|
| 10 |
create_widget_container)
|
| 11 |
-
from turftopic import ClusteringTopicModel, KeyNMF
|
| 12 |
|
| 13 |
|
| 14 |
def create_app(blueprint):
|
|
@@ -29,27 +25,8 @@ def create_app(blueprint):
|
|
| 29 |
return app
|
| 30 |
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
print("Calculating embeddings")
|
| 36 |
-
encoder = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1")
|
| 37 |
-
embeddings = encoder.encode(corpus, show_progress_bar=True)
|
| 38 |
-
|
| 39 |
-
print("Fitting keynmf")
|
| 40 |
-
keynmf = KeyNMF(5, encoder=encoder, random_state=42)
|
| 41 |
-
keynmf_data = keynmf.prepare_topic_data(corpus, embeddings=embeddings)
|
| 42 |
-
keynmf_data.hierarchy.divide_children(5)
|
| 43 |
-
|
| 44 |
-
print("Fitting top2vec")
|
| 45 |
-
top2vec = ClusteringTopicModel(
|
| 46 |
-
n_reduce_to=5,
|
| 47 |
-
feature_importance="centroid",
|
| 48 |
-
encoder=encoder,
|
| 49 |
-
random_state=0,
|
| 50 |
-
)
|
| 51 |
-
top2vec_data = top2vec.prepare_topic_data(corpus, embeddings=embeddings)
|
| 52 |
-
|
| 53 |
print("Building blueprints.")
|
| 54 |
keynmf_blueprint = create_widget_container(
|
| 55 |
[TopicBrowser(), ConceptClusters(), TopicHierarchy()],
|
|
|
|
| 1 |
import dash_mantine_components as dmc
|
|
|
|
|
|
|
| 2 |
from dash_extensions.enrich import (Dash, DashBlueprint, Input, Output, State,
|
| 3 |
dcc, exceptions, html)
|
|
|
|
|
|
|
| 4 |
from topicwizard.widgets import (ConceptClusters, DocumentClusters,
|
| 5 |
TopicBrowser, TopicHierarchy,
|
| 6 |
create_widget_container)
|
| 7 |
+
from turftopic.data import TopicData
|
| 8 |
|
| 9 |
|
| 10 |
def create_app(blueprint):
|
|
|
|
| 25 |
return app
|
| 26 |
|
| 27 |
|
| 28 |
+
keynmf_data = TopicData.from_disk("keynmf_data.joblib")
|
| 29 |
+
top2vec_data = TopicData.from_disk("top2vec_data.joblib")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
print("Building blueprints.")
|
| 31 |
keynmf_blueprint = create_widget_container(
|
| 32 |
[TopicBrowser(), ConceptClusters(), TopicHierarchy()],
|
top2vec_data.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4529b2dc32c6e1b87fc8c24367fbbc20f901b60296d28feef50dbe175913a03a
|
| 3 |
+
size 177605661
|