Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,8 @@ from dotenv import load_dotenv
|
|
| 15 |
import helpers
|
| 16 |
from omeka_s_api_client import OmekaSClient, OmekaSClientError
|
| 17 |
from lancedb_client import LanceDBManager
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Load .env for credentials
|
| 20 |
load_dotenv()
|
|
@@ -52,7 +54,7 @@ app.layout = html.Div([
|
|
| 52 |
# Header
|
| 53 |
dbc.NavbarSimple(
|
| 54 |
children=[],
|
| 55 |
-
brand="Omeka S Computer Vision
|
| 56 |
brand_href="/",
|
| 57 |
color="light",
|
| 58 |
dark=False,
|
|
@@ -70,8 +72,8 @@ app.layout = html.Div([
|
|
| 70 |
|
| 71 |
# Tabs
|
| 72 |
dcc.Tabs(id="data-tabs", value="api", children=[
|
| 73 |
-
dcc.Tab(label="
|
| 74 |
-
dcc.Tab(label="
|
| 75 |
]),
|
| 76 |
|
| 77 |
html.Div(id="data-tab-content"),
|
|
@@ -204,36 +206,6 @@ app.layout = html.Div([
|
|
| 204 |
html.Div(id="status"),
|
| 205 |
dcc.Store(id="omeka-client-config", storage_type="session"),
|
| 206 |
]),
|
| 207 |
-
|
| 208 |
-
# Footer
|
| 209 |
-
html.Footer([
|
| 210 |
-
html.Hr(),
|
| 211 |
-
dbc.Container([
|
| 212 |
-
dbc.Row([
|
| 213 |
-
dbc.Col([
|
| 214 |
-
html.Img(src="SmartBibl.IA_Solutions.png", height="50"),
|
| 215 |
-
html.Small([
|
| 216 |
-
html.Br(),
|
| 217 |
-
html.A("Géraldine Geoffroy", href="mailto:grldn.geoffroy@gmail.com", className="text-muted")
|
| 218 |
-
])
|
| 219 |
-
]),
|
| 220 |
-
dbc.Col([
|
| 221 |
-
html.H5("Code source"),
|
| 222 |
-
html.Ul([
|
| 223 |
-
html.Li(html.A("Github", href="https://github.com/gegedenice/openalex-explorer", className="text-muted", target="_blank"))
|
| 224 |
-
])
|
| 225 |
-
]),
|
| 226 |
-
dbc.Col([
|
| 227 |
-
html.H5("Ressources"),
|
| 228 |
-
html.Ul([
|
| 229 |
-
html.Li(html.A("Nomic Atlas", href="https://atlas.nomic.ai/", target="_blank", className="text-muted")),
|
| 230 |
-
html.Li(html.A("Model nomic-embed-text-v1.5", href="https://huggingface.co/nomic-ai/nomic-embed-text-v1.5", target="_blank", className="text-muted")),
|
| 231 |
-
html.Li(html.A("Model nomic-embed-vision-v1.5", href="https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5", target="_blank", className="text-muted"))
|
| 232 |
-
])
|
| 233 |
-
])
|
| 234 |
-
])
|
| 235 |
-
])
|
| 236 |
-
], className="mt-5 p-3 bg-light border-top")
|
| 237 |
])
|
| 238 |
|
| 239 |
# -------------------- UI Callbacks --------------------
|
|
@@ -248,7 +220,7 @@ def render_tab_content(tab):
|
|
| 248 |
if tab == "omeka":
|
| 249 |
return html.Div([
|
| 250 |
html.Div([
|
| 251 |
-
html.H5("
|
| 252 |
# API URL input with full width
|
| 253 |
dbc.InputGroup([
|
| 254 |
dbc.Input(
|
|
@@ -308,7 +280,7 @@ def render_tab_content(tab):
|
|
| 308 |
], className="border rounded bg-white shadow-sm")
|
| 309 |
elif tab == "lance":
|
| 310 |
return html.Div([
|
| 311 |
-
html.H5("
|
| 312 |
dbc.Button("Load LanceDB tables", id="load-tables", color="link", size="sm", className="mt-2"),
|
| 313 |
dcc.Dropdown(id="db-tables-dropdown", placeholder="Select an existing table"),
|
| 314 |
dbc.Button("Display Table", id="load-data-db", color="success", size="sm", className="mt-2"),
|
|
@@ -409,14 +381,22 @@ def handle_omeka_data(n_clicks, item_set_id, client_config, table_name):
|
|
| 409 |
|
| 410 |
text_embed = helpers.generate_text_embed(df['text'].tolist())
|
| 411 |
img_embed = helpers.generate_img_embed(df['images_urls'].tolist())
|
| 412 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 413 |
df["embeddings"] = embeddings.tolist()
|
| 414 |
|
| 415 |
-
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1,
|
| 416 |
umap_embeddings = reducer.fit_transform(embeddings)
|
| 417 |
df["umap_embeddings"] = umap_embeddings.tolist()
|
| 418 |
|
| 419 |
-
clusterer = hdbscan.HDBSCAN(min_cluster_size=10)
|
| 420 |
cluster_labels = clusterer.fit_predict(umap_embeddings)
|
| 421 |
df["Cluster"] = cluster_labels
|
| 422 |
|
|
@@ -708,7 +688,7 @@ def create_umap_plot(df):
|
|
| 708 |
paper_bgcolor='white',
|
| 709 |
height=700,
|
| 710 |
margin=dict(t=30, b=30, l=30, r=30),
|
| 711 |
-
showlegend=
|
| 712 |
legend=dict(
|
| 713 |
yanchor="top",
|
| 714 |
y=0.99,
|
|
|
|
| 15 |
import helpers
|
| 16 |
from omeka_s_api_client import OmekaSClient, OmekaSClientError
|
| 17 |
from lancedb_client import LanceDBManager
|
| 18 |
+
import torch
|
| 19 |
+
import torch.nn.functional as F
|
| 20 |
|
| 21 |
# Load .env for credentials
|
| 22 |
load_dotenv()
|
|
|
|
| 54 |
# Header
|
| 55 |
dbc.NavbarSimple(
|
| 56 |
children=[],
|
| 57 |
+
brand="Omeka S Computer Vision Assistant",
|
| 58 |
brand_href="/",
|
| 59 |
color="light",
|
| 60 |
dark=False,
|
|
|
|
| 72 |
|
| 73 |
# Tabs
|
| 74 |
dcc.Tabs(id="data-tabs", value="api", children=[
|
| 75 |
+
dcc.Tab(label="From Omeka S", value="omeka"),
|
| 76 |
+
dcc.Tab(label="From LanceDB", value="lance")
|
| 77 |
]),
|
| 78 |
|
| 79 |
html.Div(id="data-tab-content"),
|
|
|
|
| 206 |
html.Div(id="status"),
|
| 207 |
dcc.Store(id="omeka-client-config", storage_type="session"),
|
| 208 |
]),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
])
|
| 210 |
|
| 211 |
# -------------------- UI Callbacks --------------------
|
|
|
|
| 220 |
if tab == "omeka":
|
| 221 |
return html.Div([
|
| 222 |
html.Div([
|
| 223 |
+
html.H5("From Omeka S", className="mb-3"),
|
| 224 |
# API URL input with full width
|
| 225 |
dbc.InputGroup([
|
| 226 |
dbc.Input(
|
|
|
|
| 280 |
], className="border rounded bg-white shadow-sm")
|
| 281 |
elif tab == "lance":
|
| 282 |
return html.Div([
|
| 283 |
+
html.H5("From LanceDB"),
|
| 284 |
dbc.Button("Load LanceDB tables", id="load-tables", color="link", size="sm", className="mt-2"),
|
| 285 |
dcc.Dropdown(id="db-tables-dropdown", placeholder="Select an existing table"),
|
| 286 |
dbc.Button("Display Table", id="load-data-db", color="success", size="sm", className="mt-2"),
|
|
|
|
| 381 |
|
| 382 |
text_embed = helpers.generate_text_embed(df['text'].tolist())
|
| 383 |
img_embed = helpers.generate_img_embed(df['images_urls'].tolist())
|
| 384 |
+
# Convert both embedding sets to torch tensors so they can be combined and normalized
|
| 385 |
+
text_tensor = torch.tensor(text_embed)
|
| 386 |
+
img_tensor = torch.tensor(img_embed)
|
| 387 |
+
|
| 388 |
+
# Weighted sum (0.7 * text + 0.3 * image), then L2-normalize each row
|
| 389 |
+
combined = (0.7 * text_tensor + 0.3 * img_tensor)
|
| 390 |
+
normalized_embeddings = F.normalize(combined, p=2, dim=1)
|
| 391 |
+
|
| 392 |
+
embeddings = normalized_embeddings.numpy()
|
| 393 |
df["embeddings"] = embeddings.tolist()
|
| 394 |
|
| 395 |
+
reducer = umap.UMAP(n_neighbors=15, min_dist=0.1, metric="cosine")
|
| 396 |
umap_embeddings = reducer.fit_transform(embeddings)
|
| 397 |
df["umap_embeddings"] = umap_embeddings.tolist()
|
| 398 |
|
| 399 |
+
clusterer = hdbscan.HDBSCAN(min_cluster_size=10, metric="euclidean")
|
| 400 |
cluster_labels = clusterer.fit_predict(umap_embeddings)
|
| 401 |
df["Cluster"] = cluster_labels
|
| 402 |
|
|
|
|
| 688 |
paper_bgcolor='white',
|
| 689 |
height=700,
|
| 690 |
margin=dict(t=30, b=30, l=30, r=30),
|
| 691 |
+
showlegend=True,
|
| 692 |
legend=dict(
|
| 693 |
yanchor="top",
|
| 694 |
y=0.99,
|