Spaces: Running on Zero

update cluster plot

app.py CHANGED
@@ -1,10 +1,13 @@
 # Author: Huzheng Yang
 # %%
 import copy
+from functools import partial
 from io import BytesIO
 import os
 
+from einops import rearrange
 from matplotlib import pyplot as plt
+import matplotlib
 USE_HUGGINGFACE_ZEROGPU = os.getenv("USE_HUGGINGFACE_ZEROGPU", "False").lower() in ["true", "1", "yes"]
 DOWNLOAD_ALL_MODELS_DATASETS = os.getenv("DOWNLOAD_ALL_MODELS_DATASETS", "False").lower() in ["true", "1", "yes"]
 
@@ -219,17 +222,111 @@ def run_alignedthreemodelattnnodes(images, model, batch_size=16):
     return outputs
 
 
+def _reds_colormap(image):
+    # normed_data = image / image.max() # Normalize to [0, 1]
+    normed_data = image
+    colormap = matplotlib.colormaps['inferno'] # Get the Reds colormap
+    colored_image = colormap(normed_data) # Apply colormap
+    return (colored_image[..., :3] * 255).astype(np.uint8) # Convert to RGB
+
+# heatmap images
+def apply_reds_colormap(images, size):
+    # for i_image in range(images.shape[0]):
+    #     images[i_image] -= images[i_image].min()
+    #     images[i_image] /= images[i_image].max()
+    # normed_data = [_reds_colormap(images[i]) for i in range(images.shape[0])]
+    # normed_data = np.stack(normed_data)
+    normed_data = _reds_colormap(images)
+    normed_data = torch.tensor(normed_data).float()
+    normed_data = rearrange(normed_data, "b h w c -> b c h w")
+    normed_data = torch.nn.functional.interpolate(normed_data, size=size, mode="nearest")
+    normed_data = rearrange(normed_data, "b c h w -> b h w c")
+    normed_data = normed_data.cpu().numpy().astype(np.uint8)
+    return normed_data
+
+# Blend heatmap with the original image
+def blend_image_with_heatmap(image, heatmap, opacity1=0.5, opacity2=0.5):
+    blended = (1 - opacity1) * image + opacity2 * heatmap
+    return blended.astype(np.uint8)
+
+def make_cluster_plot(eigvecs, images, h=64, w=64):
+    from ncut_pytorch.ncut_pytorch import farthest_point_sampling
+    magnitude = torch.norm(eigvecs, dim=-1)
+    p = 0.5
+    top_p_idx = magnitude.argsort(descending=True)[:int(p * magnitude.shape[0])]
+    num_samples = 50
+    fps_idx = farthest_point_sampling(eigvecs[top_p_idx], num_samples)
+    fps_idx = top_p_idx[fps_idx]
+
+    # downsample to 256x256
+    images = F.interpolate(images, (256, 256), mode="bilinear")
+    images = images.cpu().numpy()
+    images = images.transpose(0, 2, 3, 1)
+    images = images * 255
+    images = images.astype(np.uint8)
+
+
+    # sort the fps_idx by the mean of the heatmap
+    fps_heatmaps = {}
+    sort_values = []
+    for _, idx in enumerate(fps_idx):
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        eigvecs = eigvecs.to(device)
+        heatmap = F.cosine_similarity(eigvecs, eigvecs[idx][None], dim=-1)
+        heatmap = heatmap.reshape(-1, h, w)
+        mask = (heatmap > 0.5).float()
+        sort_values.append(mask.mean().item())
+        fps_heatmaps[idx.item()] = heatmap.cpu()
+
+    fig_images = []
+    i_cluster = 0
+    for i_fig in range(10):
+        fig, axs = plt.subplots(3, 5, figsize=(15, 9))
+        for ax in axs.flatten():
+            ax.axis("off")
+        for j, idx in enumerate(fps_idx[i_fig*5:i_fig*5+5]):
+            heatmap = fps_heatmaps[idx.item()]
+            mask = (heatmap > 0.1).float()
+            sorted_image_idxs = torch.argsort(mask.mean((1, 2)), descending=True)
+            size = (images.shape[1], images.shape[2])
+            heatmap = apply_reds_colormap(heatmap, size)
+            for i, image_idx in enumerate(sorted_image_idxs[:3]):
+                _heatmap = blend_image_with_heatmap(images[image_idx], heatmap[image_idx])
+                axs[i, j].imshow(_heatmap)
+                if i == 0:
+                    axs[i, j].set_title(f"cluster {i_cluster+1}", fontsize=24)
+            i_cluster += 1
+        plt.tight_layout(h_pad=0.5, w_pad=0.3)
+
+        buf = BytesIO()
+        plt.savefig(buf, bbox_inches='tight', dpi=72)
+
+        buf.seek(0)  # Move to the start of the BytesIO buffer
+        img = Image.open(buf)
+        img = img.convert("RGB")
+        img = copy.deepcopy(img)
+        buf.close()
+
+        fig_images.append(img)
+        plt.close()
+
+    # plt.imshow(img)
+    # plt.axis("off")
+    # plt.show()
+    return fig_images
+
+
 def ncut_run(
     model,
     images,
-    model_name="
-    layer
+    model_name="DiNO(dino_vitb8_448)",
+    layer=10,
     num_eig=100,
     node_type="block",
-    affinity_focal_gamma=0.
+    affinity_focal_gamma=0.5,
     num_sample_ncut=10000,
     knn_ncut=10,
-    embedding_method="
+    embedding_method="tsne_3d",
     embedding_metric='euclidean',
     num_sample_tsne=1000,
     knn_tsne=10,
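Note: the three helpers added above compose into one pipeline: colorize a batch of similarity maps with a matplotlib colormap, resize them to image resolution, then alpha-blend them onto the originals. A minimal runnable sketch of that pipeline with random stand-in data (shapes and the 'inferno' colormap follow the diff; none of this is the app's real feature data):

```python
import numpy as np
import torch
from einops import rearrange
import matplotlib

heatmaps = np.random.rand(4, 64, 64)  # four per-image similarity maps in [0, 1]
colored = matplotlib.colormaps['inferno'](heatmaps)[..., :3]  # RGBA -> RGB floats
colored = (colored * 255).astype(np.uint8)                    # as in _reds_colormap

x = rearrange(torch.tensor(colored).float(), "b h w c -> b c h w")
x = torch.nn.functional.interpolate(x, size=(256, 256), mode="nearest")
colored = rearrange(x, "b c h w -> b h w c").numpy().astype(np.uint8)  # apply_reds_colormap

images = (np.random.rand(4, 256, 256, 3) * 255).astype(np.uint8)
blended = (0.5 * images + 0.5 * colored).astype(np.uint8)  # blend_image_with_heatmap
```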
@@ -353,8 +450,10 @@ def ncut_run(
         logging_str += _logging_str
         rgb.append(_rgb[0])
 
-    if not old_school_ncut:
-        rgb, _logging_str = compute_ncut(
+
+    cluster_images = None
+    if not old_school_ncut:  # ailgnedcut, joint across all images
+        rgb, _logging_str, eigvecs = compute_ncut(
             features,
             num_eig=num_eig,
             num_sample_ncut=num_sample_ncut,
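Note: `compute_ncut` now also returns `eigvecs`, which is what feeds `make_cluster_plot`. Each cluster heatmap is just the cosine similarity between every patch's eigenvector row and one sampled anchor row; a runnable sketch with random data (the real anchors come from `farthest_point_sampling` in `ncut-pytorch`, not a fixed index):

```python
import torch
import torch.nn.functional as F

n_images, h, w, d = 2, 8, 8, 30
eigvecs = torch.randn(n_images * h * w, d)       # one row per image patch
anchor = eigvecs[123]                            # stand-in for a farthest-point sample
heatmap = F.cosine_similarity(eigvecs, anchor[None], dim=-1)
heatmap = heatmap.reshape(n_images, h, w)        # one similarity map per image
coverage = (heatmap > 0.5).float().mean((1, 2))  # used to rank images per cluster
```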
@@ -384,7 +483,6 @@ def ncut_run(
         pil_images.append(_im)
         return pil_images, logging_str
 
-
 
     if is_lisa == True:
         # dirty patch for the LISA model
@@ -396,16 +494,26 @@ def ncut_run(
 
     rgb = dont_use_too_much_green(rgb)
 
+    if not video_output:
+        start = time.time()
+        h, w = features.shape[1], features.shape[2]
+        _images = reverse_transform_image(images, stablediffusion="stable" in model_name.lower())
+        cluster_images = make_cluster_plot(eigvecs, _images, h=h, w=w)
+        logging_str += f"Plot time: {time.time() - start:.2f}s\n"
+
 
     if video_output:
         video_path = get_random_path()
         video_cache.add_video(video_path)
         pil_images_to_video(to_pil_images(rgb), video_path)
         return video_path, logging_str
-
-    return to_pil_images(rgb), logging_str
+
+
+    return to_pil_images(rgb), cluster_images, logging_str
+
 
 def _ncut_run(*args, **kwargs):
+    n_ret = kwargs.pop("n_ret", 1)
     try:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
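Note: each cluster-plot page built here is a matplotlib figure captured into a PIL image through an in-memory buffer, so the images survive `plt.close()`. The capture pattern from `make_cluster_plot`, reduced to its core:

```python
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.imshow([[0, 1], [1, 0]])
buf = BytesIO()
plt.savefig(buf, bbox_inches='tight', dpi=72)
buf.seek(0)
img = Image.open(buf).convert("RGB").copy()  # copy so the buffer can be closed
buf.close()
plt.close(fig)
```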
@@ -414,15 +522,17 @@ def _ncut_run(*args, **kwargs):
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-
+
+        ret = list(ret)[:n_ret] + [ret[-1]]
         return ret
     except Exception as e:
         gr.Error(str(e))
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-        return
+        return *(None for _ in range(n_ret)), "Error: " + str(e)
 
     # ret = ncut_run(*args, **kwargs)
+    # ret = list(ret)[:n_ret] + [ret[-1]]
     # return ret
 
 if USE_HUGGINGFACE_ZEROGPU:
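Note: `n_ret` establishes a uniform calling convention for the wrapper: a successful run yields `n_ret` payload values plus a trailing logging string, and the error path yields one `None` per expected output. A toy sketch of both paths (function names here are illustrative, not from the app):

```python
def fake_run(n_ret=1):
    ret = ("rgb_gallery", "cluster_gallery", "logging")  # what a run might return
    return list(ret)[:n_ret] + [ret[-1]]                 # n_ret payloads + log string

def fake_run_error(n_ret=1, msg="CUDA out of memory"):
    return *(None for _ in range(n_ret)), "Error: " + msg  # one None per output

print(fake_run(n_ret=2))        # ['rgb_gallery', 'cluster_gallery', 'logging']
print(fake_run_error(n_ret=2))  # (None, None, 'Error: CUDA out of memory')
```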
@@ -488,6 +598,16 @@ def transform_image(image, resolution=(1024, 1024), stablediffusion=False):
     image = image * 2 - 1
     return image
 
+def reverse_transform_image(image, stablediffusion=False):
+    if stablediffusion:
+        image = (image + 1) / 2
+    else:
+        mean = [0.485, 0.456, 0.406]
+        std = [0.229, 0.224, 0.225]
+        image = image * torch.tensor(std).view(3, 1, 1) + torch.tensor(mean).view(3, 1, 1)
+    image = torch.clamp(image, 0, 1)
+    return image
+
 def plot_one_image_36_grid(original_image, tsne_rgb_images):
     mean = [0.485, 0.456, 0.406]
     std = [0.229, 0.224, 0.225]
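Note: `reverse_transform_image` inverts the input normalization so heatmaps can be blended onto viewable images: the ImageNet branch undoes `(x - mean) / std` with `x * std + mean`, and the Stable Diffusion branch maps `[-1, 1]` back through `(x + 1) / 2`. A quick round-trip check for the ImageNet branch (assuming the forward transform uses the same statistics):

```python
import torch

mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

x = torch.rand(3, 8, 8)            # an image in [0, 1]
normed = (x - mean) / std          # forward normalization
restored = (normed * std + mean).clamp(0, 1)
assert torch.allclose(restored, x, atol=1e-6)
```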
@@ -583,8 +703,8 @@ promptable_segmentation_models = ["LISA(xinlai/LISA-7B-v1)"]
 
 def run_fn(
     images,
-    model_name="
-    layer
+    model_name="DiNO(dino_vitb8_448)",
+    layer=10,
     num_eig=100,
     node_type="block",
     positive_prompt="",
@@ -593,15 +713,15 @@ def run_fn(
     lisa_prompt1="",
     lisa_prompt2="",
     lisa_prompt3="",
-    affinity_focal_gamma=0.
+    affinity_focal_gamma=0.5,
     num_sample_ncut=10000,
     knn_ncut=10,
-    embedding_method="
+    embedding_method="tsne_3d",
     embedding_metric='euclidean',
-    num_sample_tsne=
+    num_sample_tsne=300,
     knn_tsne=10,
-    perplexity=
-    n_neighbors=
+    perplexity=150,
+    n_neighbors=150,
     min_dist=0.1,
     sampling_method="fps",
     old_school_ncut=False,
@@ -613,11 +733,12 @@ def run_fn(
     recursion_l1_gamma=0.5,
     recursion_l2_gamma=0.5,
     recursion_l3_gamma=0.5,
+    n_ret=1,
 ):
 
     if images is None:
         gr.Warning("No images selected.")
-        return
+        return *(None for _ in range(n_ret)), "No images selected."
 
     video_output = False
     if isinstance(images, str):
@@ -733,6 +854,7 @@ def run_fn(
         "lisa_prompt2": lisa_prompt2,
         "lisa_prompt3": lisa_prompt3,
         "is_lisa": is_lisa,
+        "n_ret": n_ret,
     }
     # print(kwargs)
 
@@ -1042,9 +1164,11 @@ with demo:
         with gr.Column(scale=5, min_width=200):
             input_gallery, submit_button, clear_images_button = make_input_images_section()
             dataset_dropdown, num_images_slider, random_seed_slider, load_images_button = make_dataset_images_section()
+            logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
 
         with gr.Column(scale=5, min_width=200):
             output_gallery = make_output_images_section()
+            cluster_gallery = gr.Gallery(value=[], label="Clusters", show_label=False, elem_id="clusters", columns=[2], rows=[1], object_fit="contain", height="auto", show_share_button=True, preview=True)
             [
                 model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
                 affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
@@ -1052,16 +1176,15 @@ with demo:
                 perplexity_slider, n_neighbors_slider, min_dist_slider,
                 sampling_method_dropdown, positive_prompt, negative_prompt
             ] = make_parameters_section()
-
-            logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+            num_eig_slider.value = 30
 
-        clear_images_button.click(lambda x: ([], []), outputs=[input_gallery, output_gallery])
+        clear_images_button.click(lambda x: ([], [], []), outputs=[input_gallery, output_gallery, cluster_gallery])
 
         false_placeholder = gr.Checkbox(label="False", value=False, elem_id="false_placeholder", visible=False)
         no_prompt = gr.Textbox("", label="", elem_id="empty_placeholder", type="text", placeholder="", visible=False)
 
         submit_button.click(
-            run_fn,
+            partial(run_fn, n_ret=2),
            inputs=[
                 input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                 positive_prompt, negative_prompt,
@@ -1070,7 +1193,7 @@ with demo:
                 embedding_method_dropdown, embedding_metric_dropdown, num_sample_tsne_slider, knn_tsne_slider,
                 perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown
             ],
-            outputs=[output_gallery, logging_text],
+            outputs=[output_gallery, cluster_gallery, logging_text],
             api_name="API_AlignedCut"
         )
 
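Note: `functools.partial` is what ties `n_ret` to the UI: each `submit_button.click` binds the number of output components its tab owns, so the shared `run_fn` returns exactly that many values plus the logging string. A minimal sketch of the wiring, assuming a recent gradio 4.x API (the `run_fn` body is a stand-in):

```python
from functools import partial
import gradio as gr

def run_fn(images, n_ret=1):
    ret = (images, images, "done")       # stand-in payloads plus a logging string
    return list(ret)[:n_ret] + [ret[-1]]

with gr.Blocks() as demo:
    input_gallery = gr.Gallery(value=[], label="Input")
    output_gallery = gr.Gallery(value=[], label="NCUT")
    cluster_gallery = gr.Gallery(value=[], label="Clusters", columns=[2], preview=True)
    logging_text = gr.Textbox(label="Logging")
    submit_button = gr.Button("Submit")
    submit_button.click(
        partial(run_fn, n_ret=2),
        inputs=[input_gallery],
        outputs=[output_gallery, cluster_gallery, logging_text],
    )
```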
@@ -1201,7 +1324,7 @@ with demo:
         no_prompt = gr.Textbox("", label="", elem_id="empty_placeholder", type="text", placeholder="", visible=False)
 
         submit_button.click(
-            run_fn,
+            partial(run_fn, n_ret=3),
             inputs=[
                 input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                 positive_prompt, negative_prompt,
@@ -1305,7 +1428,7 @@ with demo:
         galleries = [l1_gallery, l2_gallery, l3_gallery]
         true_placeholder = gr.Checkbox(label="True placeholder", value=True, elem_id="true_placeholder", visible=False)
         submit_button.click(
-            run_fn,
+            partial(run_fn, n_ret=len(galleries)),
             inputs=[
                 input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
                 positive_prompt, negative_prompt,
@@ -1465,6 +1588,7 @@ with demo:
     gr.Markdown("**This demo is for the Python package `ncut-pytorch`, please visit the [Documentation](https://ncut-pytorch.readthedocs.io/)**")
     gr.Markdown("**All the models and functions used for this demo are in the Python package `ncut-pytorch`**")
     gr.Markdown("---")
+    gr.Markdown("---")
     gr.Markdown("**Normalized Cuts**, aka. spectral clustering, is a graphical method to analyze data grouping in the affinity eigenvector space. It has been widely used for unsupervised segmentation in the 2000s.")
     gr.Markdown("*Normalized Cuts and Image Segmentation, Jianbo Shi and Jitendra Malik, 2000*")
     gr.Markdown("---")
@@ -1473,7 +1597,9 @@ with demo:
     gr.Markdown("- **spectral-tSNE** visualization, a new method to visualize the high-dimensional eigenvector space with 3D RGB cube. Color is aligned across images, color infers distance in representation.")
     gr.Markdown("*paper in prep, Yang 2024*")
     gr.Markdown("*AlignedCut: Visual Concepts Discovery on Brain-Guided Universal Feature Space, Huzheng Yang, James Gee\*, and Jianbo Shi\*, 2024*")
-
+    gr.Markdown("---")
+    gr.Markdown("---")
+    gr.Markdown('<p style="text-align: center;">We thank the HuggingFace team for hosting this demo.</p>')
 
 
     with gr.Row():
@@ -1497,4 +1623,10 @@ if DOWNLOAD_ALL_MODELS_DATASETS:
     demo.launch(share=True)
 
 
-# %%
+# # %%
+# # debug
+# # change working directory to "/"
+# os.chdir("/")
+# images = [(Image.open(image), None) for image in default_images]
+# ret = run_fn(images, num_eig=30)
+# # %%