Spaces: Running on Zero

fix align3model load

app.py CHANGED
@@ -678,13 +678,14 @@ def plot_one_image_36_grid(original_image, tsne_rgb_images):
     return img
 
 def load_alignedthreemodel():
-
-    os.system("git clone https://huggingface.co/huzey/alignedthreeattn >> /dev/null 2>&1")
-    # pull
-    os.system("git -C alignedthreeattn pull >> /dev/null 2>&1")
-    # add to path
     import sys
-
+
+    if "alignedthreeattn" not in sys.path:
+        for _ in range(3):
+            os.system("git clone https://huggingface.co/huzey/alignedthreeattn >> /dev/null 2>&1")
+            os.system("git -C alignedthreeattn pull >> /dev/null 2>&1")
+        # add to path
+        sys.path.append("alignedthreeattn")
 
 
     from alignedthreeattn.alignedthreeattn_model import ThreeAttnNodes
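The rewritten loader is idempotent: the `sys.path` guard skips re-cloning once the repo is importable, and the three-iteration loop retries the clone/pull to ride out transient network failures on Space startup. A minimal standalone sketch of the same pattern, assuming a hypothetical `ensure_repo` helper and substituting `subprocess.run` for `os.system`:

```python
import subprocess
import sys
from pathlib import Path

def ensure_repo(url: str, dest: str, retries: int = 3) -> None:
    """Clone url into dest (pull if it already exists), retrying on failure."""
    for _ in range(retries):
        if Path(dest, ".git").exists():
            # Repo already cloned: just fetch the latest commit.
            result = subprocess.run(["git", "-C", dest, "pull"], capture_output=True)
        else:
            result = subprocess.run(["git", "clone", url, dest], capture_output=True)
        if result.returncode == 0:
            break

    # Guarded append keeps sys.path free of duplicate entries.
    if dest not in sys.path:
        sys.path.append(dest)

ensure_repo("https://huggingface.co/huzey/alignedthreeattn", "alignedthreeattn")
```

Checking for the `.git` directory separates the clone case from the pull case, so repeated calls stay cheap.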
@@ -692,11 +693,6 @@ def load_alignedthreemodel():
     align_weights = torch.load("alignedthreeattn/align_weights.pth")
     model = ThreeAttnNodes(align_weights)
 
-    # url = 'https://huggingface.co/huzey/aligned_model_test/resolve/main/3attn_nodes.pth'
-    # save_path = "alignedthreemodel.pth"
-    # if not os.path.exists(save_path):
-    #     os.system(f"wget {url} -O {save_path} -q")
-    # model = torch.load(save_path)
     return model
 
 promptable_diffusion_models = ["Diffusion(stabilityai/stable-diffusion-2)", "Diffusion(CompVis/stable-diffusion-v1-4)"]
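The weights file is read straight out of the cloned repo with `torch.load`. One hedged variation, not part of this commit: passing `map_location` makes the load succeed even when the checkpoint was saved from a GPU and the current process has none.

```python
import torch

# Assumption for illustration: load onto CPU first so the call also works
# on GPU-less machines; move the tensors to a device afterwards if needed.
align_weights = torch.load("alignedthreeattn/align_weights.pth", map_location="cpu")
```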
@@ -1174,7 +1170,7 @@ with demo:
         with gr.Column(scale=5, min_width=200):
             input_gallery, submit_button, clear_images_button = make_input_images_section()
             dataset_dropdown, num_images_slider, random_seed_slider, load_images_button = make_dataset_images_section()
-            logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+            logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information", autofocus=False, autoscroll=False)
 
         with gr.Column(scale=5, min_width=200):
             output_gallery = make_output_images_section()
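The only change in this hunk adds `autofocus=False, autoscroll=False`, which keeps the log box from grabbing keyboard focus on page load and from jumping to the bottom on every update. A minimal sketch of the widget in isolation (simplified from the app's layout):

```python
import gradio as gr

with gr.Blocks() as demo:
    # Log box that neither steals focus on load nor scrolls on update.
    logging_text = gr.Textbox(
        "Logging information",
        label="Logging",
        type="text",
        placeholder="Logging information",
        autofocus=False,
        autoscroll=False,
    )
```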
@@ -1490,17 +1486,65 @@ with demo:
         # logging text box
         logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
 
-
-
-
-
-
-
-
-
-
+        clear_images_button.click(lambda x: ([], []), outputs=[input_gallery, output_gallery])
+
+        false_placeholder = gr.Checkbox(label="False", value=False, elem_id="false_placeholder", visible=False)
+        no_prompt = gr.Textbox("", label="", elem_id="empty_placeholder", type="text", placeholder="", visible=False)
+
+        submit_button.click(
+            run_fn,
+            inputs=[
+                input_gallery, model_dropdown, layer_slider, num_eig_slider, node_type_dropdown,
+                positive_prompt, negative_prompt,
+                false_placeholder, no_prompt, no_prompt, no_prompt,
+                affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                embedding_method_dropdown, embedding_metric_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                perplexity_slider, n_neighbors_slider, min_dist_slider, sampling_method_dropdown
+            ],
+            # outputs=galleries + [logging_text],
+            outputs=[output_gallery, logging_text],
+        )
+
+    with gr.Tab('Model Aligned (+Recursive)'):
+        gr.Markdown('This page reproduce the results from the paper [AlignedCut](https://arxiv.org/abs/2406.18344)')
+        gr.Markdown('---')
+        gr.Markdown('**Features are aligned across models and layers.** A linear alignment transform is trained for each model/layer, learning signal comes from 1) fMRI brain activation and 2) segmentation preserving eigen-constraints.')
+        gr.Markdown('NCUT is computed on the concatenated graph of all models, layers, and images. Color is **aligned** across all models and layers.')
+        gr.Markdown('')
+        gr.Markdown("To see a good pattern, you will need to load 100~1000 images. 100 images need 10sec for RTX4090. Running out of HuggingFace GPU Quota? Try [Demo](https://ncut-pytorch.readthedocs.io/en/latest/demo/) hosted at UPenn")
+        gr.Markdown('---')
+        with gr.Row():
+            with gr.Column(scale=5, min_width=200):
+                input_gallery, submit_button, clear_images_button = make_input_images_section()
 
-
+                dataset_dropdown, num_images_slider, random_seed_slider, load_images_button = make_dataset_images_section(advanced=True, is_random=True)
+                num_images_slider.value = 100
+
+
+            with gr.Column(scale=5, min_width=200):
+                output_gallery = make_output_images_section()
+                gr.Markdown('### TIP1: use the `full-screen` button, and use `arrow keys` to navigate')
+                gr.Markdown('---')
+                gr.Markdown('Model: CLIP(ViT-B-16/openai), DiNOv2reg(dinov2_vitb14_reg), MAE(vit_base)')
+                gr.Markdown('Layer type: attention output (attn), without sum of residual')
+                gr.Markdown('### TIP2: for large image set, please increase the `num_sample` for t-SNE and NCUT')
+                gr.Markdown('---')
+                [
+                    model_dropdown, layer_slider, node_type_dropdown, num_eig_slider,
+                    affinity_focal_gamma_slider, num_sample_ncut_slider, knn_ncut_slider,
+                    embedding_method_dropdown, embedding_metric_dropdown, num_sample_tsne_slider, knn_tsne_slider,
+                    perplexity_slider, n_neighbors_slider, min_dist_slider,
+                    sampling_method_dropdown, positive_prompt, negative_prompt
+                ] = make_parameters_section()
+        model_dropdown.value = "AlignedThreeModelAttnNodes"
+        model_dropdown.visible = False
+        layer_slider.visible = False
+        node_type_dropdown.visible = False
+        num_sample_ncut_slider.value = 10000
+        num_sample_tsne_slider.value = 1000
+        # logging text box
+        logging_text = gr.Textbox("Logging information", label="Logging", elem_id="logging", type="text", placeholder="Logging information")
+
         clear_images_button.click(lambda x: ([], []), outputs=[input_gallery, output_gallery])
 
         false_placeholder = gr.Checkbox(label="False", value=False, elem_id="false_placeholder", visible=False)
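The new tab reuses one generic `run_fn` signature across tabs by feeding hidden placeholder components (`false_placeholder`, `no_prompt`) into the slots a given tab does not expose. A minimal runnable sketch of that wiring pattern, with a stand-in `run_fn` rather than the app's real one:

```python
import gradio as gr

def run_fn(images, flag, prompt):
    # Stand-in for the app's run_fn: report what the event handler received.
    return f"got {len(images or [])} image(s), flag={flag}, prompt={prompt!r}"

with gr.Blocks() as demo:
    input_gallery = gr.Gallery(label="Input")
    # Hidden placeholders fill the unused slots of the shared signature.
    false_placeholder = gr.Checkbox(value=False, visible=False)
    no_prompt = gr.Textbox("", visible=False)
    logging_text = gr.Textbox(label="Logging")
    submit_button = gr.Button("Submit")
    submit_button.click(
        run_fn,
        inputs=[input_gallery, false_placeholder, no_prompt],
        outputs=[logging_text],
    )

demo.launch()
```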
@@ -1520,6 +1564,7 @@ with demo:
             outputs=[output_gallery, logging_text],
         )
 
+
     with gr.Tab('Compare Models'):
         def add_one_model(i_model=1):
             with gr.Column(scale=5, min_width=200) as col:
|