differential-diffusion

Configuration error

App Files Files Community

cocktailpeanut committed on Mar 1, 2024

Commit

d7aa376

1 Parent(s): b4e7a1c

update

Browse files

Files changed (1) hide show

app.py +66 -9

app.py CHANGED Viewed

@@ -4,6 +4,18 @@ from torchvision import transforms
 from SDXL.diff_pipe import StableDiffusionXLDiffImg2ImgPipeline
 from diffusers import DPMSolverMultistepScheduler
 NUM_INFERENCE_STEPS = 50
 dtype = torch.float16
 if torch.cuda.is_available():
@@ -15,6 +27,9 @@ else:
   device = "cpu"
 #device = "cuda"
 base = StableDiffusionXLDiffImg2ImgPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=dtype, variant="fp16", use_safetensors=True
 )
@@ -32,6 +47,46 @@ base.scheduler = DPMSolverMultistepScheduler.from_config(base.scheduler.config)
 refiner.scheduler = DPMSolverMultistepScheduler.from_config(base.scheduler.config)
 def preprocess_image(image):
     image = image.convert("RGB")
     image = transforms.CenterCrop((image.size[1] // 64 * 64, image.size[0] // 64 * 64))(image)
@@ -78,12 +133,12 @@ def validate_inputs(image, map):
         raise gr.Error("Missing map")
-example1 = ["assets/input2.jpg", "assets/map2.jpg", 17.5,
-            "Tree of life under the sea, ethereal, glittering, lens flares, cinematic lighting, artwork by Anna Dittmann & Carne Griffiths, 8k, unreal engine 5, hightly detailed, intricate detailed",
-            "bad anatomy, poorly drawn face, out of frame, gibberish, lowres, duplicate, morbid, darkness, maniacal, creepy, fused, blurry background, crosseyed, extra limbs, mutilated, dehydrated, surprised, poor quality, uneven, off-centered, bird illustration, painting, cartoons"]
-example2 = ["assets/input3.jpg", "assets/map4.png", 21,
-            "overgrown atrium, nature, ancient black marble columns and terracotta tile floors, waterfall, ultra-high quality, octane render, corona render, UHD, 64k",
-            "Two bodies, Two heads, doll, extra nipples, bad anatomy, blurry, fuzzy, extra arms, extra fingers, poorly drawn hands, disfigured, tiling, deformed, mutated, out of frame, cloned face, watermark, text, lowres, disfigured, ostentatious, ugly, oversaturated, grain, low resolution, blurry, bad anatomy, poorly drawn face, mutant, mutated,  blurred, out of focus, long neck, long body, ugly, disgusting, bad drawing, childish"]
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
@@ -98,9 +153,11 @@ with gr.Blocks() as demo:
                 run_btn = gr.Button("Run",variant="primary")
         output = gr.Image(label="Output Image")
-    gr.Examples(examples=[example1, example2],inputs=[input_image, change_map, gs, prompt, neg_prompt])
-    gr.Markdown("Differential Diffusion with SDXL; Thanks to the community for the prompts in the examples.")
-    run_btn.click(inference, inputs=[input_image, change_map, gs, prompt, neg_prompt], outputs=output)
     clr_btn.add(output)
 if __name__ == "__main__":
     demo.launch()

 from SDXL.diff_pipe import StableDiffusionXLDiffImg2ImgPipeline
 from diffusers import DPMSolverMultistepScheduler
+# DepthAnything
+import cv2
+import numpy as np
+import os
+from PIL import Image
+import torch.nn.functional as F
+from torchvision.transforms import Compose
+import tempfile
+from gradio_imageslider import ImageSlider
+from depth_anything.dpt import DepthAnything
+from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
 NUM_INFERENCE_STEPS = 50
 dtype = torch.float16
 if torch.cuda.is_available():
   device = "cpu"
 #device = "cuda"
+encoder = 'vitl' # can also be 'vits' or 'vitb'
+model = DepthAnything.from_pretrained(f"LiheYoung/depth_anything_{encoder}14").to(DEVICE).eval()
 base = StableDiffusionXLDiffImg2ImgPipeline.from_pretrained(
     "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=dtype, variant="fp16", use_safetensors=True
 )
 refiner.scheduler = DPMSolverMultistepScheduler.from_config(base.scheduler.config)
+# DepthAnything
+@torch.no_grad()  # gradient tracking is disabled for the duration of the call
+def predict_depth(model, image):  # thin wrapper: calls `model` on `image` and returns its result unchanged
+    return model(image)
+def depthify(image):  # returns [(original copy, color-mapped depth), path of saved PNG, raw depth as a PIL image]
+    original_image = image.copy()  # untouched copy, returned as the first element of the pair
+    h, w = image.shape[:2]  # first two dims; used below as the interpolation target size
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0  # swap B/R channels, then divide by 255; NOTE(review): assumes 8-bit BGR input - confirm against caller
+    image = transform({'image': image})['image']  # NOTE(review): `transform` is not defined in the visible lines - confirm it exists at module level
+    image = torch.from_numpy(image).unsqueeze(0).to(DEVICE)  # adds a leading dim; NOTE(review): only lowercase `device` is visible in this file, `DEVICE` is not - confirm
+    depth = predict_depth(model, image)  # uses the module-level `model`
+    depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]  # resize the prediction back to (h, w) and drop the two leading dims
+    raw_depth = Image.fromarray(depth.cpu().numpy().astype('uint8'))  # cast to uint8 happens BEFORE the min-max scaling below, so this holds un-normalized values
+    tmp = tempfile.NamedTemporaryFile(suffix='.png', delete=False)  # delete=False: the file outlives this call; the handle is not closed here
+    raw_depth.save(tmp.name)  # write the raw depth image to the temp path
+    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0  # min-max scale to 0..255; denominator is zero when max == min
+    depth = depth.cpu().numpy().astype(np.uint8)
+    colored_depth = cv2.applyColorMap(depth, cv2.COLORMAP_INFERNO)[:, :, ::-1]  # apply the INFERNO color map, then reverse the channel axis
+    return [(original_image, colored_depth), tmp.name, raw_depth]
+# DifferentialDiffusion
 def preprocess_image(image):
     image = image.convert("RGB")
     image = transforms.CenterCrop((image.size[1] // 64 * 64, image.size[0] // 64 * 64))(image)
         raise gr.Error("Missing map")
+def run(image, gs, prompt, neg_prompt):  # computes a depth image via depthify() and forwards it to inference() in the map position
+    # first run
+    [(original_image, colored_depth), name, raw_depth] = depthify(image)  # colored_depth and name are only used by the print below
+    print(f"original_image={original_image} colored_depth={colored_depth}, name={name}, raw_depth={raw_depth}")  # debug output; prints the objects' full reprs
+    return inference(original_image, raw_depth, gs, prompt, neg_prompt)  # NOTE(review): the visible run_btn.click passes `inference`, not `run` - confirm this function is actually wired up
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
                 run_btn = gr.Button("Run",variant="primary")
         output = gr.Image(label="Output Image")
+    run_btn.click(
+      inference,
+      inputs=[input_image, change_map, gs, prompt, neg_prompt],
+      outputs=output
+    )
     clr_btn.add(output)
 if __name__ == "__main__":
     demo.launch()