ProGamerGov
/

qwen-360-diffusion

+"""
+Install dependencies:
+pip install pytorch360convert
+Example ffmpeg command to use on output frames:
+ffmpeg -framerate 60 -i output_frames/sweep360_%06d.png -c:v libx264 -pix_fmt yuv420p my_360_video.mp4
+# Example for calculating FOV to use for specific dimensions
+import math
+width, height = 1280, 896
+ratio = width / height
+vfov_deg = 70.0
+vfov = math.radians(vfov_deg)
+hfov = 2 * math.atan(ratio * math.tan(vfov / 2))
+hfov_deg = math.degrees(hfov)
+print(hfov_deg)  # ~90.02°
+"""
+import math
+import os
+from typing import Dict, List, Optional, Tuple, Union
+import torch
+from pytorch360convert import e2p
+from PIL import Image
+import numpy as np
+from tqdm import tqdm
def load_image_to_tensor(
    path: str, device: Optional[Union[str, torch.device]] = None
) -> torch.Tensor:
    """
    Load an image file to a float32 torch tensor in CHW format, range [0,1].
    Args:
        path (str): Path of the image file to read. The image is converted to RGB.
        device (str or torch.device): Device to move the tensor to. Default: None (tensor is left where torch.from_numpy creates it)
    Returns:
        torch.Tensor: Image tensor laid out as (channels, height, width).
    """
    # The context manager releases the file handle once the pixels are decoded,
    # instead of leaving it open until garbage collection.
    with Image.open(path) as img:
        arr = np.array(img.convert("RGB")).astype(np.float32) / 255.0  # HWC float32
    t = torch.from_numpy(arr).permute(2, 0, 1)  # HWC -> CHW
    if device is not None:
        t = t.to(device)
    return t
+def _linear_progress(n_frames: int) -> List[float]:
+    """
+    Generate a linear progression from 0.0 to 1.0 over n_frames.
+    Args:
+        n_frames (int): Number of frames.
+    Returns:
+        List[float]: List of normalized progress values.
+    """
+    return [i / max(1, (n_frames - 1)) for i in range(n_frames)]
+def _ease_in_out_progress(n_frames: int) -> List[float]:
+    """
+    Generate an ease-in-out progression (cosine smoothing) from 0.0 to 1.0.
+    Args:
+        n_frames (int): Number of frames.
+    Returns:
+        List[float]: List of normalized progress values.
+    """
+    return [
+        0.5 * (1 - math.cos(math.pi * (i / max(1, (n_frames - 1)))))
+        for i in range(n_frames)
+    ]
def _save_tensor_as_image(tensor: torch.Tensor, path: str) -> None:
    """
    Save a CHW float tensor (range [0, 1]) as an 8-bit image file.
    Args:
        tensor (torch.Tensor): Image laid out as (C, H, W), or (B, C, H, W) in which case only the first item is saved.
        path (str): Destination file path.
    """
    if tensor.dim() == 4:  # [B,C,H,W] -> take first
        tensor = tensor[0]
    hwc = tensor.detach().cpu().permute(1, 2, 0).clamp(0.0, 1.0)
    # Round before the integer cast: a plain cast truncates, so a value such as
    # 253.99998 (an 8-bit level that went through float32 /255 and *255) would
    # be written one level too dark.
    pixels = (hwc * 255.0).round().to(dtype=torch.uint8)
    Image.fromarray(pixels.contiguous().numpy()).save(path)
def _pole_sweep_pitches(n_frames: int, max_pitch_deg: float = 85.0) -> List[float]:
    """
    Pitch angles for a level -> down -> up -> level sweep at constant angular speed.
    Args:
        n_frames (int): Number of pitch values to produce.
        max_pitch_deg (float): Largest pitch reached below and above the horizon. Default: 85.0
    Returns:
        List[float]: Pitch in degrees for each frame, starting and ending at 0.
    """
    # Path length: down (1x) + bottom to top (2x) + back to level (1x)
    total_distance = 4.0 * max_pitch_deg
    pitches = []
    for p in _linear_progress(n_frames):
        travelled = p * total_distance
        if travelled <= max_pitch_deg:
            # Phase 1: level (0) -> down (-max)
            pitches.append(0.0 - travelled)
        elif travelled <= 3.0 * max_pitch_deg:
            # Phase 2: down (-max) -> up (+max)
            pitches.append(travelled - 2.0 * max_pitch_deg)
        else:
            # Phase 3: up (+max) -> level (0)
            pitches.append(total_distance - travelled)
    return pitches


def generate_frames_from_equirect(
    equi_tensors: List[torch.Tensor],
    out_dir: str,
    resolution: Tuple[int, int] = (1080, 1920),
    fps: int = 30,
    duration_per_image: Optional[float] = 4.0,
    total_duration: Optional[float] = None,
    fov_deg: Union[float, Tuple[float, float]] = (70.0, 60.0),
    interpolation_mode: str = "bilinear",
    speed_profile: str = "constant",
    vertical_movement: Optional[Dict] = None,
    device: Optional[torch.device] = None,
    start_frame_index: int = 0,
    save_format: str = "png",
    start_yaw_deg: float = 0.0,
    end_yaw_deg: float = 360.0,
    filename_prefix: str = "frame",
    verbose: bool = True,
) -> List[str]:
    """
    Generate video frames by sweeping through one or more equirectangular images.
    Every image gets the same yaw sweep; with vertical_movement mode "separate" or
    "both" the yaw sweep is followed by a pole sweep (pitch 0 -> -85 -> +85 -> 0)
    rendered at the final yaw angle.
    Args:
        equi_tensors (List[torch.Tensor]): List of equirectangular image tensors (channels first).
        out_dir (str): Output directory where frames will be saved. Created if missing.
        resolution (tuple of int): Output frame resolution as (height, width). Default: (1080, 1920)
        fps (int): Frames per second for timing calculations. Default: 30
        duration_per_image (float): Duration in seconds for each image sweep. Used when total_duration is None. Default: 4.0
        total_duration (float): Total duration in seconds, split evenly across all images. Overrides duration_per_image. Default: None
        fov_deg (float or tuple): Field of view in degrees, passed through to e2p. Default: (70.0, 60.0)
        interpolation_mode (str): Resampling interpolation. Options: "nearest", "bilinear", "bicubic". Default: "bilinear"
        speed_profile (str): Progression curve. Options: "constant", "ease_in_out". Default: "constant"
        vertical_movement (dict): Parameters for adding pitch movement. Keys: "mode" ("none", "during", "separate", "both"), "amplitude_deg" (default 15.0), "pattern" ("sine"; any other value gives a linear ramp from -amplitude to +amplitude). Default: None
        device (torch.device): Device each image is moved to before rendering. Default: None (each tensor stays on its current device)
        start_frame_index (int): Starting frame index for naming. Default: 0
        save_format (str): Image format. Options: "png", "jpg", "jpeg", "bmp". Default: "png"
        start_yaw_deg (float): Starting yaw angle in degrees. Default: 0.0
        end_yaw_deg (float): Ending yaw angle in degrees. Default: 360.0
        filename_prefix (str): Prefix for saved frame filenames. Default: "frame"
        verbose (bool): Print progress information. Default: True
    Returns:
        List[str]: List of file paths for the saved frames.
    Raises:
        ValueError: If total_duration is not positive or speed_profile is unknown.
    """
    os.makedirs(out_dir, exist_ok=True)
    saved_paths: List[str] = []
    n_images = len(equi_tensors)
    if n_images == 0:
        return saved_paths

    # Decide frames per image
    if total_duration is not None:
        # Raise instead of assert: asserts are stripped under `python -O`
        if total_duration <= 0:
            raise ValueError("total_duration must be positive")
        seconds_per_image = total_duration / n_images
    else:
        seconds_per_image = duration_per_image if duration_per_image is not None else 4.0
    frames_per_image = max(1, int(round(seconds_per_image * fps)))

    vm = vertical_movement or {"mode": "none"}
    vm_mode = vm.get("mode", "none")
    pitch_during_sweep = vm_mode in ("during", "both")
    separate_pole_sweep = vm_mode in ("separate", "both")

    # Choose progress function
    if speed_profile == "constant":
        progress_fn = _linear_progress
    elif speed_profile == "ease_in_out":
        progress_fn = _ease_in_out_progress
    else:
        raise ValueError("speed_profile must be 'constant' or 'ease_in_out'")

    # Yaw/pitch tables are identical for every image, so build them once
    prog = progress_fn(frames_per_image)
    yaw_values = [start_yaw_deg + p * (end_yaw_deg - start_yaw_deg) for p in prog]
    if pitch_during_sweep:
        amplitude = float(vm.get("amplitude_deg", 15.0))
        if vm.get("pattern", "sine") == "sine":
            v_values = [amplitude * math.sin(2 * math.pi * p) for p in prog]
        else:
            v_values = [amplitude * (2 * p - 1) for p in prog]
    else:
        v_values = [0.0] * frames_per_image
    sweep_views = list(zip(yaw_values, v_values))

    # Pole sweep length is chosen so the camera turns at the same average
    # angular speed as during the yaw sweep.
    horizontal_distance = abs(end_yaw_deg - start_yaw_deg)
    degrees_per_frame = horizontal_distance / frames_per_image
    # Vertical path: 0° -> -85° -> +85° -> 0° = 340° total
    vertical_distance = 340.0
    if degrees_per_frame > 0:
        pole_frames = max(1, int(round(vertical_distance / degrees_per_frame)))
    else:
        # No yaw movement means there is no speed to match (and dividing by it
        # would fail); give the pole sweep the same frame budget instead.
        pole_frames = frames_per_image
    pole_v_values = _pole_sweep_pitches(pole_frames) if separate_pole_sweep else []

    frame_idx = start_frame_index

    def _render_frame(e_img: torch.Tensor, h_deg: float, v_deg: float) -> None:
        """Render one perspective view, save it, and advance the frame counter."""
        nonlocal frame_idx
        pers = e2p(
            e_img,
            fov_deg=fov_deg,
            h_deg=h_deg,
            v_deg=v_deg,
            out_hw=resolution,
            mode=interpolation_mode,
            channels_first=True,
        )
        filename = f"{filename_prefix}_{frame_idx:06d}.{save_format}"
        path = os.path.join(out_dir, filename)
        _save_tensor_as_image(pers, path)
        saved_paths.append(path)
        frame_idx += 1

    for img_idx, e_img in enumerate(equi_tensors):
        if verbose:
            print(f"Processing image {img_idx + 1}/{n_images}...")
        if device is not None:
            e_img = e_img.to(device)

        # Rotation frames
        for h_deg, v_deg in tqdm(
            sweep_views,
            desc=f"Image {img_idx + 1} rotation",
            disable=not verbose,
        ):
            _render_frame(e_img, h_deg, v_deg)

        # Optional separate pole sweep - continues from end position
        if separate_pole_sweep:
            if verbose:
                print(f"  Generating pole sweep for image {img_idx + 1}...")
                print(f"  Horizontal: {horizontal_distance}° in {frames_per_image} frames ({degrees_per_frame:.2f}°/frame)")
                print(f"  Vertical: {vertical_distance}° in {pole_frames} frames ({vertical_distance / pole_frames:.2f}°/frame)")
            final_yaw = yaw_values[-1]
            for v_deg in tqdm(
                pole_v_values,
                desc=f"Image {img_idx + 1} pole sweep",
                disable=not verbose,
            ):
                _render_frame(e_img, final_yaw, v_deg)

    if verbose:
        print(f"\nCompleted! Generated {len(saved_paths)} frames in {out_dir}")
    return saved_paths
def main():
    """
    Main function - configure your parameters here.
    Loads the images listed in IMAGE_PATHS, renders the sweep frames into
    OUTPUT_DIR and prints how many frames were written.
    """
    # Configuration
    IMAGE_PATHS = ["path/to/equi_image.jpg"]
    OUTPUT_DIR = "path/to/output_frames"
    start_idx = 0
    # Frame generation settings
    WIDTH = 1280
    HEIGHT = 896
    FPS = 60
    DURATION_PER_IMAGE = 10.0
    FOV_VERTICAL = 70.0
    # Derive the horizontal FOV from the vertical FOV and the aspect ratio so
    # it stays correct when WIDTH/HEIGHT change (~90.02° for 1280x896 at 70°).
    FOV_HORIZONTAL = math.degrees(
        2 * math.atan((WIDTH / HEIGHT) * math.tan(math.radians(FOV_VERTICAL) / 2))
    )
    # Movement settings
    SPEED_PROFILE = "constant"  # "constant" or "ease_in_out"
    START_YAW = 0.0
    END_YAW = 360.0
    # Vertical movement (set mode to "none" to disable)
    VERTICAL_MOVEMENT = {
        "mode": "separate",  # "none", "during", "separate", or "both"
        "amplitude_deg": 90.0,
        "pattern": "sine",  # "sine" or "linear"
    }
    # Other settings
    INTERPOLATION_MODE = "bilinear"  # "bilinear", "bicubic", or "nearest"
    SAVE_FORMAT = "png"  # "png", "jpg", "jpeg", or "bmp"
    FILENAME_PREFIX = "sweep360"
    # Fall back to the CPU so the script still runs on machines without CUDA
    DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
    # Load images as tensors
    equi_tensors = [load_image_to_tensor(img_path, DEVICE) for img_path in IMAGE_PATHS]
    if not equi_tensors:
        print("No images loaded. Please add your equirectangular images.")
        return
    # Generate frames
    saved_paths = generate_frames_from_equirect(
        equi_tensors=equi_tensors,
        out_dir=OUTPUT_DIR,
        resolution=(HEIGHT, WIDTH),
        fps=FPS,
        duration_per_image=DURATION_PER_IMAGE,
        fov_deg=(FOV_HORIZONTAL, FOV_VERTICAL),
        interpolation_mode=INTERPOLATION_MODE,
        speed_profile=SPEED_PROFILE,
        vertical_movement=VERTICAL_MOVEMENT,
        start_yaw_deg=START_YAW,
        end_yaw_deg=END_YAW,
        save_format=SAVE_FORMAT,
        filename_prefix=FILENAME_PREFIX,
        verbose=True,
        start_frame_index=start_idx,
    )
    print(f"Successfully generated {len(saved_paths)} frames")


if __name__ == "__main__":
    main()