Spaces:

frascuchon
/

music-mcp

Running on CPU Upgrade

App Files Files Community

frascuchon HF Staff committed on 21 days ago

Commit

5dc3b1e

1 Parent(s): a5d8e64

fixing voice removal download URLs

Browse files

Files changed (4) hide show

mcp_server.py +12 -4
tools/audio_cleaning.py +75 -0
tools/audio_insertion.py +193 -0
tools/voice_replacement.py +343 -23

mcp_server.py CHANGED Viewed

@@ -1318,8 +1318,8 @@ def replace_voice_mcp(
     the target audio while preserving the linguistic content and timing.
     Args:
-        source_audio_path: Path to the source audio file (voice to be replaced)
-        target_audio_path: Path to the target audio file (voice to use)
         diffusion_steps: Number of diffusion steps for inference (default: 10)
         length_adjust: Length adjustment factor (default: 1.0)
         inference_cfg_rate: Classifier-free guidance rate (default: 0.7)
@@ -1337,6 +1337,12 @@ def replace_voice_mcp(
         >>> replace_voice_mcp("speech.mp3", "singer.wav", diffusion_steps=15, pitch_shift=2)
         # Returns path to voice-replaced audio with custom settings
     Note:
         - Uses Seed-VC model for high-quality voice conversion
         - Preserves linguistic content and timing from source audio
@@ -1953,11 +1959,13 @@ def create_interface() -> gr.TabbedInterface:
         inputs=[
             gr.Audio(
                 type="filepath",
-                label="Source Audio (voice to be replaced)",
                 sources=["upload"],
             ),
             gr.Audio(
-                type="filepath", label="Target Audio (voice to use)", sources=["upload"]
             ),
             gr.Number(value=10, label="Diffusion Steps", minimum=1, maximum=50),
             gr.Number(value=1.0, label="Length Adjust", minimum=0.1, maximum=3.0),

     the target audio while preserving the linguistic content and timing.
     Args:
+        source_audio_path: Path to the source audio file or URL (voice to be replaced)
+        target_audio_path: Path to the target audio file or URL (voice to use)
         diffusion_steps: Number of diffusion steps for inference (default: 10)
         length_adjust: Length adjustment factor (default: 1.0)
         inference_cfg_rate: Classifier-free guidance rate (default: 0.7)
         >>> replace_voice_mcp("speech.mp3", "singer.wav", diffusion_steps=15, pitch_shift=2)
         # Returns path to voice-replaced audio with custom settings
+        >>> replace_voice_mcp("https://example.com/source.wav", "target.wav")
+        # Downloads source audio and replaces voice with target voice
+        >>> replace_voice_mcp("source.wav", "https://example.com/voice.mp3", pitch_shift=2)
+        # Downloads target voice and applies to source with pitch shift
     Note:
         - Uses Seed-VC model for high-quality voice conversion
         - Preserves linguistic content and timing from source audio
         inputs=[
             gr.Audio(
                 type="filepath",
+                label="Source Audio (voice to be replaced) - Local file or URL",
                 sources=["upload"],
             ),
             gr.Audio(
+                type="filepath",
+                label="Target Audio (voice to use) - Local file or URL",
+                sources=["upload"],
             ),
             gr.Number(value=10, label="Diffusion Steps", minimum=1, maximum=50),
             gr.Number(value=1.0, label="Length Adjust", minimum=0.1, maximum=3.0),

tools/audio_cleaning.py CHANGED Viewed

@@ -256,3 +256,78 @@ def remove_noise(
     except Exception as e:
         raise RuntimeError(f"Error removing noise: {str(e)}")

     except Exception as e:
         raise RuntimeError(f"Error removing noise: {str(e)}")
def remove_noise_wrapper(audio_path: str, noise_reduction_factor: float = 0.5) -> str:
    """
    Run noise removal and report failures as text instead of raising.

    Meant for MCP integration, where a string is returned in every case.

    Args:
        audio_path: Path to the input audio file
        noise_reduction_factor: Noise reduction strength (0.1-1.0, default: 0.5)

    Returns:
        The value returned by remove_noise on success (path to the cleaned
        audio file), or a message of the form "Error: <details>" when any
        exception was raised
    """
    try:
        # The second positional argument is always "general" in this wrapper.
        cleaned = remove_noise(audio_path, "general", noise_reduction_factor)
    except Exception as exc:
        return "Error: " + str(exc)
    return cleaned
if __name__ == "__main__":
    # Command-line entry point for running audio cleaning locally.
    #
    # Usage:
    #     python tools/audio_cleaning.py input.wav
    #     python tools/audio_cleaning.py input.wav --reduction 0.7
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Remove noise from audio files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python tools/audio_cleaning.py noisy.wav
  python tools/audio_cleaning.py noisy.wav --reduction 0.7
  python tools/audio_cleaning.py noisy.wav --output cleaned/
        """,
    )
    argument_table = [
        (("audio_path",), {"help": "Path to the input audio file"}),
        (
            ("--reduction",),
            {
                "type": float,
                "default": 0.5,
                "help": "Noise reduction factor (0.1-1.0, default: 0.5)",
            },
        ),
        (("--output",), {"help": "Output directory (default: output/)"}),
    ]
    for names, settings in argument_table:
        parser.add_argument(*names, **settings)
    args = parser.parse_args()

    # Echo the effective settings before doing any work.
    # NOTE(review): --output is only echoed here; it is not passed on to
    # remove_noise_wrapper below.
    banner = [
        "Audio Cleaning Tool",
        "=" * 25,
        f"Input: {args.audio_path}",
        f"Noise reduction: {args.reduction}",
    ]
    if args.output:
        banner.append(f"Output directory: {args.output}")
    banner.append("")
    for banner_line in banner:
        print(banner_line)

    try:
        result = remove_noise_wrapper(
            audio_path=args.audio_path, noise_reduction_factor=args.reduction
        )
        # The wrapper signals failure through an "Error:"-prefixed string.
        if not result.startswith("Error:"):
            print("✅ Audio cleaning completed!")
            print(f"Output saved to: {result}")
        else:
            print(f"❌ {result}")
            sys.exit(1)
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)

tools/audio_insertion.py CHANGED Viewed

@@ -372,3 +372,196 @@ def replace_section(
     except Exception as e:
         raise RuntimeError(f"Error replacing audio section: {str(e)}")

     except Exception as e:
         raise RuntimeError(f"Error replacing audio section: {str(e)}")
def insert_section_wrapper(
    audio_path: str,
    insert_path: str,
    insert_time: float,
    crossfade_duration: float = 0.1,
    output_format: str = "wav",
) -> str:
    """
    Insert an audio section and report failures as text instead of raising.

    Meant for MCP integration, where a string is returned in every case.

    Args:
        audio_path: Path to the main audio file
        insert_path: Path to the audio section to insert
        insert_time: Time to insert the section (in seconds)
        crossfade_duration: Length of crossfade in seconds (default: 0.1)
        output_format: Output format ('wav' or 'mp3', default: 'wav')

    Returns:
        The value returned by insert_section on success (path to the output
        file), or a message of the form "Error: <details>" when any exception
        was raised
    """
    # output_path is always None, so insert_section picks the destination.
    call_arguments = {
        "audio_path": audio_path,
        "section_path": insert_path,
        "insert_time": insert_time,
        "crossfade_duration": crossfade_duration,
        "output_path": None,
        "output_format": output_format,
    }
    try:
        produced = insert_section(**call_arguments)
    except Exception as exc:
        return "Error: " + str(exc)
    return produced
def replace_section_wrapper(
    audio_path: str,
    start_time: float,
    end_time: float,
    replacement_path: str,
    crossfade_duration: float = 0.1,
    output_format: str = "wav",
) -> str:
    """
    Replace an audio section and report failures as text instead of raising.

    Meant for MCP integration, where a string is returned in every case.

    Args:
        audio_path: Path to the main audio file
        start_time: Start time of section to replace (in seconds)
        end_time: End time of section to replace (in seconds)
        replacement_path: Path to the replacement audio segment
        crossfade_duration: Length of crossfade in seconds (default: 0.1)
        output_format: Output format ('wav' or 'mp3', default: 'wav')

    Returns:
        The value returned by replace_section on success (path to the output
        file), or a message of the form "Error: <details>" when any exception
        was raised
    """
    # output_path is always None, so replace_section picks the destination.
    call_arguments = {
        "audio_path": audio_path,
        "start_time": start_time,
        "end_time": end_time,
        "replacement_path": replacement_path,
        "crossfade_duration": crossfade_duration,
        "output_path": None,
        "output_format": output_format,
    }
    try:
        produced = replace_section(**call_arguments)
    except Exception as exc:
        return "Error: " + str(exc)
    return produced
if __name__ == "__main__":
    # Command-line entry point for running audio insertion/replacement locally.
    #
    # Usage:
    #     python tools/audio_insertion.py insert main.wav insert.wav 30.0
    #     python tools/audio_insertion.py replace main.wav 10.0 20.0 replacement.wav
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Insert or replace audio sections",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Insert section at 30 seconds
  python tools/audio_insertion.py insert main.wav insert.wav 30.0
  # Replace section from 10s to 20s
  python tools/audio_insertion.py replace main.wav 10.0 20.0 replacement.wav
  # With custom crossfade
  python tools/audio_insertion.py insert main.wav insert.wav 30.0 --crossfade 0.2
        """,
    )
    subparsers = parser.add_subparsers(dest="command", help="Command to run")

    # Insert command
    insert_parser = subparsers.add_parser("insert", help="Insert audio section")
    insert_parser.add_argument("main", help="Main audio file")
    insert_parser.add_argument("insert", help="Audio section to insert")
    insert_parser.add_argument("time", type=float, help="Insert time in seconds")

    # Replace command
    replace_parser = subparsers.add_parser("replace", help="Replace audio section")
    replace_parser.add_argument("main", help="Main audio file")
    replace_parser.add_argument("start", type=float, help="Start time in seconds")
    replace_parser.add_argument("end", type=float, help="End time in seconds")
    replace_parser.add_argument("replacement", help="Replacement audio section")

    # Both commands accept the same optional flags.
    for command_parser in (insert_parser, replace_parser):
        command_parser.add_argument(
            "--crossfade",
            type=float,
            default=0.1,
            help="Crossfade duration in seconds (default: 0.1)",
        )
        command_parser.add_argument(
            "--format",
            choices=["wav", "mp3"],
            default="wav",
            help="Output format (default: wav)",
        )

    args = parser.parse_args()
    if not args.command:
        # No subcommand given: show usage and exit with a failure status.
        parser.print_help()
        sys.exit(1)

    print("Audio Insertion Tool")
    print("=" * 25)

    # Noun used in the success message. Appending "ion" to the command name
    # only works for "insert" ("replace" would read "replaceion").
    completed_action = {"insert": "insertion", "replace": "replacement"}

    try:
        result = None
        if args.command == "insert":
            print(f"Main audio: {args.main}")
            print(f"Insert section: {args.insert}")
            print(f"Insert time: {args.time}s")
            print(f"Crossfade: {args.crossfade}s")
            print()
            result = insert_section_wrapper(
                audio_path=args.main,
                insert_path=args.insert,
                insert_time=args.time,
                crossfade_duration=args.crossfade,
                output_format=args.format,
            )
        elif args.command == "replace":
            print(f"Main audio: {args.main}")
            print(f"Replace section: {args.start}s - {args.end}s")
            print(f"Replacement: {args.replacement}")
            print(f"Crossfade: {args.crossfade}s")
            print()
            result = replace_section_wrapper(
                audio_path=args.main,
                start_time=args.start,
                end_time=args.end,
                replacement_path=args.replacement,
                crossfade_duration=args.crossfade,
                output_format=args.format,
            )

        if result is None:
            print("❌ No command executed")
            sys.exit(1)
        elif result.startswith("Error:"):
            # The wrappers signal failure through an "Error:"-prefixed string.
            print(f"❌ {result}")
            sys.exit(1)
        else:
            action = completed_action.get(args.command, args.command)
            print(f"✅ Audio {action} completed!")
            print(f"Output saved to: {result}")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)

tools/voice_replacement.py CHANGED Viewed

@@ -1,10 +1,138 @@
-import os
 from datetime import datetime
 from pathlib import Path
 from gradio_client import Client, handle_file
-from tools.audio_info import validate_audio_path
 def replace_voice(
@@ -20,13 +148,25 @@ def replace_voice(
     """
     Replace voice in source audio with voice from target audio using Seed-VC.
-    This function uses the Seed-VC Gradio space to perform voice conversion,
-    replacing the voice characteristics in the source audio with those from
-    the target audio while preserving the linguistic content and timing.
     Args:
-        source_audio_path: Path to the source audio file (voice to be replaced)
-        target_audio_path: Path to the target audio file (voice to use)
         diffusion_steps: Number of diffusion steps for inference (default: 10)
         length_adjust: Length adjustment factor (default: 1.0)
         inference_cfg_rate: Classifier-free guidance rate (default: 0.7)
@@ -35,17 +175,26 @@ def replace_voice(
         pitch_shift: Pitch shift in semitones (default: 0)
     Returns:
-        Path to the generated voice-replaced audio file
     Raises:
         FileNotFoundError: If source or target audio files don't exist
         ValueError: If parameters are invalid
         RuntimeError: If voice replacement fails
     """
     try:
-        # Validate input paths
-        source_abs_path = validate_audio_path(source_audio_path)
-        target_abs_path = validate_audio_path(target_audio_path)
         # Validate parameters
         if diffusion_steps < 1 or diffusion_steps > 50:
@@ -57,13 +206,17 @@ def replace_voice(
         if pitch_shift < -12 or pitch_shift > 12:
             raise ValueError("pitch_shift must be between -12 and 12 semitones")
-        # Initialize Seed-VC client
-        client = Client("Plachta/Seed-VC")
         # Perform voice replacement
         result = client.predict(
-            source_audio_path=handle_file(source_abs_path),
-            target_audio_path=handle_file(target_abs_path),
             diffusion_steps=diffusion_steps,
             length_adjust=length_adjust,
             inference_cfg_rate=inference_cfg_rate,
@@ -74,7 +227,7 @@ def replace_voice(
         )
         # Create output directory
-        output_dir = Path("output")
         output_dir.mkdir(exist_ok=True)
         # Generate output filename with timestamp
@@ -86,14 +239,38 @@ def replace_voice(
         )
         output_path = output_dir / output_filename
-        # Save the result
-        if isinstance(result, str) and os.path.exists(result):
-            # If result is a file path, copy it to output location
             import shutil
             shutil.copy2(result, output_path)
         else:
-            # If result is audio data, save it using soundfile
             import soundfile as sf
             sf.write(str(output_path), result, 22050)
@@ -101,7 +278,49 @@ def replace_voice(
         return str(output_path)
     except Exception as e:
-        raise RuntimeError(f"Voice replacement failed: {str(e)}")
 def replace_voice_wrapper(
@@ -118,8 +337,8 @@ def replace_voice_wrapper(
     Wrapper function for voice replacement with error handling for MCP integration.
     Args:
-        source_audio_path: Path to the source audio file
-        target_audio_path: Path to the target audio file
         diffusion_steps: Number of diffusion steps (default: 10)
         length_adjust: Length adjustment factor (default: 1.0)
         inference_cfg_rate: CFG rate (default: 0.7)
@@ -129,6 +348,13 @@ def replace_voice_wrapper(
     Returns:
         Path to generated audio file or error message
     """
     try:
         return replace_voice(
@@ -143,3 +369,97 @@ def replace_voice_wrapper(
         )
     except Exception as e:
         return f"Error: {str(e)}"

+import ssl
+import tempfile
+import urllib.request
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 from gradio_client import Client, handle_file
+from gradio_client.client import DEFAULT_TEMP_DIR
+# Handle imports for both module and script usage
+try:
+    from tools.audio_info import validate_audio_path
+except ImportError:
+    from audio_info import validate_audio_path
def resolve_audio_path(audio_path: str) -> str:
    """
    Turn a local path or an HTTP/HTTPS URL into a local audio file path.

    Args:
        audio_path: Path to local audio file or URL

    Returns:
        For values starting with "http://" or "https://", the result of
        download_audio_from_url; otherwise the result of validate_audio_path

    Raises:
        ValueError: If audio_path is empty
        RuntimeError: If URL download fails
    """
    if not audio_path:
        raise ValueError("Audio path cannot be empty")

    # Only the literal lowercase http:// and https:// prefixes count as URLs.
    is_remote = audio_path.startswith(("http://", "https://"))
    resolver = download_audio_from_url if is_remote else validate_audio_path
    return resolver(audio_path)
def download_audio_from_url(url: str, output_path: Optional[str] = None) -> str:
    """
    Download audio from a URL to a temporary file or to a specified path.

    Up to three strategies are tried, strictest first, and the first one that
    produces a non-empty file wins:

    1. urllib's default behaviour (no custom SSL context).
    2. Certificates required, hostname check disabled.
    3. Certificate verification disabled.

    SECURITY NOTE: strategies 2 and 3 weaken or disable TLS verification, so a
    download that only succeeds through them is not protected against
    tampering in transit. They are kept as a deliberate best-effort fallback.

    Args:
        url: URL to audio file
        output_path: Optional custom output path. If None, a uniquely named
            file is created in the system temp directory.

    Returns:
        Path to downloaded file

    Raises:
        RuntimeError: If every strategy fails or yields an empty file
    """
    from urllib.parse import urlparse

    if output_path:
        temp_path = output_path
    else:
        # Keep a recognised audio extension from the URL so the file is not
        # mislabelled; anything else falls back to ".wav".
        suffix = Path(urlparse(url).path).suffix.lower()
        if suffix not in (".wav", ".mp3", ".flac", ".m4a", ".ogg"):
            suffix = ".wav"
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        # A timestamp alone is not unique: two downloads started within the
        # same second would share one path and overwrite each other. Let
        # tempfile reserve a unique name instead.
        with tempfile.NamedTemporaryFile(
            prefix=f"voice_replacement_{stamp}_",
            suffix=suffix,
            dir=tempfile.gettempdir(),
            delete=False,
        ) as placeholder:
            temp_path = placeholder.name

    destination = Path(temp_path)
    download_methods = [
        # Method 1: urllib defaults
        lambda: _download_no_ssl(url, temp_path),
        # Method 2: certificates required, hostname not checked
        lambda: _download_with_ssl_context(
            url, temp_path, ssl.VerifyMode.CERT_REQUIRED
        ),
        # Method 3: relaxed SSL (ignore cert errors)
        lambda: _download_with_ssl_context(url, temp_path, ssl.VerifyMode.CERT_NONE),
    ]

    last_error = None
    for download_method in download_methods:
        try:
            download_method()
            if not destination.exists() or destination.stat().st_size == 0:
                raise RuntimeError(f"Downloaded file is empty or missing: {temp_path}")
            return temp_path
        except Exception as e:
            last_error = e
            # Drop any partial download before the next attempt, and also
            # after the final one so no stray file is left behind.
            destination.unlink(missing_ok=True)

    raise RuntimeError(
        f"Failed to download audio from URL {url}. Last error: {str(last_error)}"
    ) from last_error


def _fetch_to_file(
    url: str,
    temp_path: str,
    ssl_context: Optional[ssl.SSLContext] = None,
    timeout: float = 60.0,
) -> None:
    """Stream the response for url into temp_path using the given SSL context."""
    import shutil

    req = urllib.request.Request(url)
    req.add_header("User-Agent", "Mozilla/5.0 (compatible; Voice-Replacement-Tool/1.0)")
    # The timeout keeps a stalled server from blocking the caller forever.
    with urllib.request.urlopen(req, context=ssl_context, timeout=timeout) as response:
        with open(temp_path, "wb") as f:
            # Copy in chunks instead of holding the whole body in memory.
            shutil.copyfileobj(response, f)


def _download_with_ssl_context(
    url: str, temp_path: str, verify_mode: ssl.VerifyMode
) -> None:
    """Download with a specific SSL certificate mode and no hostname check."""
    ssl_context = ssl.create_default_context()
    # check_hostname must be switched off before verify_mode may be set to
    # CERT_NONE; it is switched off for every mode passed to this helper.
    ssl_context.check_hostname = False
    ssl_context.verify_mode = verify_mode
    _fetch_to_file(url, temp_path, ssl_context)


def _download_no_ssl(url: str, temp_path: str) -> None:
    """
    Download without passing a custom SSL context.

    Despite the name, this does not turn verification off: urllib's own
    default handling applies.
    """
    _fetch_to_file(url, temp_path, None)
def cleanup_temp_file(file_path: str) -> None:
    """
    Delete a temporary file if it exists inside the system temp directory.

    Files whose containing directory is not the temp directory (or one of its
    subdirectories) are left untouched. Cleanup is best-effort: filesystem
    errors and unusable path values are ignored.

    Args:
        file_path: Path to temporary file
    """
    try:
        temp_root = Path(tempfile.gettempdir()).resolve()
        target = Path(file_path)
        # Compare resolved directories rather than string prefixes: a prefix
        # test also matches sibling directories such as "<tmp>_other" and
        # paths that climb out of the temp directory through "..".
        parent = target.parent.resolve()
        inside_temp = parent == temp_root or temp_root in parent.parents
        if inside_temp and target.is_file():
            target.unlink()
    except (OSError, TypeError, ValueError):
        # Ignore cleanup errors
        pass
 def replace_voice(
     """
     Replace voice in source audio with voice from target audio using Seed-VC.
+    This function uses Seed-VC Gradio space to perform voice conversion,
+    replacing voice characteristics in source audio with those from
+    target audio while preserving linguistic content and timing.
+    Examples:
+        >>> replace_voice("source.wav", "target.wav")
+        # Returns 'path/to/source_voice_replaced_by_target_20251126_143022.wav'
+        >>> replace_voice("https://example.com/source.wav", "target.wav", diffusion_steps=15)
+        # Downloads source audio and replaces voice with target voice
+        >>> replace_voice("source.wav", "https://example.com/voice.mp3", pitch_shift=2)
+        # Downloads target voice and applies to source with pitch shift
     Args:
+        source_audio_path: Path to source audio file or URL (voice to be replaced)
+                         Supports local files and HTTP/HTTPS URLs
+        target_audio_path: Path to target audio file or URL (voice to use)
+                         Supports local files and HTTP/HTTPS URLs
         diffusion_steps: Number of diffusion steps for inference (default: 10)
         length_adjust: Length adjustment factor (default: 1.0)
         inference_cfg_rate: Classifier-free guidance rate (default: 0.7)
         pitch_shift: Pitch shift in semitones (default: 0)
     Returns:
+        Path to generated voice-replaced audio file
     Raises:
         FileNotFoundError: If source or target audio files don't exist
         ValueError: If parameters are invalid
         RuntimeError: If voice replacement fails
     """
+    source_temp_file = None
+    target_temp_file = None
     try:
+        # Resolve input paths (handle both URLs and local files)
+        source_abs_path = resolve_audio_path(source_audio_path)
+        target_abs_path = resolve_audio_path(target_audio_path)
+        # Track temporary files for cleanup
+        if source_audio_path.startswith(("http://", "https://")):
+            source_temp_file = source_abs_path
+        if target_audio_path.startswith(("http://", "https://")):
+            target_temp_file = target_abs_path
         # Validate parameters
         if diffusion_steps < 1 or diffusion_steps > 50:
         if pitch_shift < -12 or pitch_shift > 12:
             raise ValueError("pitch_shift must be between -12 and 12 semitones")
+        # Initialize Seed-VC client with manual file handling
+        client = Client("Plachta/Seed-VC", download_files=False)
+        # Prepare file handles for manual upload
+        source_handle = handle_file(source_abs_path)
+        target_handle = handle_file(target_abs_path)
         # Perform voice replacement
         result = client.predict(
+            source_audio_path=source_handle,
+            target_audio_path=target_handle,
             diffusion_steps=diffusion_steps,
             length_adjust=length_adjust,
             inference_cfg_rate=inference_cfg_rate,
         )
         # Create output directory
+        output_dir = Path(DEFAULT_TEMP_DIR)
         output_dir.mkdir(exist_ok=True)
         # Generate output filename with timestamp
         )
         output_path = output_dir / output_filename
+        # Handle result - check if it's a file path or needs manual download
+        if hasattr(result, "url") and result.url:
+            # Result is a file object with URL - download manually
+            download_audio_from_url(result.url, str(output_path))
+        elif isinstance(result, str) and os.path.exists(result):
+            # Result is a local file path - copy it
             import shutil
             shutil.copy2(result, output_path)
+        elif isinstance(result, (tuple, list)):
+            import shutil
+            # Only download the second item if multiple outputs
+            item = result[0]
+            if len(result) > 1:
+                item = result[1]
+            if url:= item.get("url"):
+                # Download each URL to a separate file
+                item_output = str(output_path)
+                download_audio_from_url(url, item_output)
+            elif isinstance(item, str) and os.path.exists(item):
+                # Copy each local file
+                item_output = str(output_path)
+                shutil.copy2(item, item_output)
+            else:
+                raise RuntimeError(f"Unexpected result format in tuple: {item}")
+            shutil.move(item_output, output_path)
         else:
+            # Result is audio data - save it directly
             import soundfile as sf
             sf.write(str(output_path), result, 22050)
         return str(output_path)
     except Exception as e:
+        # Handle specific Seed-VC errors
+        error_msg = str(e)
+        if "403" in error_msg or "Forbidden" in error_msg:
+            raise RuntimeError(
+                "Seed-VC access denied. This may indicate:\n"
+                "1. Files are in unsupported format\n"
+                "2. Files are too large\n"
+                "3. Temporary space restrictions\n"
+                "4. Authentication required\n\n"
+                "TROUBLESHOOTING:\n"
+                "• Try different audio files (WAV, MP3, FLAC, M4A)\n"
+                "• Use smaller files (< 30MB recommended)\n"
+                "• Check if files are corrupted\n"
+                "• Try again later if rate limited\n"
+                "• Consider using a different voice source/target"
+            )
+        elif "404" in error_msg or "Not Found" in error_msg:
+            raise RuntimeError(
+                "Seed-VC cannot find one or both files. "
+                "Check if:\n"
+                "• Files exist and are accessible\n"
+                "• File paths are correct\n"
+                "• Files are in supported format (WAV, MP3, FLAC, M4A)\n"
+                "• Manual download was successful"
+            )
+        elif "timeout" in error_msg.lower():
+            raise RuntimeError(
+                "Seed-VC connection timeout. "
+                "Try:\n"
+                "• Using fewer diffusion steps (5-10)\n"
+                "• Smaller audio files\n"
+                "• Processing again later\n"
+                "• Checking internet connection"
+            )
+        else:
+            raise RuntimeError(f"Voice replacement failed: {error_msg}")
+    finally:
+        # Always clean up temporary files
+        if source_temp_file:
+            cleanup_temp_file(source_temp_file)
+        if target_temp_file:
+            cleanup_temp_file(target_temp_file)
 def replace_voice_wrapper(
     Wrapper function for voice replacement with error handling for MCP integration.
     Args:
+        source_audio_path: Path to input audio file or URL
+        target_audio_path: Path to target audio file or URL
         diffusion_steps: Number of diffusion steps (default: 10)
         length_adjust: Length adjustment factor (default: 1.0)
         inference_cfg_rate: CFG rate (default: 0.7)
     Returns:
         Path to generated audio file or error message
+    Note for URL usage:
+    Some URLs may be blocked by Seed-VC space restrictions.
+    If URL processing fails with access errors, try:
+    1. Download the file manually using your browser
+    2. Save it locally and use the local file path
+    3. Use a different audio source or target
     """
     try:
         return replace_voice(
         )
     except Exception as e:
         return f"Error: {str(e)}"
if __name__ == "__main__":
    # Command-line entry point for running voice replacement locally.
    #
    # Usage:
    #     python tools/voice_replacement.py source.wav target.wav
    #     python tools/voice_replacement.py source.wav target.wav --steps 15 --pitch 2
    #     python tools/voice_replacement.py https://example.com/source.wav target.wav
    #     python tools/voice_replacement.py source.wav https://example.com/target.mp3 --pitch 2
    import argparse
    import sys
    import os

    # Add parent directory to path for imports
    script_location = os.path.abspath(__file__)
    sys.path.insert(0, os.path.dirname(os.path.dirname(script_location)))

    parser = argparse.ArgumentParser(
        description="Voice replacement using Seed-VC",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python tools/voice_replacement.py source.wav target.wav
  python tools/voice_replacement.py source.wav target.wav --steps 15 --pitch 2
  python tools/voice_replacement.py source.wav target.wav --f0-condition --no-auto-f0
  python tools/voice_replacement.py https://example.com/source.wav target.wav
  python tools/voice_replacement.py source.wav https://example.com/target.mp3 --pitch 2
        """,
    )

    # One row per command-line argument: (names, add_argument keyword settings).
    argument_table = [
        (("source",), {"help": "Source audio path or URL (voice to be replaced)"}),
        (("target",), {"help": "Target audio path or URL (voice to use)"}),
        (
            ("--steps",),
            {"type": int, "default": 10, "help": "Diffusion steps (1-50, default: 10)"},
        ),
        (
            ("--length",),
            {
                "type": float,
                "default": 1.0,
                "help": "Length adjustment (0.1-3.0, default: 1.0)",
            },
        ),
        (
            ("--cfg",),
            {
                "type": float,
                "default": 0.7,
                "help": "Inference CFG rate (0.0-1.0, default: 0.7)",
            },
        ),
        (
            ("--f0-condition",),
            {"action": "store_true", "help": "Enable F0 conditioning"},
        ),
        (
            ("--no-auto-f0",),
            {"action": "store_true", "help": "Disable auto F0 adjustment"},
        ),
        (
            ("--pitch",),
            {
                "type": int,
                "default": 0,
                "help": "Pitch shift semitones (-12 to 12, default: 0)",
            },
        ),
    ]
    for names, settings in argument_table:
        parser.add_argument(*names, **settings)
    args = parser.parse_args()

    # Echo the effective settings before starting the conversion.
    print("Voice Replacement Tool")
    print("=" * 30)
    print(f"Source: {args.source}")
    print(f"Target: {args.target}")
    print(f"Parameters: steps={args.steps}, length={args.length}, cfg={args.cfg}")
    print(
        f"F0 condition={args.f0_condition}, auto F0={not args.no_auto_f0}, pitch={args.pitch}"
    )
    print()

    # --no-auto-f0 is a negative flag, so it is inverted for auto_f0_adjust.
    conversion_settings = {
        "source_audio_path": args.source,
        "target_audio_path": args.target,
        "diffusion_steps": args.steps,
        "length_adjust": args.length,
        "inference_cfg_rate": args.cfg,
        "f0_condition": args.f0_condition,
        "auto_f0_adjust": not args.no_auto_f0,
        "pitch_shift": args.pitch,
    }
    try:
        result = replace_voice(**conversion_settings)
        print("✅ Voice replacement completed!")
        print(f"Output saved to: {result}")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)