import argparse
import os
import shutil
import sys
import tempfile
from typing import Any, Dict, List, Optional, Tuple

from gradio_client import Client, handle_file

from .audio_info import validate_audio_path

# Prompt used when the caller does not supply one. Shared by the
# ``understand_music`` default and the CLI so the two cannot drift apart.
DEFAULT_PROMPT = (
    "Describe this track in full detail - tell me the genre, tempo, and key, "
    "then dive into the instruments, production style, and overall mood it creates."
)

# Extensions that are kept as-is when raw bytes are spooled to a temp file;
# any other name gets ".wav" appended.
_KNOWN_AUDIO_EXTENSIONS = (".wav", ".mp3", ".flac", ".m4a")


def _temp_audio_name(filename: str) -> str:
    """Return a safe, single-component file name for spooling audio bytes.

    Directory components are stripped so a caller-supplied name such as
    ``"../../x.wav"`` cannot make the temporary file land outside the
    temporary directory.
    """
    base = os.path.basename(filename.replace("\\", "/")) or "audio"
    if base.lower().endswith(_KNOWN_AUDIO_EXTENSIONS):
        return base
    return f"{base}.wav"


def _describe_source(
    audio_path: Optional[str],
    audio_file: Optional[bytes],
    filename: str,
    youtube_url: Optional[str],
) -> Tuple[str, str]:
    """Return ``(source_type, source_filename)`` for the given inputs.

    Uses the same priority as the success path of ``understand_music``
    (path, then bytes, then YouTube) and never raises, so it is safe to call
    while building an error result.
    """
    if audio_path:
        return "path", os.path.basename(str(audio_path))
    if audio_file:
        return "bytes", filename or "unknown"
    if youtube_url:
        return "youtube", youtube_url
    return "unknown", "unknown"


def understand_music(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    prompt_text: str = DEFAULT_PROMPT,
    youtube_url: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Analyze music with the ``nvidia/music-flamingo`` Gradio Space.

    Exactly one audio source is used, chosen in this order: ``audio_path``,
    then ``audio_file``, then ``youtube_url``. The source and ``prompt_text``
    are sent to the Space's ``/infer`` endpoint and the model's answer is
    returned.

    Args:
        audio_path: Audio location handed to ``validate_audio_path``. The
            original documentation describes this as a local path or URL
            (WAV, MP3, FLAC, M4A); that helper holds the authoritative rules.
        audio_file: Raw audio bytes (alternative to audio_path). They are
            written to a temporary file that is removed before returning.
        filename: Original filename for reference (used with audio_file).
            Only its final path component is used for the temporary file;
            ".wav" is appended when it has no known audio extension.
        prompt_text: Custom prompt for analysis (default: comprehensive
            music description).
        youtube_url: YouTube URL as alternative audio source.

    Returns:
        Dictionary with analysis results:
        {
            "analysis": model output, or None on error,
            "audio_source": "path", "bytes", "youtube" or "unknown",
            "filename": file name / URL of the source, or "unknown",
            "prompt": the prompt text that was used,
            "status": "success" or "error",
            "error": error message (only present when status is "error")
        }

    Raises:
        Nothing is propagated for ordinary failures: missing input, path
        validation errors and API/network errors are all caught and reported
        through ``status == "error"`` and the ``error`` message.

    Examples:
        # Basic analysis with local file
        result = understand_music(audio_path="song.mp3")
        print(result["analysis"])

        # Custom prompt for finding cut points
        result = understand_music(
            audio_path="song.mp3",
            prompt_text="Identify the best cutting points for editing - "
                        "suggest specific time stamps where verses, "
                        "choruses, and bridges begin and end."
        )

        # Analysis with YouTube URL
        result = understand_music(
            youtube_url="https://youtube.com/watch?v=example",
            prompt_text="Analyze the structure and suggest optimal edit points."
        )
    """
    # Created only for the bytes branch; removed in ``finally`` no matter
    # where a failure happens (including a failed write).
    temp_dir: Optional[str] = None
    try:
        if not (audio_path or audio_file or youtube_url):
            raise ValueError(
                "Either audio_path, audio_file, or youtube_url must be provided"
            )

        if audio_path:
            validated_path = validate_audio_path(audio_path)
            audio_source = handle_file(validated_path)
            source_type = "path"
            source_filename = os.path.basename(validated_path)
        elif audio_file:
            if not filename:
                raise ValueError("Filename must be provided when using audio_file")
            temp_dir = tempfile.mkdtemp()
            temp_file_path = os.path.join(temp_dir, _temp_audio_name(filename))
            with open(temp_file_path, "wb") as f:
                f.write(audio_file)
            audio_source = handle_file(temp_file_path)
            source_type = "bytes"
            source_filename = filename
        else:
            # NOTE(review): the URL is sent both as ``audio_path`` and as
            # ``youtube_url``, as in the original code. Confirm against the
            # Space's API whether ``audio_path`` should be empty here.
            audio_source = youtube_url
            source_type = "youtube"
            source_filename = youtube_url

        client = Client("nvidia/music-flamingo")
        analysis = client.predict(
            audio_path=audio_source,
            youtube_url=youtube_url if youtube_url else "",
            prompt_text=prompt_text,
            api_name="/infer",
        )

        return {
            "analysis": analysis,
            "audio_source": source_type,
            "filename": source_filename,
            "prompt": prompt_text,
            "status": "success",
        }
    except Exception as e:
        # Deliberate boundary: callers (including the CLI below) rely on an
        # error dict instead of an exception.
        source_type, source_filename = _describe_source(
            audio_path, audio_file, filename, youtube_url
        )
        return {
            "analysis": None,
            "audio_source": source_type,
            "filename": source_filename,
            "prompt": prompt_text,
            "status": "error",
            "error": str(e),
        }
    finally:
        if temp_dir is not None:
            # Best-effort cleanup; a failure to delete must not mask the result.
            shutil.rmtree(temp_dir, ignore_errors=True)


def analyze_music_structure(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    youtube_url: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Analyze music structure and identify sections (verse, chorus, bridge, etc.).

    Thin wrapper around ``understand_music`` that supplies a fixed prompt
    asking for timestamped sections and transitions.

    Args:
        audio_path: Audio location, forwarded to ``understand_music``
        audio_file: Raw audio bytes
        filename: Original filename for reference
        youtube_url: YouTube URL as alternative audio source

    Returns:
        The result dictionary produced by ``understand_music``
    """
    structure_prompt = (
        "Analyze the structure of this music track. Identify and timestamp the different sections: "
        "intro, verses, choruses, pre-chorus, bridge, instrumental breaks, solo sections, and outro/outro. "
        "Provide specific time stamps (in MM:SS format) for where each section begins and ends. "
        "Also note any transitions, buildups, or breakdowns that would be important for editing."
    )

    return understand_music(
        audio_path=audio_path,
        audio_file=audio_file,
        filename=filename,
        prompt_text=structure_prompt,
        youtube_url=youtube_url,
    )


def suggest_cutting_points(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    youtube_url: Optional[str] = None,
    purpose: str = "general",
) -> Dict[str, Any]:
    """
    Suggest optimal cutting points for audio editing.

    Picks a purpose-specific prompt and delegates to ``understand_music``.

    Args:
        audio_path: Audio location, forwarded to ``understand_music``
        audio_file: Raw audio bytes
        filename: Original filename for reference
        youtube_url: YouTube URL as alternative audio source
        purpose: Purpose of cutting ('general', 'dj_mix', 'social_media',
            'ringtone'). Any other value falls back to the 'general' prompt.

    Returns:
        The result dictionary produced by ``understand_music``
    """
    purpose_prompts = {
        "general": (
            "Suggest the best cutting points for this track. Identify natural edit points where "
            "the music flows well for cuts. Provide timestamps in MM:SS format and explain why "
            "each point is good for editing (e.g., clean transitions, beat drops, phrase endings)."
        ),
        "dj_mix": (
            "Analyze this track for DJ mixing purposes. Identify the best intro and outro sections "
            "for beatmatching, suggest cue points for mixing, and provide timestamps for clean "
            "transitions. Focus on drum patterns, BPM consistency, and mixable sections."
        ),
        "social_media": (
            "Suggest cutting points for social media content (15-60 seconds). Identify the most "
            "engaging parts of the track, catchy hooks, or impactful moments. Provide timestamps "
            "for creating short, attention-grabbing clips."
        ),
        "ringtone": (
            "Identify the best 15-30 second sections for ringtones. Look for memorable melodies, "
            "catchy choruses, or distinctive instrumental parts. Provide timestamps and explain "
            "why each section would work well as a ringtone."
        ),
    }

    prompt = purpose_prompts.get(purpose, purpose_prompts["general"])

    return understand_music(
        audio_path=audio_path,
        audio_file=audio_file,
        filename=filename,
        prompt_text=prompt,
        youtube_url=youtube_url,
    )


def analyze_genre_and_style(
    audio_path: Optional[str] = None,
    audio_file: Optional[bytes] = None,
    filename: str = "audio",
    youtube_url: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Provide detailed genre and production style analysis.

    Thin wrapper around ``understand_music`` with a fixed genre/production
    prompt.

    Args:
        audio_path: Audio location, forwarded to ``understand_music``
        audio_file: Raw audio bytes
        filename: Original filename for reference
        youtube_url: YouTube URL as alternative audio source

    Returns:
        The result dictionary produced by ``understand_music``
    """
    genre_prompt = (
        "Provide a detailed analysis of this track's genre and production style. Identify the "
        "primary genre and any subgenres or fusion elements. Describe the production techniques, "
        "mixing style, sound design choices, and arrangement. Analyze the instrumentation, "
        "including both traditional and electronic elements. Discuss the era or period the music "
        "seems to draw inspiration from, and compare it to similar artists or tracks if applicable."
    )

    return understand_music(
        audio_path=audio_path,
        audio_file=audio_file,
        filename=filename,
        prompt_text=genre_prompt,
        youtube_url=youtube_url,
    )


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with one sub-command per analysis."""
    parser = argparse.ArgumentParser(
        description="Music understanding and analysis tools"
    )
    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # General understanding
    understand_parser = subparsers.add_parser(
        "understand", help="General music analysis"
    )
    understand_parser.add_argument("--audio", help="Path to audio file")
    understand_parser.add_argument("--prompt", help="Custom prompt text")
    understand_parser.add_argument("--youtube", help="YouTube URL")

    # Structure analysis
    structure_parser = subparsers.add_parser("structure", help="Analyze song structure")
    structure_parser.add_argument("--audio", help="Path to audio file")
    structure_parser.add_argument("--youtube", help="YouTube URL")

    # Cutting points
    cutting_parser = subparsers.add_parser("cutting", help="Suggest cutting points")
    cutting_parser.add_argument("--audio", help="Path to audio file")
    cutting_parser.add_argument(
        "--purpose",
        choices=["general", "dj_mix", "social_media", "ringtone"],
        default="general",
        help="Purpose of cutting",
    )
    cutting_parser.add_argument("--youtube", help="YouTube URL")

    # Genre analysis
    genre_parser = subparsers.add_parser("genre", help="Analyze genre and style")
    genre_parser.add_argument("--audio", help="Path to audio file")
    genre_parser.add_argument("--youtube", help="YouTube URL")

    return parser


def _main(argv: Optional[List[str]] = None) -> int:
    """Run the CLI and return the process exit code (0 success, 1 failure).

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    try:
        if args.command == "understand":
            result = understand_music(
                audio_path=args.audio,
                youtube_url=args.youtube,
                prompt_text=args.prompt if args.prompt else DEFAULT_PROMPT,
            )
        elif args.command == "structure":
            # This sub-command was registered but previously never dispatched.
            result = analyze_music_structure(
                audio_path=args.audio, youtube_url=args.youtube
            )
        elif args.command == "cutting":
            result = suggest_cutting_points(
                audio_path=args.audio, youtube_url=args.youtube, purpose=args.purpose
            )
        elif args.command == "genre":
            result = analyze_genre_and_style(
                audio_path=args.audio, youtube_url=args.youtube
            )
        else:
            parser.print_help()
            return 1
    except Exception as e:
        print(f"Error: {e}")
        return 1

    if result["status"] != "success":
        print(f"Error: {result['error']}")
        return 1

    print(f"Analysis for: {result['filename']}")
    print(f"Source: {result['audio_source']}")
    print(f"Prompt: {result['prompt']}")
    print("\n" + "=" * 50)
    print(result["analysis"])
    return 0


if __name__ == "__main__":
    sys.exit(_main())