Spaces:

MCP-1st-Birthday
/

science-storyteller

Running

App Files Files Community

science-storyteller / tests /convert_cache_to_mp3.py

tuhulab

chore: Organize repository structure - move docs to docs/ and tests to tests/

28b3cfa 19 days ago

raw

history blame contribute delete

3.8 kB

	"""
	Convert cached WAV files to MP3 for smaller size (GitHub 10MB limit)
	Requires: pydub and ffmpeg
	"""

	import os
	import json
	from pathlib import Path

	# pydub is a third-party dependency. If the import fails, attempt a one-off
	# `pip install pydub` and then retry the import.
	# NOTE(review): os.system("pip ...") runs whichever `pip` is first on PATH,
	# which may not belong to the interpreter running this script, and its exit
	# status is ignored -- the second import below can still raise ImportError.
	try:
	from pydub import AudioSegment
	except ImportError:
	print("Installing pydub...")
	os.system("pip install pydub")
	from pydub import AudioSegment

	def convert_wav_to_mp3(wav_path: str, mp3_path: str, bitrate: str = "128k"):
	    """Convert one WAV file to MP3 and print the size difference.

	    Args:
	        wav_path: Path of the existing WAV file to read.
	        mp3_path: Path the MP3 is written to.
	        bitrate: Bitrate passed through to the MP3 export (default "128k").

	    Raises:
	        Whatever ``AudioSegment.from_wav`` / ``export`` raise (for example
	        when the WAV is unreadable), and ``OSError`` if either file cannot
	        be stat'ed afterwards.
	    """
	    print(f"Converting {wav_path} -> {mp3_path}")
	    audio = AudioSegment.from_wav(wav_path)

	    # pydub's export() hands back the output file object it opened; close it
	    # so the handle is not leaked (one per converted file otherwise).
	    exported = audio.export(mp3_path, format="mp3", bitrate=bitrate)
	    if exported is not None:
	        exported.close()

	    bytes_per_mb = 1024 * 1024
	    wav_size = os.path.getsize(wav_path) / bytes_per_mb
	    mp3_size = os.path.getsize(mp3_path) / bytes_per_mb
	    print(f" WAV: {wav_size:.2f}MB -> MP3: {mp3_size:.2f}MB (saved {wav_size - mp3_size:.2f}MB)")

	def update_cache_metadata(cache_dir: Path):
	    """Rewrite ``metadata.json`` so converted entries reference their MP3 files.

	    Every entry whose ``audio_file`` ends in ``.wav`` is switched to the
	    matching ``.mp3`` path, but only when that MP3 actually exists, so the
	    cache is never pointed at a file whose conversion failed. The path is
	    checked exactly as stored (a relative path is therefore resolved against
	    the current working directory). Entries without a string ``audio_file``
	    are left untouched.

	    Args:
	        cache_dir: Directory that contains ``metadata.json``.

	    Returns:
	        None. Prints a message and returns early when ``metadata.json`` is
	        missing; otherwise the file is rewritten with 2-space indentation.
	    """
	    metadata_file = cache_dir / "metadata.json"

	    if not metadata_file.exists():
	        print("No metadata.json found")
	        return

	    with open(metadata_file, 'r', encoding='utf-8') as f:
	        metadata = json.load(f)

	    wav_ext = '.wav'
	    for entry in metadata.values():
	        if not isinstance(entry, dict):
	            continue

	        audio_file = entry.get('audio_file')
	        if not isinstance(audio_file, str) or not audio_file.endswith(wav_ext):
	            continue

	        # Swap only the trailing extension: str.replace() would also rewrite
	        # a ".wav" that appears earlier in the path (e.g. a directory name).
	        mp3_file = audio_file[:-len(wav_ext)] + '.mp3'

	        if not Path(mp3_file).exists():
	            print(f"Skipped metadata update (MP3 missing): {audio_file}")
	            continue

	        entry['audio_file'] = mp3_file
	        print(f"Updated metadata: {audio_file} -> {mp3_file}")

	    with open(metadata_file, 'w', encoding='utf-8') as f:
	        json.dump(metadata, f, indent=2)

	    print(f"\n✅ Updated {metadata_file}")

	def main():
	    """Convert every cached WAV to MP3, update the metadata, then delete the WAVs.

	    Reads ``./cache/metadata.json``, converts each ``audio_file`` entry that
	    ends in ``.wav`` (skipping ones whose MP3 already exists), asks
	    ``update_cache_metadata`` to rewrite the metadata, and finally removes
	    the original WAV files. A WAV is only removed when a non-empty MP3 sits
	    next to it, so a failed conversion never destroys the source audio.

	    Returns:
	        None. Returns early with a message when the metadata file is missing.
	    """
	    cache_dir = Path("./cache")

	    print("🎵 Converting cached WAV files to MP3\n")
	    print("This reduces file size by ~90% to fit GitHub's 10MB limit\n")

	    # Load metadata to find all cached audio files
	    metadata_file = cache_dir / "metadata.json"
	    if not metadata_file.exists():
	        print("❌ No cache metadata found. Run generate_cache.py first.")
	        return

	    with open(metadata_file, 'r', encoding='utf-8') as f:
	        metadata = json.load(f)

	    converted = 0
	    for entry in metadata.values():
	        audio_file = entry.get('audio_file', '')

	        if not audio_file or not audio_file.endswith('.wav'):
	            continue

	        wav_path = Path(audio_file)
	        if not wav_path.exists():
	            print(f"⚠️ Not found: {wav_path}")
	            continue

	        # Convert to MP3
	        mp3_path = wav_path.with_suffix('.mp3')

	        if mp3_path.exists():
	            print(f"✓ Already exists: {mp3_path}")
	            continue

	        try:
	            convert_wav_to_mp3(str(wav_path), str(mp3_path), bitrate="128k")
	        except Exception as e:
	            print(f"❌ Error converting {wav_path}: {e}")
	            # The MP3 did not exist before this attempt, so anything there now
	            # is partial output; remove it so a rerun does not report it as
	            # "Already exists" and the cleanup below keeps the WAV.
	            try:
	                if mp3_path.exists():
	                    mp3_path.unlink()
	            except OSError as cleanup_error:
	                print(f"⚠️ Could not remove partial file {mp3_path}: {cleanup_error}")
	        else:
	            converted += 1

	    print(f"\n📊 Converted {converted} files")

	    # Update metadata
	    print("\n🔄 Updating cache metadata...")
	    update_cache_metadata(cache_dir)

	    # Clean up WAV files
	    print("\n🗑️ Removing original WAV files...")
	    for entry in metadata.values():
	        audio_file = entry.get('audio_file', '')
	        if not audio_file:
	            continue

	        source = Path(audio_file)
	        if source.suffix not in ('.wav', '.mp3'):
	            continue

	        wav_path = source.with_suffix('.wav')
	        if not wav_path.exists():
	            continue

	        # Never delete the only copy: require a non-empty MP3 beside the WAV.
	        mp3_path = source.with_suffix('.mp3')
	        if not mp3_path.exists() or mp3_path.stat().st_size == 0:
	            print(f" Kept {wav_path.name} (no MP3 available)")
	            continue

	        size_mb = os.path.getsize(wav_path) / (1024 * 1024)
	        os.remove(wav_path)
	        print(f" Removed {wav_path.name} ({size_mb:.2f}MB)")

	    print("\n✅ Conversion complete!")
	    print("\nNow you can commit:")
	    print(" git add cache/ assets/audio/podcast_*.mp3")
	    print(" git commit -m 'Add pre-generated MP3 podcast cache'")
	    print(" git push")

	# Script entry point: run the conversion only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	main()