BrianIsaac's picture
fix: resolve audio integration issues and improve UI layout
e877e4d
"""Text-to-Speech service using ElevenLabs API for on-demand audio generation."""
import os
import logging
import tempfile
from typing import Optional, List, Dict, Any
from elevenlabs.client import AsyncElevenLabs
from elevenlabs import VoiceSettings
# Module-level logger named after this module; the service classes below log through it.
logger = logging.getLogger(__name__)
class TTSService:
    """Text-to-Speech service for generating audio narration on-demand.

    Wraps an ``AsyncElevenLabs`` client. When no API key is available the
    client is left as ``None`` and every generation method raises
    ``RuntimeError``; use :meth:`is_available` to check beforehand.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialise TTS service with ElevenLabs API.

        Args:
            api_key: ElevenLabs API key (uses the ``ELEVENLABS_API_KEY``
                environment variable if not provided)
        """
        self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")

        if not self.api_key:
            logger.warning("ELEVENLABS_API_KEY not set - audio generation will fail")
            self.client = None
        else:
            self.client = AsyncElevenLabs(api_key=self.api_key)

        # Default voice: George - professional, neutral male voice
        self.default_voice_id = "JBFqnCBsd6RMkjVDRZzb"

    def is_available(self) -> bool:
        """Check if TTS service is available (i.e. a client was created)."""
        return self.client is not None

    async def generate_audio(
        self,
        text: str,
        voice_id: Optional[str] = None,
        model: str = "eleven_multilingual_v2",
        voice_settings: Optional[VoiceSettings] = None
    ) -> bytes:
        """Generate audio from text.

        Args:
            text: Text to convert to speech
            voice_id: ElevenLabs voice ID (uses default if not provided)
            model: ElevenLabs model ID
            voice_settings: Optional voice customisation

        Returns:
            Audio data as bytes (requested as ``mp3_44100_128``)

        Raises:
            RuntimeError: If TTS service not available
            ValueError: If ``text`` is empty or whitespace-only
        """
        if not self.is_available():
            raise RuntimeError("TTS service not available - check ELEVENLABS_API_KEY")

        if not text or not text.strip():
            raise ValueError("Text cannot be empty")

        logger.info("Generating audio: %d characters", len(text))

        try:
            audio_stream = self.client.text_to_speech.convert(
                text=text,
                voice_id=voice_id or self.default_voice_id,
                model_id=model,
                voice_settings=voice_settings,
                output_format="mp3_44100_128"
            )
            # The response is consumed as an async stream of byte chunks.
            audio_data = b"".join([chunk async for chunk in audio_stream])
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error dropped.
            logger.exception("Audio generation failed: %s", e)
            raise

        logger.info("Audio generated: %d bytes", len(audio_data))
        return audio_data

    @staticmethod
    def _build_analysis_script(
        analysis_text: str,
        recommendations: Optional[List[str]] = None
    ) -> str:
        """Assemble the spoken script for an analysis narration."""
        parts = ["Portfolio Analysis Summary.\n\n", analysis_text]

        if recommendations:
            parts.append("\n\nRecommendations:\n")
            parts.extend(
                f"\n{i}. {rec}\n" for i, rec in enumerate(recommendations, 1)
            )

        parts.append(
            "\n\nThis analysis is for informational purposes only and does not constitute financial advice."
        )
        return "".join(parts)

    @staticmethod
    def _build_portfolio_script(
        portfolio_summary: str,
        holdings: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Assemble the spoken script for a portfolio narration."""
        parts = ["Portfolio Construction Complete.\n\n", portfolio_summary]

        if holdings:
            parts.append("\n\nPortfolio Holdings:\n")
            for holding in holdings[:10]:  # Limit to the first 10 entries
                # `or` also covers keys that are present but set to None,
                # which would otherwise break the numeric format below.
                ticker = holding.get("ticker") or "Unknown"
                weight = holding.get("weight") or 0
                parts.append(f"{ticker}: {weight:.1f}% allocation. ")

        parts.append(
            "\n\nRemember to conduct your own research before making investment decisions."
        )
        return "".join(parts)

    @staticmethod
    def _write_temp_mp3(audio_data: bytes, prefix: str) -> str:
        """Write audio bytes to a persistent ``.mp3`` temp file and return its path.

        The file is created with ``delete=False`` so it outlives this call;
        the caller owns it afterwards.
        """
        temp_file = tempfile.NamedTemporaryFile(
            delete=False,
            suffix=".mp3",
            prefix=prefix
        )
        try:
            # The context manager closes the handle even if the write fails.
            with temp_file:
                temp_file.write(audio_data)
        except Exception:
            # Best-effort removal so a failed write does not leave a partial file.
            try:
                os.unlink(temp_file.name)
            except OSError:
                logger.debug("Could not remove partial file: %s", temp_file.name)
            raise
        return temp_file.name

    async def generate_analysis_narration(
        self,
        analysis_text: str,
        recommendations: Optional[List[str]] = None
    ) -> str:
        """Generate audio narration for portfolio analysis.

        Args:
            analysis_text: Main analysis text/summary
            recommendations: Optional list of recommendations

        Returns:
            Path to generated MP3 file

        Raises:
            RuntimeError: If TTS service not available
        """
        if not self.is_available():
            raise RuntimeError("TTS service not available")

        script = self._build_analysis_script(analysis_text, recommendations)
        audio_data = await self.generate_audio(script)

        output_path = self._write_temp_mp3(audio_data, "analysis_")
        logger.info("Analysis narration saved to: %s", output_path)
        return output_path

    async def generate_portfolio_narration(
        self,
        portfolio_summary: str,
        holdings: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Generate audio narration for built portfolio.

        Args:
            portfolio_summary: Portfolio summary text
            holdings: Optional list of holdings; each is read for its
                ``"ticker"`` and ``"weight"`` keys

        Returns:
            Path to generated MP3 file

        Raises:
            RuntimeError: If TTS service not available
        """
        if not self.is_available():
            raise RuntimeError("TTS service not available")

        script = self._build_portfolio_script(portfolio_summary, holdings)
        audio_data = await self.generate_audio(script)

        output_path = self._write_temp_mp3(audio_data, "portfolio_")
        logger.info("Portfolio narration saved to: %s", output_path)
        return output_path
class DebateAudioGenerator:
    """Generate multi-speaker audio for debate simulation.

    Each debate role (moderator, bull, bear, consensus) is rendered with its
    own ElevenLabs voice; the resulting byte segments are concatenated and
    written to a single temporary ``.mp3`` file.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialise debate audio generator.

        Args:
            api_key: ElevenLabs API key (uses the ``ELEVENLABS_API_KEY``
                environment variable if not provided)
        """
        self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")

        if not self.api_key:
            logger.warning("ELEVENLABS_API_KEY not set - audio generation will fail")
            self.client = None
        else:
            self.client = AsyncElevenLabs(api_key=self.api_key)

        # Voice assignments for debate roles
        self.voices = {
            "bull": "pNInz6obpgDQGcFmaJgB",  # Adam - optimistic, energetic
            "bear": "XB0fDUnXU5powFXDhCwa",  # Charlotte - cautious, analytical
            "consensus": "JBFqnCBsd6RMkjVDRZzb",  # George - neutral, professional
            "moderator": "EXAVITQu4vr4xnSDxMaL"  # Bella - clear, articulate
        }

    def is_available(self) -> bool:
        """Check if debate audio generator is available (i.e. a client was created)."""
        return self.client is not None

    @staticmethod
    def _section_intro(title: str, confidence: Optional[float]) -> str:
        """Build the spoken lead-in for a bull/bear section.

        The confidence is announced whenever one was supplied. The check is
        ``is not None`` rather than truthiness so that a confidence of 0 is
        still spoken instead of being silently dropped.
        """
        if confidence is None:
            return f"{title}. "
        return f"{title}. Confidence level: {confidence:.0f} percent. "

    async def generate_debate_audio(
        self,
        bull_case: str,
        bear_case: str,
        consensus: str,
        bull_confidence: Optional[float] = None,
        bear_confidence: Optional[float] = None,
        stance: Optional[str] = None
    ) -> str:
        """Generate multi-speaker debate simulation audio.

        Segments are generated in order: moderator introduction, bull case,
        bear case, consensus, with a pause inserted after each of the first
        three.

        Args:
            bull_case: Bull perspective text
            bear_case: Bear perspective text
            consensus: Consensus recommendation text
            bull_confidence: Bull confidence percentage
            bear_confidence: Bear confidence percentage
            stance: Final stance (bullish/bearish/neutral)

        Returns:
            Path to generated MP3 file with complete debate

        Raises:
            RuntimeError: If the generator has no client (missing API key)
        """
        if not self.is_available():
            raise RuntimeError("Debate audio generator not available")

        logger.info("Generating debate simulation audio")

        audio_segments = []

        # Introduction
        intro_text = "Advisory Council Debate. We will hear from the Bull researcher, followed by the Bear researcher, and conclude with a consensus recommendation."
        audio_segments.append(
            await self._generate_segment(intro_text, self.voices["moderator"])
        )
        audio_segments.append(self._generate_pause(1.0))

        # Bull case
        bull_intro = self._section_intro("Bull Case", bull_confidence)
        audio_segments.append(
            await self._generate_segment(bull_intro + bull_case, self.voices["bull"])
        )
        audio_segments.append(self._generate_pause(1.5))

        # Bear case
        bear_intro = self._section_intro("Bear Case", bear_confidence)
        audio_segments.append(
            await self._generate_segment(bear_intro + bear_case, self.voices["bear"])
        )
        audio_segments.append(self._generate_pause(1.5))

        # Consensus (an empty stance is treated the same as no stance)
        if stance:
            consensus_intro = f"Consensus Recommendation. Final stance: {stance}. "
        else:
            consensus_intro = "Consensus Recommendation. "
        audio_segments.append(
            await self._generate_segment(
                consensus_intro + consensus, self.voices["consensus"]
            )
        )

        # Combine all segments
        final_audio = b"".join(audio_segments)

        # Save to a temporary file that outlives this call (delete=False);
        # the context manager guarantees the handle is closed even on error.
        with tempfile.NamedTemporaryFile(
            delete=False,
            suffix=".mp3",
            prefix="debate_"
        ) as temp_file:
            temp_file.write(final_audio)
            output_path = temp_file.name

        logger.info("Debate audio saved to: %s", output_path)
        return output_path

    async def _generate_segment(self, text: str, voice_id: str) -> bytes:
        """Generate audio segment with specific voice.

        Args:
            text: Text to convert
            voice_id: ElevenLabs voice ID

        Returns:
            Audio data as bytes (requested as ``mp3_44100_128``)
        """
        audio_stream = self.client.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128"
        )
        # The response is consumed as an async stream of byte chunks.
        return b"".join([chunk async for chunk in audio_stream])

    def _generate_pause(self, duration: float) -> bytes:
        """Generate placeholder bytes for a pause between segments.

        Returns ``int(44100 * duration * 0.1)`` zero bytes. A non-positive
        duration yields ``b""``.

        NOTE(review): these are raw zero bytes, not encoded MP3 frames, so
        a player may skip them rather than render silence - confirm with
        the target player, or replace with real silent MP3 frames.

        Args:
            duration: Pause duration in seconds

        Returns:
            Zero-filled bytes standing in for silence
        """
        sample_rate = 44100
        silence_samples = int(sample_rate * duration * 0.1)  # Simplified
        return b'\x00' * silence_samples