"""Text-to-Speech service using ElevenLabs API for on-demand audio generation.""" import os import logging import tempfile from typing import Optional, List, Dict, Any from elevenlabs.client import AsyncElevenLabs from elevenlabs import VoiceSettings logger = logging.getLogger(__name__) class TTSService: """Text-to-Speech service for generating audio narration on-demand.""" def __init__(self, api_key: Optional[str] = None): """Initialise TTS service with ElevenLabs API. Args: api_key: ElevenLabs API key (uses env var if not provided) """ self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY") if not self.api_key: logger.warning("ELEVENLABS_API_KEY not set - audio generation will fail") self.client = None else: self.client = AsyncElevenLabs(api_key=self.api_key) # Default voice: George - professional, neutral male voice self.default_voice_id = "JBFqnCBsd6RMkjVDRZzb" def is_available(self) -> bool: """Check if TTS service is available.""" return self.client is not None async def generate_audio( self, text: str, voice_id: Optional[str] = None, model: str = "eleven_multilingual_v2", voice_settings: Optional[VoiceSettings] = None ) -> bytes: """Generate audio from text. Args: text: Text to convert to speech voice_id: ElevenLabs voice ID (uses default if not provided) model: ElevenLabs model ID voice_settings: Optional voice customisation Returns: Audio data as bytes (MP3 format) Raises: RuntimeError: If TTS service not available """ if not self.is_available(): raise RuntimeError("TTS service not available - check ELEVENLABS_API_KEY") if not text or not text.strip(): raise ValueError("Text cannot be empty") logger.info(f"Generating audio: {len(text)} characters") try: audio_generator = self.client.text_to_speech.convert( text=text, voice_id=voice_id or self.default_voice_id, model_id=model, voice_settings=voice_settings, output_format="mp3_44100_128" ) # Collect audio chunks audio_chunks = [] async for chunk in audio_generator: audio_chunks.append(chunk) audio_data = b"".join(audio_chunks) logger.info(f"Audio generated: {len(audio_data)} bytes") return audio_data except Exception as e: logger.error(f"Audio generation failed: {e}") raise async def generate_analysis_narration( self, analysis_text: str, recommendations: Optional[List[str]] = None ) -> str: """Generate audio narration for portfolio analysis. Args: analysis_text: Main analysis text/summary recommendations: Optional list of recommendations Returns: Path to generated MP3 file """ if not self.is_available(): raise RuntimeError("TTS service not available") # Build narrative script script = "Portfolio Analysis Summary.\n\n" script += analysis_text if recommendations: script += "\n\nRecommendations:\n" for i, rec in enumerate(recommendations, 1): script += f"\n{i}. {rec}\n" script += "\n\nThis analysis is for informational purposes only and does not constitute financial advice." # Generate audio audio_data = await self.generate_audio(script) # Save to temporary file temp_file = tempfile.NamedTemporaryFile( delete=False, suffix=".mp3", prefix="analysis_" ) temp_file.write(audio_data) temp_file.close() logger.info(f"Analysis narration saved to: {temp_file.name}") return temp_file.name async def generate_portfolio_narration( self, portfolio_summary: str, holdings: Optional[List[Dict[str, Any]]] = None ) -> str: """Generate audio narration for built portfolio. Args: portfolio_summary: Portfolio summary text holdings: Optional list of holdings with ticker and allocation Returns: Path to generated MP3 file """ if not self.is_available(): raise RuntimeError("TTS service not available") script = "Portfolio Construction Complete.\n\n" script += portfolio_summary if holdings: script += "\n\nPortfolio Holdings:\n" for holding in holdings[:10]: # Limit to top 10 ticker = holding.get("ticker", "Unknown") weight = holding.get("weight", 0) script += f"{ticker}: {weight:.1f}% allocation. " script += "\n\nRemember to conduct your own research before making investment decisions." # Generate audio audio_data = await self.generate_audio(script) # Save to temporary file temp_file = tempfile.NamedTemporaryFile( delete=False, suffix=".mp3", prefix="portfolio_" ) temp_file.write(audio_data) temp_file.close() logger.info(f"Portfolio narration saved to: {temp_file.name}") return temp_file.name class DebateAudioGenerator: """Generate multi-speaker audio for debate simulation.""" def __init__(self, api_key: Optional[str] = None): """Initialise debate audio generator. Args: api_key: ElevenLabs API key (uses env var if not provided) """ self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY") if not self.api_key: logger.warning("ELEVENLABS_API_KEY not set - audio generation will fail") self.client = None else: self.client = AsyncElevenLabs(api_key=self.api_key) # Voice assignments for debate roles self.voices = { "bull": "pNInz6obpgDQGcFmaJgB", # Adam - optimistic, energetic "bear": "XB0fDUnXU5powFXDhCwa", # Charlotte - cautious, analytical "consensus": "JBFqnCBsd6RMkjVDRZzb", # George - neutral, professional "moderator": "EXAVITQu4vr4xnSDxMaL" # Bella - clear, articulate } def is_available(self) -> bool: """Check if debate audio generator is available.""" return self.client is not None async def generate_debate_audio( self, bull_case: str, bear_case: str, consensus: str, bull_confidence: Optional[float] = None, bear_confidence: Optional[float] = None, stance: Optional[str] = None ) -> str: """Generate multi-speaker debate simulation audio. Args: bull_case: Bull perspective text bear_case: Bear perspective text consensus: Consensus recommendation text bull_confidence: Bull confidence percentage bear_confidence: Bear confidence percentage stance: Final stance (bullish/bearish/neutral) Returns: Path to generated MP3 file with complete debate """ if not self.is_available(): raise RuntimeError("Debate audio generator not available") logger.info("Generating debate simulation audio") audio_segments = [] # Introduction intro_text = "Advisory Council Debate. We will hear from the Bull researcher, followed by the Bear researcher, and conclude with a consensus recommendation." intro_audio = await self._generate_segment(intro_text, self.voices["moderator"]) audio_segments.append(intro_audio) audio_segments.append(self._generate_pause(1.0)) # Bull case bull_intro = f"Bull Case. Confidence level: {bull_confidence:.0f} percent. " if bull_confidence else "Bull Case. " bull_audio = await self._generate_segment(bull_intro + bull_case, self.voices["bull"]) audio_segments.append(bull_audio) audio_segments.append(self._generate_pause(1.5)) # Bear case bear_intro = f"Bear Case. Confidence level: {bear_confidence:.0f} percent. " if bear_confidence else "Bear Case. " bear_audio = await self._generate_segment(bear_intro + bear_case, self.voices["bear"]) audio_segments.append(bear_audio) audio_segments.append(self._generate_pause(1.5)) # Consensus consensus_intro = f"Consensus Recommendation. Final stance: {stance}. " if stance else "Consensus Recommendation. " consensus_audio = await self._generate_segment(consensus_intro + consensus, self.voices["consensus"]) audio_segments.append(consensus_audio) # Combine all segments final_audio = b"".join(audio_segments) # Save to temporary file temp_file = tempfile.NamedTemporaryFile( delete=False, suffix=".mp3", prefix="debate_" ) temp_file.write(final_audio) temp_file.close() logger.info(f"Debate audio saved to: {temp_file.name}") return temp_file.name async def _generate_segment(self, text: str, voice_id: str) -> bytes: """Generate audio segment with specific voice. Args: text: Text to convert voice_id: ElevenLabs voice ID Returns: Audio data as bytes """ audio_generator = self.client.text_to_speech.convert( text=text, voice_id=voice_id, model_id="eleven_multilingual_v2", output_format="mp3_44100_128" ) chunks = [] async for chunk in audio_generator: chunks.append(chunk) return b"".join(chunks) def _generate_pause(self, duration: float) -> bytes: """Generate silence pause between segments. Args: duration: Pause duration in seconds Returns: Silence audio data """ # Simple silence: MP3 frame with minimal data # For production, use proper silent MP3 frames sample_rate = 44100 silence_samples = int(sample_rate * duration * 0.1) # Simplified return b'\x00' * silence_samples