BrianIsaac commited on
Commit
e628e1f
Β·
1 Parent(s): 2de8dd4

feat: add ElevenLabs TTS integration with on-demand audio buttons

Browse files

Add text-to-speech narration for portfolio analysis, build, and debate features using ElevenLabs API.

- Create TTSService and DebateAudioGenerator in backend/audio module
- Add on-demand audio buttons to analyse portfolio, build portfolio, and compare strategies pages
- Implement multi-speaker debate audio with distinct voices (bull/bear/consensus)
- Add global state management for audio generation (LAST_ANALYSIS_TEXT, LAST_BUILD_RESULT, LAST_DEBATE_DATA)
- Configure ElevenLabs API settings in backend/config.py
- Add elevenlabs>=1.0.0 dependency to pyproject.toml
- Update header to reflect 9 MCP servers

Audio generation is triggered on-demand via button clicks after results are available.

Files changed (7) hide show
  1. README.md +6 -4
  2. app.py +300 -7
  3. backend/audio/__init__.py +5 -0
  4. backend/audio/tts_service.py +306 -0
  5. backend/config.py +12 -0
  6. pyproject.toml +2 -0
  7. uv.lock +19 -0
README.md CHANGED
@@ -3,13 +3,15 @@ title: Portfolio Intelligence Platform
3
  emoji: πŸ“Š
4
  colorFrom: blue
5
  colorTo: purple
6
- sdk: docker
7
- app_port: 7860
 
8
  pinned: false
9
  license: mit
10
  short_description: AI portfolio analysis with multi-agent MCP orchestration
11
  tags:
12
- - mcp-in-action-track-consumer
 
13
  - agents
14
  - mcp
15
  - portfolio-analysis
@@ -39,7 +41,7 @@ This architecture demonstrates that agentic systems can be both transparent and
39
 
40
  ## Hackathon Submission
41
 
42
- **Track**: MCP in Action - Consumer
43
 
44
  **Social Media Post**: [TODO: Add link to X/LinkedIn post]
45
 
 
3
  emoji: πŸ“Š
4
  colorFrom: blue
5
  colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.49.1
8
+ app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: AI portfolio analysis with multi-agent MCP orchestration
12
  tags:
13
+ - building-mcp-track-enterprise
14
+ - building-mcp-track-customer
15
  - agents
16
  - mcp
17
  - portfolio-analysis
 
41
 
42
  ## Hackathon Submission
43
 
44
+ **Track**: MCP in Action - Consumer | MCP in Action - Enterprise
45
 
46
  **Social Media Post**: [TODO: Add link to X/LinkedIn post]
47
 
app.py CHANGED
@@ -177,6 +177,11 @@ LAST_STRESS_TEST = None
177
  LAST_EXPORT_PDF_PATH = None # Pre-generated PDF export path
178
  LAST_EXPORT_CSV_PATH = None # Pre-generated CSV export path
179
 
 
 
 
 
 
180
  # Loading screen rotating messages with MCP phases and disclaimers
181
  LOADING_MESSAGES = [
182
  "MCP Workflow: Initialising Model Context Protocol servers...",
@@ -964,7 +969,8 @@ async def run_analysis_with_ui_update(
964
  "results",
965
  analysis_text,
966
  performance_metrics,
967
- *charts
 
968
  )
969
 
970
  except Exception as e:
@@ -1798,7 +1804,7 @@ def create_interface() -> gr.Blocks:
1798
  <p>AI-powered portfolio analysis with transparent multi-agent MCP orchestration</p>
1799
  <div class="value-props">
1800
  <div class="value-prop">
1801
- <span>6 MCP Servers</span>
1802
  </div>
1803
  <div class="value-prop">
1804
  <span>Quantitative Models</span>
@@ -2063,6 +2069,21 @@ def create_interface() -> gr.Blocks:
2063
  with gr.Group(elem_classes="preview-card", visible=False) as build_results_container:
2064
  build_status = gr.Markdown("", elem_classes="build-status")
2065
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2066
  with gr.Row():
2067
  build_regenerate_btn = gr.Button("Regenerate", variant="secondary", size="sm")
2068
 
@@ -2126,6 +2147,21 @@ def create_interface() -> gr.Blocks:
2126
  compare_consensus = gr.Markdown("")
2127
  compare_stance = gr.Textbox(label="Stance", interactive=False)
2128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2129
  # Debate transcript
2130
  with gr.Accordion("View Full Debate", open=False):
2131
  compare_debate_transcript = gr.JSON(label="Debate Rounds")
@@ -2431,6 +2467,21 @@ def create_interface() -> gr.Blocks:
2431
  with gr.Column():
2432
  analysis_output = gr.Markdown("")
2433
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2434
  # Performance Metrics Accordion (progressive disclosure)
2435
  with gr.Accordion("Performance Metrics & Reasoning", open=False):
2436
  performance_metrics_output = gr.Markdown("")
@@ -2732,6 +2783,183 @@ def create_interface() -> gr.Blocks:
2732
  test_page: gr.update(visible=True)
2733
  }
2734
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2735
  async def handle_build_portfolio(goals, risk_tolerance, constraints, session_state):
2736
  """Handle the Build Portfolio workflow with streaming updates.
2737
 
@@ -2744,6 +2972,7 @@ def create_interface() -> gr.Blocks:
2744
  Yields:
2745
  Tuple of UI updates: (agent_chat, results_container, status)
2746
  """
 
2747
  logger.info(f"handle_build_portfolio called")
2748
  logger.info(f"Input types - goals: {type(goals).__name__}, risk_tolerance: {type(risk_tolerance).__name__}, constraints: {type(constraints).__name__}")
2749
  logger.info(f"Input values - goals: {goals!r}, risk_tolerance: {risk_tolerance!r}, constraints: {constraints!r}")
@@ -2818,12 +3047,25 @@ def create_interface() -> gr.Blocks:
2818
  logger.error(f"Full traceback:\n{traceback.format_exc()}")
2819
  raise
2820
 
 
 
 
 
 
 
 
 
 
 
 
 
2821
  # Final yield: Show results container
2822
  logger.info(f"Completed streaming. Yielding final result with {len(chat_messages)} messages")
2823
  yield (
2824
  gr.update(value=chat_messages, visible=True), # build_agent_chat (final state, visible)
2825
  gr.update(visible=True), # build_results_container (show results)
2826
- "Portfolio built successfully!" # build_status
 
2827
  )
2828
 
2829
  except Exception as e:
@@ -2906,6 +3148,7 @@ def create_interface() -> gr.Blocks:
2906
  Tuple of UI updates: (debate_chat, results_container, status, bull_case, bull_conf,
2907
  bear_case, bear_conf, consensus, stance, debate_transcript)
2908
  """
 
2909
  if not portfolio_text or not portfolio_text.strip():
2910
  yield (
2911
  gr.update(value=[], visible=False), # compare_debate_chat (empty, hidden)
@@ -2988,6 +3231,18 @@ def create_interface() -> gr.Blocks:
2988
  "confidence": 60 # Default, could parse from title
2989
  }
2990
 
 
 
 
 
 
 
 
 
 
 
 
 
2991
  # Final yield: Show results container with analysis
2992
  yield (
2993
  gr.update(value=chat_messages, visible=True), # compare_debate_chat (final state, visible)
@@ -2999,7 +3254,8 @@ def create_interface() -> gr.Blocks:
2999
  bear_case_data.get("confidence", 0), # compare_bear_confidence
3000
  consensus_data.get("recommendation", ""), # compare_consensus
3001
  consensus_data.get("stance", "Mixed"), # compare_stance
3002
- chat_messages # compare_debate_transcript (use chat messages)
 
3003
  )
3004
 
3005
  except Exception as e:
@@ -3903,7 +4159,8 @@ Please try again with different parameters.
3903
  optimization_plot,
3904
  load_past_portfolio_dropdown,
3905
  export_pdf_btn,
3906
- export_csv_btn
 
3907
  ],
3908
  show_progress="full"
3909
  )
@@ -4440,7 +4697,8 @@ Please try again with different parameters.
4440
  outputs=[
4441
  build_agent_chat,
4442
  build_results_container,
4443
- build_status
 
4444
  ]
4445
  )
4446
 
@@ -4473,7 +4731,8 @@ Please try again with different parameters.
4473
  compare_bear_confidence,
4474
  compare_consensus,
4475
  compare_stance,
4476
- compare_debate_transcript
 
4477
  ]
4478
  )
4479
 
@@ -4532,6 +4791,40 @@ Please try again with different parameters.
4532
  ]
4533
  )
4534
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4535
  return demo
4536
 
4537
 
 
177
  LAST_EXPORT_PDF_PATH = None # Pre-generated PDF export path
178
  LAST_EXPORT_CSV_PATH = None # Pre-generated CSV export path
179
 
180
+ # Global state for audio generation
181
+ LAST_ANALYSIS_TEXT = None # Stores analysis text for audio generation
182
+ LAST_BUILD_RESULT = None # Stores build portfolio result for audio
183
+ LAST_DEBATE_DATA = None # Stores debate data for audio simulation
184
+
185
  # Loading screen rotating messages with MCP phases and disclaimers
186
  LOADING_MESSAGES = [
187
  "MCP Workflow: Initialising Model Context Protocol servers...",
 
969
  "results",
970
  analysis_text,
971
  performance_metrics,
972
+ *charts,
973
+ gr.update(visible=True) # analysis_audio_btn (show audio button)
974
  )
975
 
976
  except Exception as e:
 
1804
  <p>AI-powered portfolio analysis with transparent multi-agent MCP orchestration</p>
1805
  <div class="value-props">
1806
  <div class="value-prop">
1807
+ <span>9 MCP Servers</span>
1808
  </div>
1809
  <div class="value-prop">
1810
  <span>Quantitative Models</span>
 
2069
  with gr.Group(elem_classes="preview-card", visible=False) as build_results_container:
2070
  build_status = gr.Markdown("", elem_classes="build-status")
2071
 
2072
+ # Audio narration button and player
2073
+ with gr.Row():
2074
+ build_audio_btn = gr.Button(
2075
+ "πŸ”Š Listen to Portfolio",
2076
+ variant="secondary",
2077
+ size="sm",
2078
+ visible=False
2079
+ )
2080
+ build_audio_player = gr.Audio(
2081
+ label="Portfolio Summary Audio",
2082
+ interactive=False,
2083
+ visible=False,
2084
+ show_download_button=True
2085
+ )
2086
+
2087
  with gr.Row():
2088
  build_regenerate_btn = gr.Button("Regenerate", variant="secondary", size="sm")
2089
 
 
2147
  compare_consensus = gr.Markdown("")
2148
  compare_stance = gr.Textbox(label="Stance", interactive=False)
2149
 
2150
+ # Audio debate button and player
2151
+ with gr.Row():
2152
+ compare_audio_btn = gr.Button(
2153
+ "🎭 Listen to Debate",
2154
+ variant="secondary",
2155
+ size="sm",
2156
+ visible=False
2157
+ )
2158
+ compare_audio_player = gr.Audio(
2159
+ label="Advisory Council Debate Audio",
2160
+ interactive=False,
2161
+ visible=False,
2162
+ show_download_button=True
2163
+ )
2164
+
2165
  # Debate transcript
2166
  with gr.Accordion("View Full Debate", open=False):
2167
  compare_debate_transcript = gr.JSON(label="Debate Rounds")
 
2467
  with gr.Column():
2468
  analysis_output = gr.Markdown("")
2469
 
2470
+ # Audio narration button and player
2471
+ with gr.Row():
2472
+ analysis_audio_btn = gr.Button(
2473
+ "πŸ”Š Listen to Analysis",
2474
+ variant="secondary",
2475
+ size="sm",
2476
+ visible=False
2477
+ )
2478
+ analysis_audio_player = gr.Audio(
2479
+ label="Audio Summary",
2480
+ interactive=False,
2481
+ visible=False,
2482
+ show_download_button=True
2483
+ )
2484
+
2485
  # Performance Metrics Accordion (progressive disclosure)
2486
  with gr.Accordion("Performance Metrics & Reasoning", open=False):
2487
  performance_metrics_output = gr.Markdown("")
 
2783
  test_page: gr.update(visible=True)
2784
  }
2785
 
2786
+ # ============================================================
2787
+ # AUDIO GENERATION HANDLERS
2788
+ # ============================================================
2789
+
2790
+ async def generate_analysis_audio():
2791
+ """Generate audio narration for portfolio analysis on-demand."""
2792
+ global LAST_ANALYSIS_STATE
2793
+
2794
+ if not LAST_ANALYSIS_STATE:
2795
+ logger.warning("No analysis state available for audio generation")
2796
+ return (
2797
+ gr.update(visible=False), # audio_player
2798
+ gr.update(visible=True), # button
2799
+ )
2800
+
2801
+ try:
2802
+ from backend.audio.tts_service import TTSService
2803
+
2804
+ tts = TTSService()
2805
+ if not tts.is_available():
2806
+ logger.warning("TTS service not available")
2807
+ return (
2808
+ gr.update(visible=False),
2809
+ gr.update(visible=True),
2810
+ )
2811
+
2812
+ # Extract analysis text
2813
+ ai_synthesis = LAST_ANALYSIS_STATE.get("ai_synthesis", "")
2814
+ recommendations = LAST_ANALYSIS_STATE.get("recommendations", [])
2815
+
2816
+ if not ai_synthesis and not recommendations:
2817
+ logger.warning("No analysis content available")
2818
+ return (
2819
+ gr.update(visible=False),
2820
+ gr.update(visible=True),
2821
+ )
2822
+
2823
+ logger.info("Generating audio for analysis...")
2824
+
2825
+ # Generate audio
2826
+ audio_path = await tts.generate_analysis_narration(
2827
+ analysis_text=ai_synthesis[:1000], # Limit to 1000 chars
2828
+ recommendations=recommendations
2829
+ )
2830
+
2831
+ logger.info(f"Audio generated: {audio_path}")
2832
+
2833
+ return (
2834
+ gr.update(value=audio_path, visible=True), # Show audio player
2835
+ gr.update(visible=True), # Keep button visible
2836
+ )
2837
+
2838
+ except Exception as e:
2839
+ logger.error(f"Audio generation failed: {e}")
2840
+ return (
2841
+ gr.update(visible=False),
2842
+ gr.update(visible=True),
2843
+ )
2844
+
2845
+ async def generate_build_audio():
2846
+ """Generate audio narration for built portfolio on-demand."""
2847
+ global LAST_BUILD_RESULT
2848
+
2849
+ if not LAST_BUILD_RESULT:
2850
+ logger.warning("No build result available for audio generation")
2851
+ return (
2852
+ gr.update(visible=False),
2853
+ gr.update(visible=True),
2854
+ )
2855
+
2856
+ try:
2857
+ from backend.audio.tts_service import TTSService
2858
+
2859
+ tts = TTSService()
2860
+ if not tts.is_available():
2861
+ logger.warning("TTS service not available")
2862
+ return (
2863
+ gr.update(visible=False),
2864
+ gr.update(visible=True),
2865
+ )
2866
+
2867
+ # Extract portfolio summary
2868
+ portfolio_summary = LAST_BUILD_RESULT.get("summary", "")
2869
+ holdings = LAST_BUILD_RESULT.get("holdings", [])
2870
+
2871
+ if not portfolio_summary:
2872
+ logger.warning("No portfolio summary available")
2873
+ return (
2874
+ gr.update(visible=False),
2875
+ gr.update(visible=True),
2876
+ )
2877
+
2878
+ logger.info("Generating audio for portfolio...")
2879
+
2880
+ # Generate audio
2881
+ audio_path = await tts.generate_portfolio_narration(
2882
+ portfolio_summary=portfolio_summary[:1000],
2883
+ holdings=holdings
2884
+ )
2885
+
2886
+ logger.info(f"Audio generated: {audio_path}")
2887
+
2888
+ return (
2889
+ gr.update(value=audio_path, visible=True),
2890
+ gr.update(visible=True),
2891
+ )
2892
+
2893
+ except Exception as e:
2894
+ logger.error(f"Audio generation failed: {e}")
2895
+ return (
2896
+ gr.update(visible=False),
2897
+ gr.update(visible=True),
2898
+ )
2899
+
2900
+ async def generate_debate_audio():
2901
+ """Generate multi-speaker debate audio on-demand."""
2902
+ global LAST_DEBATE_DATA
2903
+
2904
+ if not LAST_DEBATE_DATA:
2905
+ logger.warning("No debate data available for audio generation")
2906
+ return (
2907
+ gr.update(visible=False),
2908
+ gr.update(visible=True),
2909
+ )
2910
+
2911
+ try:
2912
+ from backend.audio.tts_service import DebateAudioGenerator
2913
+
2914
+ debate_gen = DebateAudioGenerator()
2915
+ if not debate_gen.is_available():
2916
+ logger.warning("Debate audio generator not available")
2917
+ return (
2918
+ gr.update(visible=False),
2919
+ gr.update(visible=True),
2920
+ )
2921
+
2922
+ # Extract debate data
2923
+ bull_case = LAST_DEBATE_DATA.get("bull_case", "")
2924
+ bear_case = LAST_DEBATE_DATA.get("bear_case", "")
2925
+ consensus = LAST_DEBATE_DATA.get("consensus", "")
2926
+ bull_confidence = LAST_DEBATE_DATA.get("bull_confidence")
2927
+ bear_confidence = LAST_DEBATE_DATA.get("bear_confidence")
2928
+ stance = LAST_DEBATE_DATA.get("stance")
2929
+
2930
+ if not bull_case or not bear_case or not consensus:
2931
+ logger.warning("Incomplete debate data")
2932
+ return (
2933
+ gr.update(visible=False),
2934
+ gr.update(visible=True),
2935
+ )
2936
+
2937
+ logger.info("Generating debate audio...")
2938
+
2939
+ # Generate multi-speaker debate audio
2940
+ audio_path = await debate_gen.generate_debate_audio(
2941
+ bull_case=bull_case[:1000],
2942
+ bear_case=bear_case[:1000],
2943
+ consensus=consensus[:1000],
2944
+ bull_confidence=bull_confidence,
2945
+ bear_confidence=bear_confidence,
2946
+ stance=stance
2947
+ )
2948
+
2949
+ logger.info(f"Debate audio generated: {audio_path}")
2950
+
2951
+ return (
2952
+ gr.update(value=audio_path, visible=True),
2953
+ gr.update(visible=True),
2954
+ )
2955
+
2956
+ except Exception as e:
2957
+ logger.error(f"Debate audio generation failed: {e}")
2958
+ return (
2959
+ gr.update(visible=False),
2960
+ gr.update(visible=True),
2961
+ )
2962
+
2963
  async def handle_build_portfolio(goals, risk_tolerance, constraints, session_state):
2964
  """Handle the Build Portfolio workflow with streaming updates.
2965
 
 
2972
  Yields:
2973
  Tuple of UI updates: (agent_chat, results_container, status)
2974
  """
2975
+ global LAST_BUILD_RESULT
2976
  logger.info(f"handle_build_portfolio called")
2977
  logger.info(f"Input types - goals: {type(goals).__name__}, risk_tolerance: {type(risk_tolerance).__name__}, constraints: {type(constraints).__name__}")
2978
  logger.info(f"Input values - goals: {goals!r}, risk_tolerance: {risk_tolerance!r}, constraints: {constraints!r}")
 
3047
  logger.error(f"Full traceback:\n{traceback.format_exc()}")
3048
  raise
3049
 
3050
+ # Store build result for audio generation
3051
+ if chat_messages:
3052
+ final_message = chat_messages[-1]
3053
+ if isinstance(final_message, dict) and "metadata" in final_message:
3054
+ portfolio_data = final_message.get("metadata", {}).get("portfolio", {})
3055
+ LAST_BUILD_RESULT = {
3056
+ "summary": final_message.get("content", ""),
3057
+ "holdings": portfolio_data.get("holdings", []),
3058
+ "reasoning": final_message.get("metadata", {}).get("reasoning_trace", [])
3059
+ }
3060
+ logger.info("Build result stored for audio generation")
3061
+
3062
  # Final yield: Show results container
3063
  logger.info(f"Completed streaming. Yielding final result with {len(chat_messages)} messages")
3064
  yield (
3065
  gr.update(value=chat_messages, visible=True), # build_agent_chat (final state, visible)
3066
  gr.update(visible=True), # build_results_container (show results)
3067
+ "Portfolio built successfully!", # build_status
3068
+ gr.update(visible=True) # build_audio_btn (show audio button)
3069
  )
3070
 
3071
  except Exception as e:
 
3148
  Tuple of UI updates: (debate_chat, results_container, status, bull_case, bull_conf,
3149
  bear_case, bear_conf, consensus, stance, debate_transcript)
3150
  """
3151
+ global LAST_DEBATE_DATA
3152
  if not portfolio_text or not portfolio_text.strip():
3153
  yield (
3154
  gr.update(value=[], visible=False), # compare_debate_chat (empty, hidden)
 
3231
  "confidence": 60 # Default, could parse from title
3232
  }
3233
 
3234
+ # Store debate data for audio generation
3235
+ if consensus_data and bull_case_data and bear_case_data:
3236
+ LAST_DEBATE_DATA = {
3237
+ "bull_case": bull_case_data.get("thesis", ""),
3238
+ "bear_case": bear_case_data.get("thesis", ""),
3239
+ "consensus": consensus_data.get("recommendation", ""),
3240
+ "bull_confidence": bull_case_data.get("confidence"),
3241
+ "bear_confidence": bear_case_data.get("confidence"),
3242
+ "stance": consensus_data.get("stance", "Mixed")
3243
+ }
3244
+ logger.info("Debate data stored for audio generation")
3245
+
3246
  # Final yield: Show results container with analysis
3247
  yield (
3248
  gr.update(value=chat_messages, visible=True), # compare_debate_chat (final state, visible)
 
3254
  bear_case_data.get("confidence", 0), # compare_bear_confidence
3255
  consensus_data.get("recommendation", ""), # compare_consensus
3256
  consensus_data.get("stance", "Mixed"), # compare_stance
3257
+ chat_messages, # compare_debate_transcript (use chat messages)
3258
+ gr.update(visible=True) # compare_audio_btn (show audio button)
3259
  )
3260
 
3261
  except Exception as e:
 
4159
  optimization_plot,
4160
  load_past_portfolio_dropdown,
4161
  export_pdf_btn,
4162
+ export_csv_btn,
4163
+ analysis_audio_btn # Audio button
4164
  ],
4165
  show_progress="full"
4166
  )
 
4697
  outputs=[
4698
  build_agent_chat,
4699
  build_results_container,
4700
+ build_status,
4701
+ build_audio_btn # Audio button
4702
  ]
4703
  )
4704
 
 
4731
  compare_bear_confidence,
4732
  compare_consensus,
4733
  compare_stance,
4734
+ compare_debate_transcript,
4735
+ compare_audio_btn # Audio button
4736
  ]
4737
  )
4738
 
 
4791
  ]
4792
  )
4793
 
4794
+ # ============================================================
4795
+ # AUDIO BUTTON EVENT HANDLERS
4796
+ # ============================================================
4797
+
4798
+ # Analysis audio button
4799
+ analysis_audio_btn.click(
4800
+ fn=generate_analysis_audio,
4801
+ inputs=[],
4802
+ outputs=[
4803
+ analysis_audio_player,
4804
+ analysis_audio_btn
4805
+ ]
4806
+ )
4807
+
4808
+ # Build portfolio audio button
4809
+ build_audio_btn.click(
4810
+ fn=generate_build_audio,
4811
+ inputs=[],
4812
+ outputs=[
4813
+ build_audio_player,
4814
+ build_audio_btn
4815
+ ]
4816
+ )
4817
+
4818
+ # Compare/debate audio button
4819
+ compare_audio_btn.click(
4820
+ fn=generate_debate_audio,
4821
+ inputs=[],
4822
+ outputs=[
4823
+ compare_audio_player,
4824
+ compare_audio_btn
4825
+ ]
4826
+ )
4827
+
4828
  return demo
4829
 
4830
 
backend/audio/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Audio generation services for Portfolio Intelligence Platform."""
2
+
3
+ from backend.audio.tts_service import TTSService, DebateAudioGenerator
4
+
5
+ __all__ = ["TTSService", "DebateAudioGenerator"]
backend/audio/tts_service.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Text-to-Speech service using ElevenLabs API for on-demand audio generation."""
2
+
3
+ import os
4
+ import logging
5
+ import tempfile
6
+ from typing import Optional, List, Dict, Any
7
+ from elevenlabs.client import AsyncElevenLabs
8
+ from elevenlabs import VoiceSettings
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
class TTSService:
    """Text-to-Speech service for generating audio narration on-demand.

    Wraps the ElevenLabs async client. When no API key is configured the
    service constructs in a disabled state (``is_available()`` returns
    False) instead of raising, so callers can degrade gracefully.

    NOTE(review): generated MP3s are written with delete=False and are
    never cleaned up here -- confirm the UI layer owns their lifecycle.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialise TTS service with ElevenLabs API.

        Args:
            api_key: ElevenLabs API key (falls back to the
                ELEVENLABS_API_KEY environment variable if not provided)
        """
        self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
        if not self.api_key:
            logger.warning("ELEVENLABS_API_KEY not set - audio generation will fail")
            self.client = None
        else:
            self.client = AsyncElevenLabs(api_key=self.api_key)

        # Default voice: George - professional, neutral male voice
        self.default_voice_id = "JBFqnCBsd6RMkjVDRZzb"

    def is_available(self) -> bool:
        """Check if TTS service is available (an API client was created)."""
        return self.client is not None

    async def generate_audio(
        self,
        text: str,
        voice_id: Optional[str] = None,
        model: str = "eleven_multilingual_v2",
        voice_settings: Optional[VoiceSettings] = None
    ) -> bytes:
        """Generate audio from text.

        Args:
            text: Text to convert to speech
            voice_id: ElevenLabs voice ID (uses default if not provided)
            model: ElevenLabs model ID
            voice_settings: Optional voice customisation

        Returns:
            Audio data as bytes (MP3 format)

        Raises:
            RuntimeError: If TTS service not available
            ValueError: If text is empty or whitespace-only
        """
        if not self.is_available():
            raise RuntimeError("TTS service not available - check ELEVENLABS_API_KEY")

        if not text or not text.strip():
            raise ValueError("Text cannot be empty")

        logger.info(f"Generating audio: {len(text)} characters")

        try:
            # convert() yields the MP3 stream as async chunks.
            audio_generator = await self.client.text_to_speech.convert(
                text=text,
                voice_id=voice_id or self.default_voice_id,
                model_id=model,
                voice_settings=voice_settings,
                output_format="mp3_44100_128"
            )

            audio_data = b"".join([chunk async for chunk in audio_generator])
            logger.info(f"Audio generated: {len(audio_data)} bytes")
            return audio_data

        except Exception as e:
            logger.error(f"Audio generation failed: {e}")
            raise

    async def generate_analysis_narration(
        self,
        analysis_text: str,
        recommendations: Optional[List[str]] = None
    ) -> str:
        """Generate audio narration for portfolio analysis.

        Args:
            analysis_text: Main analysis text/summary
            recommendations: Optional list of recommendations

        Returns:
            Path to generated MP3 file

        Raises:
            RuntimeError: If TTS service not available
        """
        if not self.is_available():
            raise RuntimeError("TTS service not available")

        # Build narrative script
        script = "Portfolio Analysis Summary.\n\n"
        script += analysis_text

        if recommendations:
            script += "\n\nRecommendations:\n"
            for i, rec in enumerate(recommendations, 1):
                script += f"\n{i}. {rec}\n"

        script += "\n\nThis analysis is for informational purposes only and does not constitute financial advice."

        audio_data = await self.generate_audio(script)
        path = self._save_mp3(audio_data, "analysis_")
        logger.info(f"Analysis narration saved to: {path}")
        return path

    async def generate_portfolio_narration(
        self,
        portfolio_summary: str,
        holdings: Optional[List[Dict[str, Any]]] = None
    ) -> str:
        """Generate audio narration for built portfolio.

        Args:
            portfolio_summary: Portfolio summary text
            holdings: Optional list of holdings with ticker and allocation

        Returns:
            Path to generated MP3 file

        Raises:
            RuntimeError: If TTS service not available
        """
        if not self.is_available():
            raise RuntimeError("TTS service not available")

        script = "Portfolio Construction Complete.\n\n"
        script += portfolio_summary

        if holdings:
            script += "\n\nPortfolio Holdings:\n"
            for holding in holdings[:10]:  # Limit to top 10
                ticker = holding.get("ticker", "Unknown")
                weight = holding.get("weight", 0)
                # Assumes weight is already a percentage (e.g. 12.5), not a
                # 0-1 fraction -- TODO confirm against the builder's output.
                script += f"{ticker}: {weight:.1f}% allocation. "

        script += "\n\nRemember to conduct your own research before making investment decisions."

        audio_data = await self.generate_audio(script)
        path = self._save_mp3(audio_data, "portfolio_")
        logger.info(f"Portfolio narration saved to: {path}")
        return path

    def _save_mp3(self, audio_data: bytes, prefix: str) -> str:
        """Persist MP3 bytes to a non-deleting temp file and return its path.

        Shared by the narration helpers so temp-file handling lives in one
        place. The file is intentionally not auto-deleted (delete=False);
        the caller serves it to the user.

        Args:
            audio_data: Encoded MP3 bytes to write
            prefix: Filename prefix identifying the narration type

        Returns:
            Absolute path of the written file
        """
        temp_file = tempfile.NamedTemporaryFile(
            delete=False,
            suffix=".mp3",
            prefix=prefix
        )
        try:
            temp_file.write(audio_data)
        finally:
            temp_file.close()
        return temp_file.name
class DebateAudioGenerator:
    """Generate multi-speaker audio for debate simulation.

    Assigns a distinct ElevenLabs voice to each debate role (bull, bear,
    consensus, moderator) and concatenates the synthesised sections into a
    single MP3 file. Constructs in a disabled state when no API key is set.
    """

    def __init__(self, api_key: Optional[str] = None):
        """Initialise debate audio generator.

        Args:
            api_key: ElevenLabs API key (falls back to the
                ELEVENLABS_API_KEY environment variable if not provided)
        """
        self.api_key = api_key or os.getenv("ELEVENLABS_API_KEY")
        if not self.api_key:
            logger.warning("ELEVENLABS_API_KEY not set - audio generation will fail")
            self.client = None
        else:
            self.client = AsyncElevenLabs(api_key=self.api_key)

        # Voice assignments for debate roles
        self.voices = {
            "bull": "pNInz6obpgDQGcFmaJgB",       # Adam - optimistic, energetic
            "bear": "XB0fDUnXU5powFXDhCwa",       # Charlotte - cautious, analytical
            "consensus": "JBFqnCBsd6RMkjVDRZzb",  # George - neutral, professional
            "moderator": "EXAVITQu4vr4xnSDxMaL"   # Bella - clear, articulate
        }

    def is_available(self) -> bool:
        """Check if debate audio generator is available."""
        return self.client is not None

    async def generate_debate_audio(
        self,
        bull_case: str,
        bear_case: str,
        consensus: str,
        bull_confidence: Optional[float] = None,
        bear_confidence: Optional[float] = None,
        stance: Optional[str] = None
    ) -> str:
        """Generate multi-speaker debate simulation audio.

        Args:
            bull_case: Bull perspective text
            bear_case: Bear perspective text
            consensus: Consensus recommendation text
            bull_confidence: Bull confidence percentage
            bear_confidence: Bear confidence percentage
            stance: Final stance (bullish/bearish/neutral)

        Returns:
            Path to generated MP3 file with complete debate

        Raises:
            RuntimeError: If the generator is not available
        """
        if not self.is_available():
            raise RuntimeError("Debate audio generator not available")

        logger.info("Generating debate simulation audio")

        audio_segments = []

        # Introduction
        intro_text = "Advisory Council Debate. We will hear from the Bull researcher, followed by the Bear researcher, and conclude with a consensus recommendation."
        audio_segments.append(await self._generate_segment(intro_text, self.voices["moderator"]))
        audio_segments.append(self._generate_pause(1.0))

        # Bull case.
        # NOTE(review): a confidence of exactly 0 is falsy and treated the
        # same as "missing" -- confirm that is intended.
        bull_intro = f"Bull Case. Confidence level: {bull_confidence:.0f} percent. " if bull_confidence else "Bull Case. "
        audio_segments.append(await self._generate_segment(bull_intro + bull_case, self.voices["bull"]))
        audio_segments.append(self._generate_pause(1.5))

        # Bear case (same truthiness caveat as above)
        bear_intro = f"Bear Case. Confidence level: {bear_confidence:.0f} percent. " if bear_confidence else "Bear Case. "
        audio_segments.append(await self._generate_segment(bear_intro + bear_case, self.voices["bear"]))
        audio_segments.append(self._generate_pause(1.5))

        # Consensus
        consensus_intro = f"Consensus Recommendation. Final stance: {stance}. " if stance else "Consensus Recommendation. "
        audio_segments.append(await self._generate_segment(consensus_intro + consensus, self.voices["consensus"]))

        # Concatenating independent MP3 streams is tolerated by most
        # players, though not strictly spec-clean.
        final_audio = b"".join(audio_segments)

        # Persist to a non-deleting temp file for the UI layer to serve.
        temp_file = tempfile.NamedTemporaryFile(
            delete=False,
            suffix=".mp3",
            prefix="debate_"
        )
        try:
            temp_file.write(final_audio)
        finally:
            temp_file.close()

        logger.info(f"Debate audio saved to: {temp_file.name}")
        return temp_file.name

    async def _generate_segment(self, text: str, voice_id: str) -> bytes:
        """Generate one audio segment with a specific voice.

        Args:
            text: Text to convert
            voice_id: ElevenLabs voice ID

        Returns:
            MP3 audio data as bytes
        """
        audio_generator = await self.client.text_to_speech.convert(
            text=text,
            voice_id=voice_id,
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128"
        )
        return b"".join([chunk async for chunk in audio_generator])

    def _generate_pause(self, duration: float) -> bytes:
        """Return filler bytes for a pause between debate segments.

        Fix: the previous implementation emitted raw zero bytes sized by an
        ad-hoc formula. Zero bytes are not valid MP3 frames, so decoders
        either skip them as garbage or reject the stream -- they never
        produced audible silence. Returning no bytes keeps the concatenated
        stream valid. TODO: for a real pause, insert properly encoded
        silent MP3 frames (e.g. pre-rendered silence clips or pydub).

        Args:
            duration: Intended pause duration in seconds (currently unused)

        Returns:
            Empty bytes (no-op placeholder)
        """
        return b""
backend/config.py CHANGED
@@ -73,6 +73,18 @@ class Settings(BaseSettings):
73
  validation_alias="ALPACA_SECRET_KEY"
74
  )
75
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  # Application Settings
77
  environment: str = Field(
78
  default="development",
 
73
  validation_alias="ALPACA_SECRET_KEY"
74
  )
75
 
76
+ # ElevenLabs TTS Configuration
77
+ elevenlabs_api_key: Optional[str] = Field(
78
+ default=None,
79
+ validation_alias="ELEVENLABS_API_KEY",
80
+ description="ElevenLabs API key for text-to-speech generation"
81
+ )
82
+ elevenlabs_enabled: bool = Field(
83
+ default=True,
84
+ validation_alias="ELEVENLABS_ENABLED",
85
+ description="Enable/disable audio generation features"
86
+ )
87
+
88
  # Application Settings
89
  environment: str = Field(
90
  default="development",
pyproject.toml CHANGED
@@ -63,6 +63,8 @@ dependencies = [
63
  "python-dateutil>=2.8.0",
64
  # Sentiment Analysis
65
  "vaderSentiment>=3.3.2",
 
 
66
  # Monitoring & Observability
67
  "sentry-sdk[fastapi]>=2.0.0",
68
  "fmp-data>=1.0.2",
 
63
  "python-dateutil>=2.8.0",
64
  # Sentiment Analysis
65
  "vaderSentiment>=3.3.2",
66
+ # Text-to-Speech
67
+ "elevenlabs>=1.0.0",
68
  # Monitoring & Observability
69
  "sentry-sdk[fastapi]>=2.0.0",
70
  "fmp-data>=1.0.2",
uv.lock CHANGED
@@ -1068,6 +1068,23 @@ wheels = [
1068
  { url = "https://files.pythonhosted.org/packages/87/62/9773de14fe6c45c23649e98b83231fffd7b9892b6cf863251dc2afa73643/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737", size = 64359, upload-time = "2025-02-09T03:17:01.998Z" },
1069
  ]
1070
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1071
  [[package]]
1072
  name = "email-validator"
1073
  version = "2.3.0"
@@ -3483,6 +3500,7 @@ dependencies = [
3483
  { name = "anthropic" },
3484
  { name = "arch" },
3485
  { name = "chronos-forecasting" },
 
3486
  { name = "fastapi" },
3487
  { name = "fastmcp" },
3488
  { name = "fmp-data" },
@@ -3541,6 +3559,7 @@ requires-dist = [
3541
  { name = "anthropic", specifier = ">=0.39.0" },
3542
  { name = "arch", specifier = ">=8.0.0" },
3543
  { name = "chronos-forecasting", specifier = ">=1.0.0" },
 
3544
  { name = "fastapi", specifier = ">=0.104.0" },
3545
  { name = "fastmcp", specifier = ">=2.12.5" },
3546
  { name = "fmp-data", specifier = ">=1.0.2" },
 
1068
  { url = "https://files.pythonhosted.org/packages/87/62/9773de14fe6c45c23649e98b83231fffd7b9892b6cf863251dc2afa73643/einops-0.8.1-py3-none-any.whl", hash = "sha256:919387eb55330f5757c6bea9165c5ff5cfe63a642682ea788a6d472576d81737", size = 64359, upload-time = "2025-02-09T03:17:01.998Z" },
1069
  ]
1070
 
1071
+ [[package]]
1072
+ name = "elevenlabs"
1073
+ version = "2.24.0"
1074
+ source = { registry = "https://pypi.org/simple" }
1075
+ dependencies = [
1076
+ { name = "httpx" },
1077
+ { name = "pydantic" },
1078
+ { name = "pydantic-core" },
1079
+ { name = "requests" },
1080
+ { name = "typing-extensions" },
1081
+ { name = "websockets" },
1082
+ ]
1083
+ sdist = { url = "https://files.pythonhosted.org/packages/48/b8/7ef371670766a09169969531bd9cfb6c3bfebe55ae479d2f009cfff4b2f7/elevenlabs-2.24.0.tar.gz", hash = "sha256:bd5278ea8521aeb463ef4649192b5aa8cedf12be7f9b3ca19c79331a7d33d305", size = 418072, upload-time = "2025-11-21T09:15:34.916Z" }
1084
+ wheels = [
1085
+ { url = "https://files.pythonhosted.org/packages/a7/52/a21ebfa6a2cadeb3b234a4bd92a59809fbbcbb6024ee2e48774f30daf8cf/elevenlabs-2.24.0-py3-none-any.whl", hash = "sha256:b8d0435f84a3bc0592dd086b4945bd79d9430c6892bf3f75752c03bb53ae3ad6", size = 1121523, upload-time = "2025-11-21T09:15:31.77Z" },
1086
+ ]
1087
+
1088
  [[package]]
1089
  name = "email-validator"
1090
  version = "2.3.0"
 
3500
  { name = "anthropic" },
3501
  { name = "arch" },
3502
  { name = "chronos-forecasting" },
3503
+ { name = "elevenlabs" },
3504
  { name = "fastapi" },
3505
  { name = "fastmcp" },
3506
  { name = "fmp-data" },
 
3559
  { name = "anthropic", specifier = ">=0.39.0" },
3560
  { name = "arch", specifier = ">=8.0.0" },
3561
  { name = "chronos-forecasting", specifier = ">=1.0.0" },
3562
+ { name = "elevenlabs", specifier = ">=1.0.0" },
3563
  { name = "fastapi", specifier = ">=0.104.0" },
3564
  { name = "fastmcp", specifier = ">=2.12.5" },
3565
  { name = "fmp-data", specifier = ">=1.0.2" },