| """ | |
| Context Cruncher - Gradio Application | |
| Extract structured context data from voice recordings using Gemini AI. | |
| """ | |
| import gradio as gr | |
| import os | |
| from pathlib import Path | |
| import tempfile | |
| from dotenv import load_dotenv | |
| from gemini_processor import ( | |
| process_audio_with_gemini, | |
| create_markdown_file, | |
| create_json_file | |
| ) | |
| # Load environment variables | |
| load_dotenv() | |
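
# Note: load_dotenv() reads a local .env file if one exists. An illustrative
# entry (the variable name matches the os.getenv("GEMINI_API") lookup used to
# pre-fill the API key field below):
#
#     GEMINI_API=your-gemini-api-key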


def process_audio(
    audio_input,
    uploaded_file,
    api_key: str,
    user_identification: str,
    user_name: str = ""
) -> tuple:
    """
    Process audio from either a microphone recording or an uploaded file.

    Args:
        audio_input: Filepath of the microphone recording, if any
        uploaded_file: Filepath of the uploaded audio file, if any
        api_key: Gemini API key
        user_identification: "name" or "user"
        user_name: The user's name when "name" identification is selected

    Returns:
        Tuple of (markdown_content, markdown_file, json_file, status_message)
    """
    try:
        # Validate API key
        if not api_key or api_key.strip() == "":
            return (
                "",
                None,
                None,
                "Error: Please provide a Gemini API key"
            )

        # Determine which audio source to use
        audio_path = None
        if audio_input is not None:
            audio_path = audio_input
        elif uploaded_file is not None:
            # gr.File is created with type="filepath", so the value is already a path string
            audio_path = uploaded_file

        if audio_path is None:
            return (
                "",
                None,
                None,
                "Error: Please record audio or upload an audio file"
            )

        # Determine user reference
        user_ref = None
        if user_identification == "name":
            if not user_name or user_name.strip() == "":
                return (
                    "",
                    None,
                    None,
                    "Error: Please provide your name when using name identification"
                )
            user_ref = user_name.strip()

        # Process with Gemini
        context_markdown, human_readable_name, snake_case_filename = process_audio_with_gemini(
            audio_path,
            api_key,
            user_ref
        )

        # Create output files
        md_filename, md_content = create_markdown_file(
            context_markdown,
            human_readable_name,
            snake_case_filename
        )
        json_filename, json_content = create_json_file(
            context_markdown,
            human_readable_name,
            snake_case_filename
        )

        # Write files to a temp directory for download
        temp_dir = tempfile.mkdtemp()
        md_path = Path(temp_dir) / md_filename
        json_path = Path(temp_dir) / json_filename

        with open(md_path, 'w') as f:
            f.write(md_content)
        with open(json_path, 'w') as f:
            f.write(json_content)

        return (
            md_content,
            str(md_path),
            str(json_path),
            f"Success! Context extracted: {human_readable_name}"
        )

    except Exception as e:
        return (
            "",
            None,
            None,
            f"Error: {str(e)}"
        )
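
# A minimal sketch of calling process_audio() directly, bypassing the UI.
# "recording.wav" is a hypothetical local file used only for illustration:
#
#     md_text, md_file, json_file, status = process_audio(
#         audio_input="recording.wav",
#         uploaded_file=None,
#         api_key=os.getenv("GEMINI_API", ""),
#         user_identification="user",
#     )
#     print(status)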


# Custom CSS for better styling
custom_css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
}
.main-header {
    text-align: center;
    margin-bottom: 1.5rem;
    padding-bottom: 1rem;
    border-bottom: 2px solid #e5e7eb;
}
.main-header h1 {
    font-size: 2rem;
    font-weight: 600;
    color: #1f2937;
    margin-bottom: 0.5rem;
}
.main-header p {
    color: #6b7280;
    font-size: 1rem;
}
.section-header {
    font-weight: 600;
    color: #374151;
    margin-bottom: 1rem;
}
"""

# Create Gradio interface
with gr.Blocks(css=custom_css, title="Context Cruncher") as demo:
    gr.Markdown(
        """
        # Context Cruncher

        Extract structured context data from voice recordings using AI
        """,
        elem_classes="main-header"
    )

    with gr.Tabs():
        with gr.Tab("Extract"):
            with gr.Row():
                with gr.Column(scale=1):
                    with gr.Accordion("Configuration", open=True):
                        api_key_input = gr.Textbox(
                            label="Gemini API Key",
                            placeholder="Enter your Gemini API key",
                            type="password",
                            value=os.getenv("GEMINI_API", ""),
                            info="Get your API key from https://ai.google.dev/"
                        )
                        user_identification = gr.Radio(
                            choices=["user", "name"],
                            value="user",
                            label="User Identification",
                            info="How should you be referred to in the context data?"
                        )
                        user_name_input = gr.Textbox(
                            label="Your Name",
                            placeholder="Enter your name",
                            visible=False,
                            info="Used when 'name' is selected above"
                        )

                    gr.Markdown("### Audio Input", elem_classes="section-header")
                    audio_recording = gr.Audio(
                        sources=["microphone"],
                        type="filepath",
                        label="Record Audio"
                    )
                    gr.Markdown("**OR**")
                    audio_upload = gr.File(
                        label="Upload Audio File",
                        file_types=["audio"],
                        type="filepath"
                    )

                    process_btn = gr.Button("Extract Context", variant="primary", size="lg")

                with gr.Column(scale=1):
                    gr.Markdown("### Results", elem_classes="section-header")
                    status_output = gr.Textbox(
                        label="Status",
                        interactive=False,
                        show_label=True
                    )
                    context_display = gr.Textbox(
                        label="Context Data (Markdown)",
                        lines=18,
                        interactive=False,
                        show_copy_button=True
                    )
                    with gr.Row():
                        markdown_download = gr.File(label="Download Markdown")
                        json_download = gr.File(label="Download JSON")
| with gr.Tab("About"): | |
| gr.Markdown( | |
| """ | |
| ## What is Context Cruncher? | |
| Context Cruncher transforms casual voice recordings into clean, structured context data | |
| that AI systems can use for personalization. | |
| **Context data** refers to specific information about users that grounds AI inference | |
| for more personalized results. | |
| ## How It Works | |
| 1. **Configure** - Enter your Gemini API key and choose how you want to be identified | |
| 2. **Input Audio** - Either record directly in your browser or upload an audio file (MP3, WAV, OPUS) | |
| 3. **Extract** - Click the button and let AI clean up your recording into structured context data | |
| 4. **Download** - Get your context data as Markdown or JSON, or copy directly from the text area | |
| ## What Gets Extracted | |
| This tool processes your audio by: | |
| - Removing irrelevant information and tangents | |
| - Eliminating duplicates and redundancy | |
| - Reformatting from first person to third person | |
| - Organizing information hierarchically | |
| - Outputting both Markdown and JSON formats | |
| ## Example Transformation | |
| **Raw Audio:** | |
| > "Okay so... let's document my health problems... I've had asthma since I was a kid. | |
| > I take a daily inhaler called Relvar for that. Oh hey Jay! What's up! | |
| > Okay, where was I... I also take Vyvanse for ADHD." | |
| **Structured Output:** | |
| ```markdown | |
| ## Medical Conditions | |
| - the user has had asthma since childhood | |
| - the user has adult ADHD | |
| ## Medication List | |
| - the user takes Relvar, daily, for asthma | |
| - the user takes Vyvanse for ADHD | |
| ``` | |
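
                An illustrative JSON rendering of the same context (the exact schema is
                defined by the app's `create_json_file` helper and may differ):

                ```json
                {
                  "name": "Health Overview",
                  "sections": [
                    {
                      "heading": "Medical Conditions",
                      "items": [
                        "the user has had asthma since childhood",
                        "the user has adult ADHD"
                      ]
                    },
                    {
                      "heading": "Medication List",
                      "items": [
                        "the user takes Relvar, daily, for asthma",
                        "the user takes Vyvanse for ADHD"
                      ]
                    }
                  ]
                }
                ```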

                ## Privacy Notice

                Your audio is processed using the Gemini API. Review Google's privacy policies
                before using this tool with sensitive information.

                ## Technical Details

                - **AI Model**: Gemini 2.0 Flash (multimodal audio understanding)
                - **Processing**: Direct audio file upload to Gemini API
                - **Output Formats**: Markdown and JSON

                ## Use Cases

                - AI assistant personalization
                - Knowledge management
                - Preference mapping
                - Medical history documentation (note privacy considerations)
                - Project context capture
                """
            )

    # Show/hide name input based on identification method
    def toggle_name_input(identification_choice):
        return gr.update(visible=identification_choice == "name")

    user_identification.change(
        fn=toggle_name_input,
        inputs=[user_identification],
        outputs=[user_name_input]
    )

    # Process button click
    process_btn.click(
        fn=process_audio,
        inputs=[
            audio_recording,
            audio_upload,
            api_key_input,
            user_identification,
            user_name_input
        ],
        outputs=[
            context_display,
            markdown_download,
            json_download,
            status_output
        ]
    )


if __name__ == "__main__":
    # For Hugging Face Spaces, share should be False
    # Set server_name to 0.0.0.0 for Spaces compatibility
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
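
# To run locally (assuming the repo's dependencies are installed and this file
# is saved as app.py, the conventional Spaces entry point):
#
#     python app.py
#
# then open http://localhost:7860 in a browser.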