Commit 7662a6a · Revert portg
Parent: 6951e54

app.py CHANGED
@@ -559,6 +559,60 @@ class RealtimeSpeakerDiarization:
         except Exception as e:
             print(f"Error feeding audio data: {e}")
 
+    def process_audio_chunk(self, audio_data, sample_rate=16000):
+        """Process audio chunk from FastRTC input"""
+        if not self.is_running or self.recorder is None:
+            return
+
+        try:
+            # Convert float audio to int16 for the recorder
+            if audio_data.dtype == np.float32 or audio_data.dtype == np.float64:
+                if np.max(np.abs(audio_data)) <= 1.0:
+                    # Float audio is normalized to [-1, 1], convert to int16
+                    audio_int16 = (audio_data * 32767).astype(np.int16)
+                else:
+                    # Audio is already in higher range
+                    audio_int16 = audio_data.astype(np.int16)
+            else:
+                audio_int16 = audio_data
+
+            # Ensure correct shape (1, N) for the recorder
+            if len(audio_int16.shape) == 1:
+                audio_int16 = np.expand_dims(audio_int16, 0)
+
+            # Resample if needed
+            if sample_rate != SAMPLE_RATE:
+                audio_int16 = self._resample_audio(audio_int16, sample_rate, SAMPLE_RATE)
+
+            # Convert to bytes for feeding to recorder
+            audio_bytes = audio_int16.tobytes()
+
+            # Feed to recorder
+            self.feed_audio_data(audio_bytes)
+
+        except Exception as e:
+            print(f"Error processing audio chunk: {e}")
+
+    def _resample_audio(self, audio, orig_sr, target_sr):
+        """Resample audio to target sample rate"""
+        try:
+            import scipy.signal
+
+            # Get the resampling ratio
+            ratio = target_sr / orig_sr
+
+            # Calculate the new length
+            new_length = int(len(audio[0]) * ratio)
+
+            # Resample the audio
+            resampled = scipy.signal.resample(audio[0], new_length)
+
+            # Return in the same shape format
+            return np.expand_dims(resampled, 0)
+        except Exception as e:
+            print(f"Error resampling audio: {e}")
+            return audio
+
 
 # FastRTC Audio Handler for Real-time Diarization
 
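A note on the resampling path added above, separate from the commit itself: scipy.signal.resample is FFT-based, which can ring on short, non-periodic speech buffers. For a fixed-rate conversion such as 48 kHz microphone audio down to a 16 kHz recorder, a polyphase filter is a common alternative. A minimal sketch under that assumption; resample_int16 is an illustrative name, not a helper from app.py:

# Sketch only, not part of this commit: polyphase alternative to the
# FFT-based scipy.signal.resample used in _resample_audio above.
from math import gcd

import numpy as np
from scipy.signal import resample_poly

def resample_int16(audio_int16, orig_sr=48000, target_sr=16000):
    """Resample a (1, N) int16 buffer with a polyphase filter."""
    g = gcd(target_sr, orig_sr)
    up, down = target_sr // g, orig_sr // g              # 1 and 3 for 48 kHz -> 16 kHz
    resampled = resample_poly(audio_int16[0].astype(np.float32), up, down)
    return np.expand_dims(np.clip(resampled, -32768, 32767).astype(np.int16), 0)
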
@@ -638,6 +692,16 @@ class DiarizationHandler(AsyncStreamHandler):
                 )
             except Exception as e:
                 print(f"Error in async audio processing: {e}")
+
+    async def start_up(self) -> None:
+        """Initialize any resources when the stream starts"""
+        print("FastRTC stream started")
+        self.is_processing = True
+
+    async def shutdown(self) -> None:
+        """Clean up any resources when the stream ends"""
+        print("FastRTC stream shutting down")
+        self.is_processing = False
 
 
 # Global instances
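The start_up and shutdown hooks above only toggle the handler's lifecycle state; the code that actually moves frames from FastRTC into the diarization system sits outside this hunk. As a rough illustration of how a receive callback could hand incoming audio to the new process_audio_chunk method, assuming frames arrive as (sample_rate, ndarray) pairs; everything except process_audio_chunk is an illustrative name, not code from app.py:

# Illustrative sketch, not the commit's DiarizationHandler implementation.
import numpy as np

class ForwardingHandlerSketch:
    def __init__(self, diarization_system):
        self.diarization_system = diarization_system
        self.is_processing = False

    async def receive(self, frame):
        if not self.is_processing:
            return
        sample_rate, audio = frame                # assumed (int, np.ndarray) frame layout
        audio = np.asarray(audio).squeeze()       # process_audio_chunk accepts 1-D or (1, N)
        self.diarization_system.process_audio_chunk(audio, sample_rate=sample_rate)
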
@@ -1083,8 +1147,19 @@ def create_app(diarization_sys=None):
     # Mount Gradio on FastAPI
     app = gr.mount_gradio_app(fastapi_app, gradio_interface, path="/")
 
-    # Setup FastRTC stream
-
+    # Setup FastRTC stream
+    if diarization_system is not None:
+        # Initialize the system if not already done
+        if not hasattr(diarization_system, 'encoder') or diarization_system.encoder is None:
+            diarization_system.initialize_models()
+
+        # Create audio handler if needed
+        global audio_handler
+        if audio_handler is None:
+            audio_handler = DiarizationHandler(diarization_system)
+
+        # Setup and mount the FastRTC stream
+        setup_fastrtc_stream(app)
 
     return app, gradio_interface
 
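setup_fastrtc_stream(app) is defined elsewhere in app.py and is not part of this diff. One plausible shape for it, assuming fastrtc's Stream API with the global audio_handler mounted on the FastAPI app; this is a sketch of what such a helper could look like, not the file's actual implementation:

# Hypothetical sketch of setup_fastrtc_stream; the real function lives
# elsewhere in app.py and may differ.
from fastrtc import Stream

def setup_fastrtc_stream(app):
    if audio_handler is None:
        return None
    stream = Stream(
        handler=audio_handler,       # the DiarizationHandler created in create_app
        modality="audio",
        mode="send-receive",
    )
    stream.mount(app)                # expose the WebRTC endpoints on the FastAPI app
    return stream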