Spaces:
Build error
Build error
Update main.py
Browse files
main.py
CHANGED
|
@@ -13,8 +13,8 @@ import logging
|
|
| 13 |
import os
|
| 14 |
import shutil
|
| 15 |
from pathlib import Path
|
| 16 |
-
import tempfile
|
| 17 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
|
|
|
| 18 |
|
| 19 |
app = FastAPI()
|
| 20 |
|
|
@@ -41,19 +41,24 @@ class TranscriptResponse(BaseModel):
|
|
| 41 |
error: str | None
|
| 42 |
processing_time: float
|
| 43 |
|
| 44 |
-
@retry(stop=stop_after_attempt(3), wait=wait_fixed(
|
| 45 |
def init_driver():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
options = Options()
|
| 47 |
options.add_argument("--headless=new")
|
| 48 |
options.add_argument("--no-sandbox")
|
| 49 |
options.add_argument("--disable-dev-shm-usage")
|
| 50 |
options.add_argument("--disable-gpu")
|
| 51 |
options.add_argument("--disable-extensions")
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
options.add_argument(f"--user-data-dir={user_data_dir}")
|
| 56 |
-
logger.info(f"Using temporary user data directory: {user_data_dir}")
|
| 57 |
|
| 58 |
possible_chrome_paths = [
|
| 59 |
"/usr/bin/google-chrome",
|
|
@@ -83,26 +88,22 @@ def init_driver():
|
|
| 83 |
chrome_version = driver.capabilities["browserVersion"]
|
| 84 |
chromedriver_version = driver.capabilities["chrome"]["chromedriverVersion"].split()[0]
|
| 85 |
logger.info(f"Chrome version: {chrome_version}, ChromeDriver version: {chromedriver_version}")
|
| 86 |
-
return driver, user_data_dir
|
| 87 |
except Exception as e:
|
| 88 |
logger.error(f"Driver initialization failed: {str(e)}")
|
| 89 |
-
# Clean up the temporary directory in case of failure
|
| 90 |
-
if Path(user_data_dir).exists():
|
| 91 |
-
shutil.rmtree(user_data_dir, ignore_errors=True)
|
| 92 |
raise Exception(f"Driver initialization failed: {str(e)}")
|
| 93 |
|
| 94 |
@app.post("/transcript", response_model=TranscriptResponse)
|
| 95 |
async def get_transcript(request: VideoRequest):
|
| 96 |
start_time = time.time()
|
| 97 |
driver = None
|
| 98 |
-
user_data_dir = None
|
| 99 |
|
| 100 |
try:
|
| 101 |
video_url = request.url
|
| 102 |
if not ("youtube.com" in video_url or "youtu.be" in video_url):
|
| 103 |
raise HTTPException(status_code=400, detail="Invalid YouTube URL")
|
| 104 |
|
| 105 |
-
driver, user_data_dir = init_driver()
|
| 106 |
logger.info(f"Processing URL: {video_url}")
|
| 107 |
driver.get(video_url)
|
| 108 |
|
|
@@ -174,19 +175,25 @@ async def get_transcript(request: VideoRequest):
|
|
| 174 |
finally:
|
| 175 |
if driver:
|
| 176 |
driver.quit()
|
| 177 |
-
|
| 178 |
-
shutil.rmtree(user_data_dir, ignore_errors=True)
|
| 179 |
-
logger.info(f"Cleaned up temporary user data directory: {user_data_dir}")
|
| 180 |
|
| 181 |
@app.get("/health")
|
| 182 |
def health_check():
|
| 183 |
chrome_path = shutil.which("google-chrome")
|
| 184 |
chromedriver_path = shutil.which("chromedriver")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
return {
|
| 186 |
"ChromePath": chrome_path,
|
| 187 |
"ChromeDriverPath": chromedriver_path,
|
| 188 |
"ChromeExists": Path(chrome_path or "").exists(),
|
| 189 |
-
"ChromeDriverExists": Path(chromedriver_path or "").exists()
|
|
|
|
| 190 |
}
|
| 191 |
|
| 192 |
@app.get("/")
|
|
|
|
| 13 |
import os
|
| 14 |
import shutil
|
| 15 |
from pathlib import Path
|
|
|
|
| 16 |
from tenacity import retry, stop_after_attempt, wait_fixed
|
| 17 |
+
import subprocess
|
| 18 |
|
| 19 |
app = FastAPI()
|
| 20 |
|
|
|
|
| 41 |
error: str | None
|
| 42 |
processing_time: float
|
| 43 |
|
| 44 |
+
@retry(stop=stop_after_attempt(3), wait=wait_fixed(3))
|
| 45 |
def init_driver():
|
| 46 |
+
# Clean up any lingering Chrome processes
|
| 47 |
+
try:
|
| 48 |
+
subprocess.run(["pkill", "-f", "chrome"], check=False)
|
| 49 |
+
logger.info("Terminated any existing Chrome processes")
|
| 50 |
+
except Exception as e:
|
| 51 |
+
logger.warning(f"Failed to terminate Chrome processes: {str(e)}")
|
| 52 |
+
|
| 53 |
options = Options()
|
| 54 |
options.add_argument("--headless=new")
|
| 55 |
options.add_argument("--no-sandbox")
|
| 56 |
options.add_argument("--disable-dev-shm-usage")
|
| 57 |
options.add_argument("--disable-gpu")
|
| 58 |
options.add_argument("--disable-extensions")
|
| 59 |
+
# Removed --user-data-dir to avoid conflicts
|
| 60 |
+
options.add_argument("--disable-setuid-sandbox")
|
| 61 |
+
options.add_argument("--remote-debugging-port=9222")
|
|
|
|
|
|
|
| 62 |
|
| 63 |
possible_chrome_paths = [
|
| 64 |
"/usr/bin/google-chrome",
|
|
|
|
| 88 |
chrome_version = driver.capabilities["browserVersion"]
|
| 89 |
chromedriver_version = driver.capabilities["chrome"]["chromedriverVersion"].split()[0]
|
| 90 |
logger.info(f"Chrome version: {chrome_version}, ChromeDriver version: {chromedriver_version}")
|
| 91 |
+
return driver
|
| 92 |
except Exception as e:
|
| 93 |
logger.error(f"Driver initialization failed: {str(e)}")
|
|
|
|
|
|
|
|
|
|
| 94 |
raise Exception(f"Driver initialization failed: {str(e)}")
|
| 95 |
|
| 96 |
@app.post("/transcript", response_model=TranscriptResponse)
|
| 97 |
async def get_transcript(request: VideoRequest):
|
| 98 |
start_time = time.time()
|
| 99 |
driver = None
|
|
|
|
| 100 |
|
| 101 |
try:
|
| 102 |
video_url = request.url
|
| 103 |
if not ("youtube.com" in video_url or "youtu.be" in video_url):
|
| 104 |
raise HTTPException(status_code=400, detail="Invalid YouTube URL")
|
| 105 |
|
| 106 |
+
driver = init_driver()
|
| 107 |
logger.info(f"Processing URL: {video_url}")
|
| 108 |
driver.get(video_url)
|
| 109 |
|
|
|
|
| 175 |
finally:
|
| 176 |
if driver:
|
| 177 |
driver.quit()
|
| 178 |
+
logger.info("Driver closed")
|
|
|
|
|
|
|
| 179 |
|
| 180 |
@app.get("/health")
|
| 181 |
def health_check():
|
| 182 |
chrome_path = shutil.which("google-chrome")
|
| 183 |
chromedriver_path = shutil.which("chromedriver")
|
| 184 |
+
try:
|
| 185 |
+
# Check for running Chrome processes
|
| 186 |
+
result = subprocess.run(["ps", "aux"], capture_output=True, text=True)
|
| 187 |
+
chrome_processes = [line for line in result.stdout.splitlines() if "chrome" in line.lower()]
|
| 188 |
+
chrome_process_count = len(chrome_processes)
|
| 189 |
+
except Exception as e:
|
| 190 |
+
chrome_process_count = f"Error checking processes: {str(e)}"
|
| 191 |
return {
|
| 192 |
"ChromePath": chrome_path,
|
| 193 |
"ChromeDriverPath": chromedriver_path,
|
| 194 |
"ChromeExists": Path(chrome_path or "").exists(),
|
| 195 |
+
"ChromeDriverExists": Path(chromedriver_path or "").exists(),
|
| 196 |
+
"ChromeProcessCount": chrome_process_count
|
| 197 |
}
|
| 198 |
|
| 199 |
@app.get("/")
|