test / server.js
devusman's picture
tweaks
484d898
const express = require('express');
const puppeteerExtra = require('puppeteer-extra'); // NEW: For stealth
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); // NEW: Stealth plugin
const cors = require('cors');
const { EventEmitter } = require('events');
puppeteerExtra.use(StealthPlugin()); // NEW: Enable stealth plugin
const app = express();
const port = 7860;
app.use(cors());
app.use(express.json());
// --- Progress Tracking and Job Storage --- (Unchanged)
const progressTrackers = new Map();
const downloadJobs = new Map();
class ProgressTracker extends EventEmitter {
constructor(sessionId) {
super();
this.sessionId = sessionId;
this.progress = 0;
this.status = 'initializing';
this.message = '';
}
// oo yeah
updateProgress(progress, status, message) {
this.progress = progress;
this.status = status;
this.message = message;
const update = {
sessionId: this.sessionId,
progress,
status,
message,
timestamp: new Date().toISOString()
};
this.emit('progress', update);
console.log(`πŸ“Š [${this.sessionId}] ${progress}% - ${status}: ${message}`);
}
}
// --- Enhanced Human Behavior Simulation ---
const simulateHumanBehavior = async (page, progressTracker) => {
console.log("πŸ§‘ Simulating human-like mouse movements and delays...");
const viewport = page.viewport();
for (let i = 0; i < 5; i++) {
const x = Math.random() * (viewport.width || 1920);
const y = Math.random() * (viewport.height || 1080);
await page.mouse.move(x, y, { steps: 10 });
await page.waitForTimeout(Math.random() * 1000 + 500);
}
// Random scroll a bit
await page.evaluate(() => {
window.scrollBy(0, Math.random() * 200 - 100);
});
await page.waitForTimeout(Math.random() * 2000 + 1000);
progressTracker?.updateProgress(progressTracker.progress + 1, 'humanizing', 'Human behavior simulated');
};
// --- Enhanced Cloudflare Bypass Function ---
const handleCloudflareChallenge = async (page, progressTracker) => {
progressTracker?.updateProgress(35, 'cloudflare', 'Detecting and bypassing Cloudflare...');
console.log("☁️ Checking for Cloudflare challenge...");
const cloudflareSelectors = [
'#challenge-running',
'.cf-browser-verification',
'[data-ray]',
'#cf-challenge-running',
'.under-attack',
'iframe[src*="cloudflare"]',
'#challenge-form', // Added for JS challenge
'.cf-turnstile' // For Turnstile CAPTCHA
];
// Wait for any Cloudflare element to appear
let challengeDetected = false;
for (const selector of cloudflareSelectors) {
try {
await page.waitForSelector(selector, { timeout: 5000 });
challengeDetected = true;
console.log(`☁️ Cloudflare challenge detected with selector: ${selector}`);
break;
} catch (e) {
// Continue to next selector
}
}
if (challengeDetected) {
// Simulate human behavior before attempting to solve
await simulateHumanBehavior(page, progressTracker);
// Wait for the challenge to resolve (JS execution)
console.log("⏳ Waiting for Cloudflare challenge to complete...");
try {
await page.waitForFunction(() => {
const selectors = [
'#challenge-running',
'.cf-browser-verification',
'[data-ray]',
'#cf-challenge-running',
'.under-attack',
'#challenge-form',
'.cf-turnstile'
];
return !selectors.some(sel => document.querySelector(sel));
}, { timeout: 90000 }); // Increased timeout for slower challenges
} catch (e) {
console.log("⚠️ Standard wait failed, attempting Turnstile click...");
// Fallback: Check for and click Turnstile if present
try {
const cfInput = await page.$('[name="cf-turnstile-response"]');
if (cfInput) {
const parentItem = await cfInput.evaluateHandle((element) => element.parentElement);
const coordinates = await parentItem.boundingBox();
if (coordinates) {
await page.mouse.click(coordinates.x + 25, coordinates.y + coordinates.height / 2);
console.log("πŸ–±οΈ Clicked on Turnstile CAPTCHA");
await page.waitForTimeout(3000);
}
}
// Retry wait after click
await page.waitForFunction(() => {
const selectors = [
'#challenge-running',
'.cf-browser-verification',
'[data-ray]',
'#cf-challenge-running',
'.under-attack',
'#challenge-form',
'.cf-turnstile'
];
return !selectors.some(sel => document.querySelector(sel));
}, { timeout: 60000 });
} catch (clickError) {
console.error("❌ Turnstile click failed:", clickError.message);
throw new Error("Failed to bypass Cloudflare challenge. Try again later or use a proxy.");
}
}
// Additional wait for page to stabilize post-challenge with random delay
const randomDelay = (min, max) => Math.floor(Math.random() * (max - min + 1) + min);
await page.waitForTimeout(randomDelay(3000, 7000));
console.log("βœ… Cloudflare challenge bypassed successfully.");
progressTracker?.updateProgress(38, 'cloudflare', 'Cloudflare bypassed');
} else {
console.log("βœ… No Cloudflare challenge detected.");
}
};
// --- Puppeteer Logic (Updated for Enhanced Cloudflare Bypass) ---
const bypassCookiesAndRestrictions = async (page, progressTracker) => {
progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
console.log("πŸͺ Starting comprehensive cookie and restriction bypass...");
// Step 1: Set cookies before page load
const preCookies = [
{ name: 'cookieConsent', value: 'accepted', domain: '.studocu.com' },
{ name: 'cookie_consent', value: 'true', domain: '.studocu.com' },
{ name: 'gdpr_consent', value: 'accepted', domain: '.studocu.com' },
{ name: 'privacy_policy_accepted', value: 'true', domain: '.studocu.com' },
{ name: 'user_consent', value: '1', domain: '.studocu.com' },
{ name: 'analytics_consent', value: 'false', domain: '.studocu.com' },
{ name: 'marketing_consent', value: 'false', domain: '.studocu.com' },
{ name: 'functional_consent', value: 'true', domain: '.studocu.com' },
];
for (const cookie of preCookies) {
try {
await page.setCookie(cookie);
} catch (e) {
console.log(`Failed to set cookie ${cookie.name}:`, e.message);
}
}
// Step 2: Inject CSS to hide cookie banners immediately (Updated: Added more selectors for previews and blurred overlays)
await page.addStyleTag({
content: `
/* Hide all possible cookie banners */
[id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
.gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
.cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
[class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance,
div[style*="position: fixed"]:has-text("cookie"), div[style*="position: fixed"]:has-text("consent"), .fixed:has-text("cookie"), .fixed:has-text("consent") {
display: none !important;
visibility: hidden !important;
opacity: 0 !important;
z-index: -9999 !important;
pointer-events: none !important;
}
/* Remove blur and premium overlays, including previews */
[class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i], [class*="preview" i], [class*="blurred-container" i], [class*="blurred" i] {
display: none !important;
filter: none !important;
backdrop-filter: none !important;
opacity: 1 !important;
visibility: visible !important;
}
/* Ensure document content is visible */
.document-content, .page-content, [data-page] {
filter: none !important;
opacity: 1 !important;
visibility: visible !important;
pointer-events: auto !important;
}
/* Remove fixed overlays */
.fixed-overlay, .sticky-overlay, .content-overlay {
display: none !important;
}
/* Restore scrolling */
html, body {
overflow: auto !important;
position: static !important;
}
/* Hide Cloudflare elements if they persist */
#challenge-running, .cf-browser-verification, [data-ray], .under-attack {
display: none !important;
}
`
});
// Step 3: Inject JavaScript to handle dynamic cookie banners (Unchanged)
await page.evaluateOnNewDocument(() => {
// Override common cookie consent functions
window.cookieConsent = { accepted: true };
window.gtag = () => { };
window.ga = () => { };
window.dataLayer = [];
// Mutation observer to catch dynamically added cookie banners
const observer = new MutationObserver((mutations) => {
mutations.forEach((mutation) => {
mutation.addedNodes.forEach((node) => {
if (node.nodeType === 1) { // Element node
const element = node;
const text = element.textContent || '';
const className = element.className || '';
const id = element.id || '';
// Check if this looks like a cookie banner
if (
text.toLowerCase().includes('cookie') ||
text.toLowerCase().includes('consent') ||
text.toLowerCase().includes('privacy policy') ||
className.toLowerCase().includes('cookie') ||
className.toLowerCase().includes('consent') ||
className.toLowerCase().includes('gdpr') ||
id.toLowerCase().includes('cookie') ||
id.toLowerCase().includes('consent')
) {
console.log('Removing detected cookie banner:', element);
element.remove();
}
}
});
});
});
observer.observe(document.body, { childList: true, subtree: true });
// Set up periodic cleanup
setInterval(() => {
const cookieElements = document.querySelectorAll(`
[id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
.gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
.cmp-banner, .cc-banner
`);
cookieElements.forEach(el => el.remove());
// Restore body scroll
document.body.style.overflow = 'auto';
document.documentElement.style.overflow = 'auto';
}, 1000);
});
progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
return true;
};
const unblurContent = async (page, progressTracker) => {
progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
console.log("πŸ”“ Unblurring content and bypassing premium restrictions...");
await page.evaluate(() => {
const removeRestrictions = () => {
const removeBySelector = (selector) => {
document.querySelectorAll(selector).forEach(el => el.remove());
};
removeBySelector("#adbox, .adsbox, .ad-box, .banner-ads, .advert");
removeBySelector(".PremiumBannerBlobWrapper_overflow-wrapper__xsaS8");
// Added: Remove preview and blurred overlays
removeBySelector('[class*="preview" i], [class*="blurred-container" i], [class*="blurred" i]:not(img)');
const removeBlur = (element = document) => {
element.querySelectorAll("*").forEach(el => {
const style = window.getComputedStyle(el);
if (
style.filter?.includes("blur") ||
style.backdropFilter?.includes("blur") ||
parseFloat(style.opacity) < 1 ||
(el.className && el.className.toString().toLowerCase().includes("blur")) ||
(el.className && el.className.toString().toLowerCase().includes("premium"))
) {
el.style.filter = "none !important";
el.style.backdropFilter = "none !important";
el.style.opacity = "1 !important";
if (el.classList) {
el.classList.remove("blur", "blurred", "premium-blur");
}
}
});
};
removeBlur();
const contentSelectors = [
'.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
'[data-testid*="page"]', '.page', '.document-page', 'main', 'article'
];
contentSelectors.forEach(selector => {
document.querySelectorAll(selector).forEach(el => {
el.style.setProperty('filter', 'none', 'important');
el.style.setProperty('opacity', '1', 'important');
el.style.setProperty('visibility', 'visible', 'important');
el.style.setProperty('display', 'block', 'important');
el.style.setProperty('pointer-events', 'auto', 'important');
});
});
};
removeRestrictions();
const intervalId = setInterval(removeRestrictions, 1000);
setTimeout(() => clearInterval(intervalId), 30000);
});
progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
};
// New function to fetch clear images by modifying blurred URLs
const fetchClearImages = async (page, progressTracker) => {
progressTracker?.updateProgress(65, 'unblurring_images', 'Fetching clear page images...');
console.log("πŸ–ΌοΈ Modifying blurred image URLs to fetch clear versions...");
await page.evaluate(() => {
const images = document.querySelectorAll('img[src*="/blurred/"]');
images.forEach(img => {
img.src = img.src.replace(/\/blurred\//, '/');
console.log(`Modified image src: ${img.src}`);
});
});
// Wait for the modified images to load
await page.evaluate(async () => {
const images = Array.from(document.querySelectorAll('img'));
await Promise.all(images.map(img => {
if (img.complete) return Promise.resolve();
return new Promise((resolve) => {
img.addEventListener('load', resolve);
img.addEventListener('error', resolve);
setTimeout(resolve, 10000);
});
}));
});
await new Promise(resolve => setTimeout(resolve, 3000)); // Additional delay for stability
progressTracker?.updateProgress(70, 'unblurring_images', 'Clear images loaded');
};
const applyPrintStyles = async (page, progressTracker) => {
progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
console.log("πŸ–¨οΈ Applying print styles for clean PDF...");
await page.evaluate(() => {
const style = document.createElement("style");
style.id = "print-style-extension";
style.innerHTML = `
@page {
/* Set page size to A4 and remove default margins */
size: A4 portrait;
margin: 0mm;
}
@media print {
html, body {
/* Ensure the body takes the full width and has no extra padding/margin */
width: 210mm !important;
height: auto !important;
margin: 0 !important;
padding: 0 !important;
overflow: visible !important;
background: white !important;
color: black !important;
display: flex;
justify-content: center;
}
/* Remove all unwanted elements like headers, footers, sidebars, etc. */
header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
[class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
.ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
.HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
.Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
.InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
.Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
display: none !important;
}
/* Force all elements to have a transparent background and no shadow */
* {
box-shadow: none !important;
background: transparent !important;
color: inherit !important;
}
/*
* KEY FIX: Target the main document container.
* Force it to be a block element, remove any transforms or max-widths,
* and center it perfectly within the page.
*/
.Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
.Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
position: static !important;
display: block !important;
width: 100% !important;
max-width: none !important;
margin: 0 auto !important; /* Center horizontally */
padding: 0 !important;
box-sizing: border-box; /* Include padding in width calculation */
transform: none !important;
}
/* Ensure individual pages and images within the document use the full width */
[data-page], .page, .document-page, img {
page-break-after: always !important;
page-break-inside: avoid !important;
page-break-before: avoid !important;
width: 100% !important;
max-width: 100% !important;
height: auto !important;
display: block !important;
margin: 0 !important;
padding: 0 !important;
}
}
`;
document.head.appendChild(style);
});
progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
};
const studocuDownloader = async (url, options = {}, progressTracker = null) => {
let browser;
try {
progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
console.log("πŸš€ Launching browser with enhanced stealth configuration...");
browser = await puppeteerExtra.launch({ // UPDATED: Use puppeteerExtra
headless: true,
args: [
'--no-sandbox',
'--disable-setuid-sandbox',
'--disable-dev-shm-usage',
'--disable-accelerated-2d-canvas',
'--no-first-run',
'--no-zygote',
'--disable-gpu',
'--disable-features=VizDisplayCompositor',
'--disable-background-networking',
'--disable-background-timer-throttling',
'--disable-renderer-backgrounding',
'--disable-backgrounding-occluded-windows',
'--disable-ipc-flooding-protection',
'--disable-web-security',
'--disable-features=site-per-process',
'--disable-blink-features=AutomationControlled',
'--disable-extensions',
'--ignore-certificate-errors',
// NEW: Additional args for better Cloudflare evasion
'--disable-features=TranslateUI',
'--disable-ipc-flooding',
'--disable-backgrounding-occluded-windows',
'--disable-renderer-backgrounding',
'--disable-features=TranslateUI,BlinkGenPropertyTrees',
'--metrics-recording-only',
'--no-default-browser-check',
'--safebrowsing-disable-auto-update',
'--password-store=basic',
'--use-mock-keychain'
],
ignoreHTTPSErrors: true,
timeout: 300000,
});
const page = await browser.newPage();
progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
await page.setViewport({ width: 1920, height: 1080 }); // NEW: Use full HD for more realistic viewport, adjust back if needed for A4
// NOTE: Stealth plugin handles most of this, but keeping for extra safety
await page.evaluateOnNewDocument(() => {
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
// NEW: Additional stealth evasions
Object.defineProperty(navigator, 'permissions', {
get: () => ({
query: () => Promise.resolve({ state: 'granted' })
})
});
window.chrome = {
runtime: {},
loadTimes: function () { },
csi: function () { },
app: {}
};
});
// Set up cookie and content bypass
await bypassCookiesAndRestrictions(page, progressTracker);
// Block unnecessary resources (UPDATED: Loosened for Cloudflare - allow cloudflare.com requests)
await page.setRequestInterception(true);
page.on('request', (req) => {
const resourceType = req.resourceType();
const reqUrl = req.url().toLowerCase();
if (resourceType === 'document') {
req.continue();
return;
}
// NEW: Always allow Cloudflare-related requests
if (reqUrl.includes('cloudflare') || reqUrl.includes('cf-')) {
req.continue();
return;
}
if (
['image', 'media', 'font', 'stylesheet'].includes(resourceType) && // Block non-essential images/media/fonts/styles early if not core
!reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu') || // Allow core document images
resourceType === 'script' && !reqUrl.includes('studocu') && !reqUrl.includes('cloudflare') || // Block third-party scripts except Cloudflare
reqUrl.includes('doubleclick') ||
reqUrl.includes('googletagmanager') ||
reqUrl.includes('facebook.com') ||
reqUrl.includes('twitter.com') ||
reqUrl.includes('analytics') ||
reqUrl.includes('gtm') ||
reqUrl.includes('hotjar') ||
reqUrl.includes('mixpanel') ||
reqUrl.includes('onetrust') ||
reqUrl.includes('cookielaw') ||
(resourceType === 'other' && reqUrl.includes('/track/'))
) {
req.abort();
} else {
req.continue();
}
});
// Login if credentials provided
if (options.email && options.password) {
progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
console.log("πŸ”‘ Logging in to StuDocu...");
await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 120000 });
// NEW: Handle potential Cloudflare on login page
await handleCloudflareChallenge(page, progressTracker);
await page.waitForSelector('#email', { timeout: 15000 });
await page.type('#email', options.email);
await page.type('#password', options.password);
await page.click('button[type="submit"]');
try {
await page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 });
await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
console.log("βœ… Login successful.");
progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
} catch (e) {
console.error("❌ Login failed:", e.message);
throw new Error("Login failed. Check credentials or try again.");
}
}
// Removed homepage visit as it's not strictly necessary for session setup; directly navigate to URL
progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
console.log(`πŸ“„ Navigating to ${url}...`);
let navigationSuccess = false;
let attempts = 0;
const maxAttempts = 3; // Reduced from 5 to minimize retries
while (!navigationSuccess && attempts < maxAttempts) {
try {
attempts++;
progressTracker?.updateProgress(30 + (attempts * 5), 'navigating', `Navigation attempt ${attempts}/${maxAttempts}`);
console.log(`Navigation attempt ${attempts}/${maxAttempts}`);
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120000 }); // Increased from 60000
navigationSuccess = true;
} catch (e) {
console.log(`Navigation attempt ${attempts} failed:`, e.message);
if (attempts >= maxAttempts) throw e;
await new Promise(resolve => setTimeout(resolve, 10000)); // Increased retry delay to 10s for stability
}
}
// NEW: Handle Cloudflare after navigation
await handleCloudflareChallenge(page, progressTracker);
progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
await new Promise(resolve => setTimeout(resolve, 5000)); // Increased from 2000ms for better loading
// Apply content unblurring
await unblurContent(page, progressTracker);
// Wait for document content
progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
console.log("⏳ Waiting for document content to load...");
const contentSelectors = [
'.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img'
];
let contentFound = false;
for (const selector of contentSelectors) {
try {
await page.waitForSelector(selector, { timeout: 20000 }); // Increased from 10000
console.log(`βœ… Found content with selector: ${selector}`);
contentFound = true;
break;
} catch (e) {
console.log(`❌ Selector ${selector} not found, trying next...`);
}
}
if (!contentFound) {
console.log("⚠️ No specific content selector found, proceeding with page content...");
}
// Enhanced scrolling to load all content (Optimized: Increased scroll distance, reduced delays)
progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
console.log("πŸ“œ Loading all document pages with enhanced slow scroll...");
await page.evaluate(async () => {
const delay = (ms) => new Promise((res) => setTimeout(res, ms));
let scrollHeight = document.body.scrollHeight;
while (true) {
let totalHeight = 0;
const distance = 600; // Increased from 300 for faster coverage
while (totalHeight < scrollHeight) {
window.scrollBy(0, distance);
totalHeight += distance;
await delay(300); // Increased from 200ms for large docs stability
}
await delay(2000); // Increased from 1000ms
const newHeight = document.body.scrollHeight;
if (newHeight === scrollHeight) break;
scrollHeight = newHeight;
}
window.scrollTo({ top: 0, behavior: "smooth" });
await delay(1000); // Increased from 500ms
});
// Re-apply unblur after loading new content
await unblurContent(page, progressTracker);
// New: Fetch clear images for blurred pages
await fetchClearImages(page, progressTracker);
// Wait for all images to load (Optimized: Reduced per-image timeout, parallel wait)
progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
console.log("πŸ–ΌοΈ Waiting for all images to load...");
await page.evaluate(async () => {
const images = Array.from(document.querySelectorAll('img'));
await Promise.all(images.map(img => {
if (img.complete) return Promise.resolve();
return new Promise((resolve) => {
img.addEventListener('load', resolve);
img.addEventListener('error', resolve);
setTimeout(resolve, 10000); // Increased from 5000ms for large docs
});
}));
});
await new Promise(resolve => setTimeout(resolve, 5000)); // Increased from 2000ms
progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');
// Set exact height
await page.evaluate(() => {
const getDocumentHeight = () => Math.max(
document.body.scrollHeight, document.body.offsetHeight,
document.documentElement.clientHeight, document.documentElement.scrollHeight, document.documentElement.offsetHeight
);
const height = getDocumentHeight();
document.body.style.height = `${height}px !important`;
document.documentElement.style.height = `${height}px !important`;
document.body.style.overflow = 'hidden !important';
});
// Content verification (Unchanged, as it's quick)
const contentCheck = await page.evaluate(() => {
const textContent = document.body.textContent || '';
const images = document.querySelectorAll('img');
const documentImages = Array.from(images).filter(img =>
img.src.includes('document') || img.src.includes('page') ||
img.alt.includes('document') || img.alt.includes('page')
);
return {
totalText: textContent.length,
totalImages: images.length,
documentImages: documentImages.length,
hasDocumentContent: documentImages.length > 0 || textContent.length > 1000
};
});
console.log("πŸ“Š Content verification:", {
textLength: contentCheck.totalText,
images: contentCheck.totalImages,
documentImages: contentCheck.documentImages,
hasContent: contentCheck.hasDocumentContent
});
if (!contentCheck.hasDocumentContent) {
console.warn("⚠️ Warning: Limited document content detected.");
}
// Apply print styles and generate PDF
await applyPrintStyles(page, progressTracker);
await page.emulateMediaType('print');
progressTracker?.updateProgress(90, 'generating', 'Generating PDF...');
console.log("πŸ”„ Generating PDF...");
const pdfBuffer = await page.pdf({
printBackground: true,
preferCSSPageSize: true, // Use the @page size
displayHeaderFooter: false,
timeout: 180000, // Increased back to 180000 for large PDFs
scale: 1,
omitBackground: false
});
progressTracker?.updateProgress(100, 'completed', 'PDF generated successfully!');
console.log(`βœ… PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
return pdfBuffer;
} catch (error) {
progressTracker?.updateProgress(-1, 'error', error.message);
console.error("❌ Error during PDF generation:", error);
throw error;
} finally {
if (browser) {
console.log("πŸ”’ Closing browser...");
try {
await browser.close();
} catch (e) {
console.log("Error closing browser:", e.message);
}
}
}
};
// --- API Routes --- (Unchanged)
app.post('/api/request-download', (req, res) => {
const { url, email, password } = req.body;
if (!url || !url.includes('studocu.com')) {
return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
}
const sessionId = Date.now().toString();
const progressTracker = new ProgressTracker(sessionId);
progressTrackers.set(sessionId, progressTracker);
downloadJobs.set(sessionId, { status: 'processing' });
console.log(`🎯 Processing request for: ${url} [Session: ${sessionId}]`);
// Respond to the client immediately with the session ID
res.json({ sessionId });
// --- Start the PDF generation in the background ---
studocuDownloader(url, { email, password }, progressTracker)
.then(pdfBuffer => {
// Store the successful result
downloadJobs.set(sessionId, { status: 'completed', buffer: pdfBuffer });
progressTrackers.delete(sessionId); // Clean up live tracker
})
.catch(error => {
// Store the error
downloadJobs.set(sessionId, { status: 'error', message: error.message });
progressTrackers.delete(sessionId); // Clean up live tracker
});
});
app.get('/api/progress/:sessionId', (req, res) => {
const { sessionId } = req.params;
const tracker = progressTrackers.get(sessionId);
if (tracker) {
// Job is in progress, return live data
return res.json({
sessionId,
progress: tracker.progress,
status: tracker.status,
message: tracker.message,
timestamp: new Date().toISOString()
});
}
const job = downloadJobs.get(sessionId);
if (job) {
// Job is finished, return final state
if (job.status === 'completed') {
return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
}
if (job.status === 'error') {
return res.json({ sessionId, progress: -1, status: 'error', message: job.message });
}
}
return res.status(404).json({ error: 'Session not found' });
});
app.get('/api/download/:sessionId', (req, res) => {
const { sessionId } = req.params;
const job = downloadJobs.get(sessionId);
if (!job) {
return res.status(404).json({ error: 'Download session not found or expired.' });
}
if (job.status === 'processing') {
return res.status(400).json({ error: 'Download is still processing.' });
}
if (job.status === 'error') {
return res.status(500).json({ error: `Failed to generate PDF: ${job.message}` });
}
if (job.status === 'completed' && job.buffer) {
res.setHeader('Content-Type', 'application/pdf');
res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
res.send(job.buffer);
// Optional: Clean up the job after download to save memory
// downloadJobs.delete(sessionId);
} else {
res.status(500).json({ error: 'An unknown error occurred.' });
}
});
// --- Health and Info Endpoints (Unchanged) ---
app.get('/health', (req, res) => {
res.json({
status: 'healthy',
timestamp: new Date().toISOString(),
uptime: process.uptime(),
activeDownloads: progressTrackers.size
});
});
app.get('/', (req, res) => {
res.json({
message: 'πŸš€ Enhanced StuDocu Downloader API v5.3 - Real-time Progress Tracking with Cloudflare Bypass',
version: '5.3.0',
features: [
'πŸͺ Advanced cookie banner bypass',
'πŸ”“ Premium content unblurring',
'πŸ”‘ Login support for full access',
'πŸ“Š Real-time progress tracking via polling',
'πŸ“„ Clean PDF generation with print styles',
'πŸ•΅οΈ Enhanced stealth to evade bot detection',
'☁️ Automatic Cloudflare challenge handling',
'πŸ§‘ Human-like behavior simulation'
],
endpoints: {
request: 'POST /api/request-download (body: {url, filename?, email?, password?})',
progress: 'GET /api/progress/:sessionId',
download: 'GET /api/download/:sessionId',
health: 'GET /health'
}
});
});
process.on('SIGTERM', () => {
console.log('SIGTERM received, shutting down gracefully...');
process.exit(0);
});
process.on('SIGINT', () => {
console.log('SIGINT received, shutting down gracefully...');
process.exit(0);
});
app.listen(port, () => {
console.log(`πŸš€ Enhanced StuDocu Downloader v5.3.0 running on http://localhost:${port}`);
console.log(`✨ Features: Real-time progress tracking, enhanced stealth, Cloudflare bypass, and user feedback`);
});