const { randomUUID } = require('crypto');
const { EventEmitter } = require('events');

const cors = require('cors');
const express = require('express');
const puppeteerExtra = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');

// Register the stealth plugin once, before any browser is launched.
puppeteerExtra.use(StealthPlugin());

const app = express();
// Use PORT from the environment when it is a usable number; 7860 stays the default.
const port = Number(process.env.PORT) || 7860;

app.use(cors());
app.use(express.json());

// sessionId -> ProgressTracker for jobs that are still running.
const progressTrackers = new Map();
// sessionId -> { status, buffer?, message? } describing each job's outcome.
const downloadJobs = new Map();

/**
 * Holds the latest progress snapshot of one download session and emits a
 * 'progress' event every time the snapshot changes.
 */
class ProgressTracker extends EventEmitter {
  /**
   * @param {string} sessionId - Identifier of the session being tracked.
   */
  constructor(sessionId) {
    super();
    this.sessionId = sessionId;
    this.progress = 0;
    this.status = 'initializing';
    this.message = '';
  }

  /**
   * Stores the new snapshot, emits it as a 'progress' event and logs it.
   *
   * @param {number} progress - Percentage value (callers pass -1 on error).
   * @param {string} status - Short machine-readable phase name.
   * @param {string} message - Human-readable description of the phase.
   * @returns {void}
   */
  updateProgress(progress, status, message) {
    this.progress = progress;
    this.status = status;
    this.message = message;

    const update = {
      sessionId: this.sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString(),
    };

    this.emit('progress', update);
    console.log(`📊 [${this.sessionId}] ${progress}% - ${status}: ${message}`);
  }
}

/**
 * Moves the mouse to five random positions inside the viewport, nudges the
 * scroll position by a random amount and pauses between the steps, then
 * advances the tracker's progress by one.
 *
 * @param {object} page - Puppeteer page; uses viewport(), mouse.move() and evaluate().
 * @param {?ProgressTracker} progressTracker - Optional tracker to notify.
 * @returns {Promise<void>}
 */
const simulateHumanBehavior = async (page, progressTracker) => {
  // page.waitForTimeout() is gone from current Puppeteer releases, so delays
  // are implemented with a plain timer instead.
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  console.log('🧠 Simulating human-like mouse movements and delays...');

  // viewport() yields null when no viewport was configured; fall back to the
  // same 1920x1080 defaults for every missing dimension.
  const viewport = page.viewport() ?? {};
  const maxX = viewport.width || 1920;
  const maxY = viewport.height || 1080;

  for (let step = 0; step < 5; step += 1) {
    const x = Math.random() * maxX;
    const y = Math.random() * maxY;
    await page.mouse.move(x, y, { steps: 10 });
    await pause(Math.random() * 1000 + 500);
  }

  await page.evaluate(() => {
    window.scrollBy(0, Math.random() * 200 - 100);
  });
  await pause(Math.random() * 2000 + 1000);

  progressTracker?.updateProgress(progressTracker.progress + 1, 'humanizing', 'Human behavior simulated');
};

/**
 * Detects a Cloudflare interstitial on the current page and waits for it to
 * clear, falling back to a click on the Turnstile widget when the plain wait
 * times out.
 *
 * @param {object} page - Puppeteer page that has already been navigated.
 * @param {?ProgressTracker} progressTracker - Optional tracker to notify.
 * @returns {Promise<void>} Resolves when no challenge is present (any more).
 * @throws {Error} When the challenge is still present after the Turnstile fallback.
 */
const handleCloudflareChallenge = async (page, progressTracker) => {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const randomDelay = (min, max) => Math.floor(Math.random() * (max - min + 1) + min);

  // Elements that must be gone before the challenge counts as passed.
  const clearanceSelectors = [
    '#challenge-running',
    '.cf-browser-verification',
    '[data-ray]',
    '#cf-challenge-running',
    '.under-attack',
    '#challenge-form',
    '.cf-turnstile',
  ];
  // Detection additionally looks for a Cloudflare iframe.
  const detectionSelectors = [...clearanceSelectors, 'iframe[src*="cloudflare"]'];

  // The selector list is passed as an argument because the predicate runs in
  // the browser and cannot close over Node-side variables.
  const waitForClearance = (timeout) =>
    page.waitForFunction(
      (selectors) => !selectors.some((sel) => document.querySelector(sel)),
      { timeout },
      clearanceSelectors,
    );

  progressTracker?.updateProgress(35, 'cloudflare', 'Detecting and bypassing Cloudflare...');
  console.log('☁️ Checking for Cloudflare challenge...');

  // One combined selector: a single 5 s wait covers every marker instead of
  // paying 5 s per marker when no challenge is present.
  let challengeDetected = false;
  try {
    await page.waitForSelector(detectionSelectors.join(', '), { timeout: 5000 });
    challengeDetected = true;
  } catch (error) {
    // A timeout simply means "no challenge"; anything else is worth a log line.
    if (error?.name !== 'TimeoutError') {
      console.log('Cloudflare detection check failed:', error?.message);
    }
  }

  if (!challengeDetected) {
    console.log('✅ No Cloudflare challenge detected.');
    return;
  }

  console.log('☁️ Cloudflare challenge detected.');
  await simulateHumanBehavior(page, progressTracker);

  console.log('⏳ Waiting for Cloudflare challenge to complete...');
  try {
    await waitForClearance(90000);
  } catch (waitError) {
    console.log('⚠️ Standard wait failed, attempting Turnstile click...');

    try {
      const cfInput = await page.$('[name="cf-turnstile-response"]');
      if (cfInput) {
        const parentHandle = await cfInput.evaluateHandle((element) => element.parentElement);
        // asElement() is null when the input has no parent element.
        const parentElement = parentHandle.asElement();
        const coordinates = parentElement ? await parentElement.boundingBox() : null;
        if (coordinates) {
          await page.mouse.click(coordinates.x + 25, coordinates.y + coordinates.height / 2);
          console.log('🖱️ Clicked on Turnstile CAPTCHA');
          await pause(3000);
        }
      }

      await waitForClearance(60000);
    } catch (clickError) {
      console.error('❌ Turnstile click failed:', clickError.message);
      throw new Error('Failed to bypass Cloudflare challenge. Try again later or use a proxy.', {
        cause: clickError,
      });
    }
  }

  await pause(randomDelay(3000, 7000));
  console.log('✅ Cloudflare challenge bypassed successfully.');
  progressTracker?.updateProgress(38, 'cloudflare', 'Cloudflare bypassed');
};

/**
 * Prepares a page so consent banners and overlays do not get in the way:
 * sets consent cookies for .studocu.com, adds an override stylesheet to the
 * current document and registers an init script that stubs analytics globals
 * and removes consent banners in every document loaded afterwards.
 *
 * NOTE(review): addStyleTag() only affects the document that is loaded when it
 * is called; confirm whether callers invoke this before or after navigation.
 *
 * @param {object} page - Puppeteer page.
 * @param {?ProgressTracker} progressTracker - Optional tracker to notify.
 * @returns {Promise<boolean>} Always resolves to true.
 */
const bypassCookiesAndRestrictions = async (page, progressTracker) => {
  progressTracker?.updateProgress(5, 'bypassing', 'Setting up cookie bypass...');
  console.log('🍪 Starting comprehensive cookie and restriction bypass...');

  const preCookies = [
    ['cookieConsent', 'accepted'],
    ['cookie_consent', 'true'],
    ['gdpr_consent', 'accepted'],
    ['privacy_policy_accepted', 'true'],
    ['user_consent', '1'],
    ['analytics_consent', 'false'],
    ['marketing_consent', 'false'],
    ['functional_consent', 'true'],
  ].map(([name, value]) => ({ name, value, domain: '.studocu.com' }));

  // Best effort: one rejected cookie must not abort the remaining setup.
  for (const cookie of preCookies) {
    try {
      await page.setCookie(cookie);
    } catch (e) {
      console.log(`Failed to set cookie ${cookie.name}:`, e.message);
    }
  }

  // The banner rule used to end with ":has-text(...)" selectors. That
  // pseudo-class is not CSS, and one invalid selector makes the browser drop
  // the entire rule, so those selectors were removed.
  await page.addStyleTag({
    content: `
      /* Hide all possible cookie banners */
      [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i], [aria-label*="cookie" i],
      .gdpr-banner, .gdpr-popup, .gdpr-modal, .consent-banner, .consent-popup, .consent-modal, .privacy-banner, .privacy-popup, .privacy-modal,
      .cookie-law, .cookie-policy, .cookie-compliance, .onetrust-banner-sdk, #onetrust-consent-sdk, .cmp-banner, .cmp-popup, .cmp-modal,
      [class*="CookieBanner"], [class*="CookieNotice"], [class*="ConsentBanner"], [class*="ConsentManager"], .cc-banner, .cc-window, .cc-compliance {
        display: none !important;
        visibility: hidden !important;
        opacity: 0 !important;
        z-index: -9999 !important;
        pointer-events: none !important;
      }
      /* Remove blur and premium overlays, including previews */
      [class*="blur" i], [class*="premium" i], [class*="paywall" i], [class*="sample-preview-blur" i], [class*="preview" i], [class*="blurred-container" i], [class*="blurred" i] {
        display: none !important;
        filter: none !important;
        backdrop-filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
      }
      /* Ensure document content is visible */
      .document-content, .page-content, [data-page] {
        filter: none !important;
        opacity: 1 !important;
        visibility: visible !important;
        pointer-events: auto !important;
      }
      /* Remove fixed overlays */
      .fixed-overlay, .sticky-overlay, .content-overlay {
        display: none !important;
      }
      /* Restore scrolling */
      html, body {
        overflow: auto !important;
        position: static !important;
      }
      /* Hide Cloudflare elements if they persist */
      #challenge-running, .cf-browser-verification, [data-ray], .under-attack {
        display: none !important;
      }
    `,
  });

  await page.evaluateOnNewDocument(() => {
    // Pretend consent was given and neutralise common analytics entry points.
    window.cookieConsent = { accepted: true };
    window.gtag = () => {};
    window.ga = () => {};
    window.dataLayer = [];

    const mentionsAny = (haystack, needles) => needles.some((needle) => haystack.includes(needle));

    const looksLikeConsentBanner = (element) => {
      // getAttribute() is used because className is not a string on SVG elements.
      const classes = (element.getAttribute('class') || '').toLowerCase();
      const id = (element.id || '').toLowerCase();
      if (mentionsAny(classes, ['cookie', 'consent', 'gdpr']) || mentionsAny(id, ['cookie', 'consent'])) {
        return true;
      }

      const text = (element.textContent || '').toLowerCase();
      if (!mentionsAny(text, ['cookie', 'consent', 'privacy policy'])) {
        return false;
      }
      // A text match alone would also hit document pages that merely mention
      // these words, so it only counts for fixed/sticky overlays.
      const { position } = window.getComputedStyle(element);
      return position === 'fixed' || position === 'sticky';
    };

    const observer = new MutationObserver((mutations) => {
      for (const mutation of mutations) {
        for (const node of mutation.addedNodes) {
          if (node.nodeType === 1 && looksLikeConsentBanner(node)) {
            console.log('Removing detected cookie banner:', node);
            node.remove();
          }
        }
      }
    });

    // This script runs before the document is parsed, so document.body is
    // usually still null here; observing null throws and would abort the rest
    // of the script, including the periodic sweep below.
    const startObserving = () => {
      if (document.body) {
        observer.observe(document.body, { childList: true, subtree: true });
      }
    };
    if (document.body) {
      startObserving();
    } else {
      document.addEventListener('DOMContentLoaded', startObserving, { once: true });
    }

    // Periodic sweep for banners that were present before the observer started.
    setInterval(() => {
      const cookieElements = document.querySelectorAll(`
        [id*="cookie" i]:not(img):not(input), [class*="cookie" i]:not(img):not(input), [data-testid*="cookie" i],
        .gdpr-banner, .consent-banner, .privacy-banner, .onetrust-banner-sdk, #onetrust-consent-sdk,
        .cmp-banner, .cc-banner
      `);
      cookieElements.forEach((el) => el.remove());

      if (document.body) {
        document.body.style.overflow = 'auto';
      }
      if (document.documentElement) {
        document.documentElement.style.overflow = 'auto';
      }
    }, 1000);
  });

  progressTracker?.updateProgress(10, 'bypassing', 'Cookie bypass configured successfully');
  return true;
};

/**
 * Strips ad/premium/preview elements and blur styling from the current
 * document, then repeats the same pass once per second for 30 seconds inside
 * the page.
 *
 * @param {object} page - Puppeteer page with the document loaded.
 * @param {?ProgressTracker} progressTracker - Optional tracker to notify.
 * @returns {Promise<void>}
 */
const unblurContent = async (page, progressTracker) => {
  progressTracker?.updateProgress(15, 'unblurring', 'Removing content restrictions...');
  console.log('🔓 Unblurring content and bypassing premium restrictions...');

  await page.evaluate(() => {
    const removeBySelector = (selector) => {
      document.querySelectorAll(selector).forEach((el) => el.remove());
    };

    // "!important" has to be passed as the priority argument of setProperty();
    // assigning e.g. style.filter = "none !important" is rejected by the
    // browser and leaves the style untouched.
    const forceStyle = (el, property, value) => el.style.setProperty(property, value, 'important');

    const removeBlur = (root = document) => {
      root.querySelectorAll('*').forEach((el) => {
        const computed = window.getComputedStyle(el);
        const classText = el.className ? el.className.toString().toLowerCase() : '';
        const isRestricted =
          computed.filter?.includes('blur') ||
          computed.backdropFilter?.includes('blur') ||
          parseFloat(computed.opacity) < 1 ||
          classText.includes('blur') ||
          classText.includes('premium');

        if (!isRestricted) {
          return;
        }
        forceStyle(el, 'filter', 'none');
        forceStyle(el, 'backdrop-filter', 'none');
        forceStyle(el, 'opacity', '1');
        el.classList?.remove('blur', 'blurred', 'premium-blur');
      });
    };

    const contentSelector = [
      '.document-content', '.page-content', '.content', '[data-page]', '[data-testid*="document"]',
      '[data-testid*="page"]', '.page', '.document-page', 'main', 'article',
    ].join(', ');

    const removeRestrictions = () => {
      removeBySelector('#adbox, .adsbox, .ad-box, .banner-ads, .advert');
      removeBySelector('.PremiumBannerBlobWrapper_overflow-wrapper__xsaS8');
      removeBySelector('[class*="preview" i], [class*="blurred-container" i], [class*="blurred" i]:not(img)');

      removeBlur();

      document.querySelectorAll(contentSelector).forEach((el) => {
        forceStyle(el, 'filter', 'none');
        forceStyle(el, 'opacity', '1');
        forceStyle(el, 'visibility', 'visible');
        forceStyle(el, 'display', 'block');
        forceStyle(el, 'pointer-events', 'auto');
      });
    };

    removeRestrictions();
    // Keep re-applying for 30 s to catch content that is rendered later.
    const intervalId = setInterval(removeRestrictions, 1000);
    setTimeout(() => clearInterval(intervalId), 30000);
  });

  progressTracker?.updateProgress(20, 'unblurring', 'Content restrictions removed');
};

/**
 * Rewrites the src of every image whose URL contains "/blurred/" so that the
 * path segment is dropped, then waits for the page's images to settle.
 *
 * @param {object} page - Puppeteer page with the document loaded.
 * @param {?ProgressTracker} progressTracker - Optional tracker to notify.
 * @returns {Promise<void>}
 */
const fetchClearImages = async (page, progressTracker) => {
  progressTracker?.updateProgress(65, 'unblurring_images', 'Fetching clear page images...');
  console.log('🖼️ Modifying blurred image URLs to fetch clear versions...');

  await page.evaluate(() => {
    document.querySelectorAll('img[src*="/blurred/"]').forEach((img) => {
      img.src = img.src.replace(/\/blurred\//, '/');
      console.log(`Modified image src: ${img.src}`);
    });
  });

  // Every image counts as settled once it loaded, failed, or 10 s have passed.
  await page.evaluate(async () => {
    const pending = Array.from(document.querySelectorAll('img')).filter((img) => !img.complete);
    await Promise.all(
      pending.map(
        (img) =>
          new Promise((resolve) => {
            img.addEventListener('load', resolve, { once: true });
            img.addEventListener('error', resolve, { once: true });
            setTimeout(resolve, 10000);
          }),
      ),
    );
  });

  // Short grace period after the load events before reporting completion.
  await new Promise((resolve) => setTimeout(resolve, 3000));
  progressTracker?.updateProgress(70, 'unblurring_images', 'Clear images loaded');
};

/**
 * Appends a print stylesheet (A4 portrait, no margins, site chrome hidden,
 * document container at full width) to the head of the current document.
 * A stylesheet left over from an earlier call is replaced, not duplicated.
 *
 * @param {object} page - Puppeteer page with the document loaded.
 * @param {?ProgressTracker} progressTracker - Optional tracker to notify.
 * @returns {Promise<void>}
 */
const applyPrintStyles = async (page, progressTracker) => {
  progressTracker?.updateProgress(85, 'styling', 'Applying print styles...');
  console.log('🖨️ Applying print styles for clean PDF...');

  await page.evaluate(() => {
    const styleId = 'print-style-extension';
    // Avoid stacking several <style> elements with the same id.
    document.getElementById(styleId)?.remove();

    const style = document.createElement('style');
    style.id = styleId;
    style.textContent = `
      @page {
        /* Set page size to A4 and remove default margins */
        size: A4 portrait;
        margin: 0mm;
      }
      @media print {
        html, body {
          /* Ensure the body takes the full width and has no extra padding/margin */
          width: 210mm !important;
          height: auto !important;
          margin: 0 !important;
          padding: 0 !important;
          overflow: visible !important;
          background: white !important;
          color: black !important;
          display: flex;
          justify-content: center;
        }
        /* Remove all unwanted elements like headers, footers, sidebars, etc. */
        header, footer, nav, aside, .no-print, .ads, .sidebar, .premium-banner,
        [class*="Header"], [class*="Footer"], [class*="Sidebar"], [id*="Header"],
        .ViewerToolbar, .Layout_info-bar-wrapper__He0Ho, .Sidebar_sidebar-scrollable__kqeBZ,
        .HeaderWrapper_header-wrapper__mCmf3, .Layout_visible-content-bottom-wrapper-sticky__yaaAB,
        .Layout_bottom-section-wrapper__yBWWk, .Layout_footer-wrapper__bheJQ,
        .InlineBanner_inline-banner-wrapper__DAi5X, .banner-wrapper, #top-bar-wrapper,
        .Layout_sidebar-wrapper__unavM, .Layout_is-open__9DQr4 {
          display: none !important;
        }
        /* Force all elements to have a transparent background and no shadow */
        * {
          box-shadow: none !important;
          background: transparent !important;
          color: inherit !important;
        }
        /*
         * KEY FIX: Target the main document container.
         * Force it to be a block element, remove any transforms or max-widths,
         * and center it perfectly within the page.
         */
        .Viewer_document-wrapper__JPBWQ, .Viewer_document-wrapper__LXzoQ,
        .Viewer_document-wrapper__XsO4j, .page-content, .document-viewer, #page-container {
          position: static !important;
          display: block !important;
          width: 100% !important;
          max-width: none !important;
          margin: 0 auto !important; /* Center horizontally */
          padding: 0 !important;
          box-sizing: border-box; /* Include padding in width calculation */
          transform: none !important;
        }
        /* Ensure individual pages and images within the document use the full width */
        [data-page], .page, .document-page, img {
          page-break-after: always !important;
          page-break-inside: avoid !important;
          page-break-before: avoid !important;
          width: 100% !important;
          max-width: 100% !important;
          height: auto !important;
          display: block !important;
          margin: 0 !important;
          padding: 0 !important;
        }
      }
    `;
    document.head.appendChild(style);
  });

  progressTracker?.updateProgress(88, 'styling', 'Print styles applied successfully');
};

/**
 * Opens a StuDocu document in a headless browser and renders it to a PDF.
 *
 * Steps: launch the browser, configure the page and request filtering,
 * optionally log in, navigate with up to three attempts, handle a Cloudflare
 * challenge, scroll through the document so lazy content loads, remove
 * restrictions, apply print styles and print.
 *
 * @param {string} url - Document URL to open.
 * @param {{email?: string, password?: string}} [options] - Login is attempted
 *   only when both email and password are given.
 * @param {?ProgressTracker} [progressTracker] - Optional tracker to notify.
 * @returns {Promise<Buffer>} The generated PDF.
 * @throws {Error} On login, navigation, Cloudflare or PDF failures; the tracker
 *   is set to progress -1 / status 'error' before the error is rethrown.
 */
const studocuDownloader = async (url, options = {}, progressTracker = null) => {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Chromium honours only the last occurrence of a repeated switch, so all
  // features to disable must travel in one --disable-features value.
  const disabledFeatures = ['VizDisplayCompositor', 'site-per-process', 'TranslateUI', 'BlinkGenPropertyTrees'];
  const launchArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-accelerated-2d-canvas',
    '--no-first-run',
    '--no-zygote',
    '--disable-gpu',
    '--disable-background-networking',
    '--disable-background-timer-throttling',
    '--disable-renderer-backgrounding',
    '--disable-backgrounding-occluded-windows',
    '--disable-ipc-flooding-protection',
    '--disable-web-security',
    '--disable-blink-features=AutomationControlled',
    '--disable-extensions',
    '--ignore-certificate-errors',
    '--disable-ipc-flooding',
    '--metrics-recording-only',
    '--no-default-browser-check',
    '--safebrowsing-disable-auto-update',
    '--password-store=basic',
    '--use-mock-keychain',
    `--disable-features=${disabledFeatures.join(',')}`,
  ];

  // URL fragments of trackers and consent managers that are always blocked.
  const blockedUrlFragments = [
    'doubleclick', 'googletagmanager', 'facebook.com', 'twitter.com', 'analytics',
    'gtm', 'hotjar', 'mixpanel', 'onetrust', 'cookielaw',
  ];

  // Decides whether a non-document, non-Cloudflare request is aborted.
  const shouldBlockRequest = (resourceType, reqUrl) => {
    const isStaticAsset = ['image', 'media', 'font', 'stylesheet'].includes(resourceType);
    if (isStaticAsset && !reqUrl.includes('document') && !reqUrl.includes('page') && !reqUrl.includes('studocu')) {
      return true;
    }
    if (resourceType === 'script' && !reqUrl.includes('studocu') && !reqUrl.includes('cloudflare')) {
      return true;
    }
    if (blockedUrlFragments.some((fragment) => reqUrl.includes(fragment))) {
      return true;
    }
    return resourceType === 'other' && reqUrl.includes('/track/');
  };

  // Resolves once every <img> has loaded, failed, or 10 s have passed.
  const waitForImages = (page) =>
    page.evaluate(async () => {
      const pending = Array.from(document.querySelectorAll('img')).filter((img) => !img.complete);
      await Promise.all(
        pending.map(
          (img) =>
            new Promise((resolve) => {
              img.addEventListener('load', resolve, { once: true });
              img.addEventListener('error', resolve, { once: true });
              setTimeout(resolve, 10000);
            }),
        ),
      );
    });

  let browser;
  try {
    progressTracker?.updateProgress(0, 'initializing', 'Starting browser...');
    console.log('🚀 Launching browser with enhanced stealth configuration...');

    browser = await puppeteerExtra.launch({
      headless: true,
      args: launchArgs,
      ignoreHTTPSErrors: true,
      timeout: 300000,
    });

    const page = await browser.newPage();
    progressTracker?.updateProgress(2, 'initializing', 'Configuring browser settings...');

    await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');
    await page.setViewport({ width: 1920, height: 1080 });

    // Mask a few properties that commonly reveal an automated browser.
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
      Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
      Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });
      Object.defineProperty(navigator, 'permissions', {
        get: () => ({
          query: () => Promise.resolve({ state: 'granted' }),
        }),
      });
      window.chrome = {
        runtime: {},
        loadTimes() {},
        csi() {},
        app: {},
      };
    });

    await bypassCookiesAndRestrictions(page, progressTracker);

    await page.setRequestInterception(true);
    page.on('request', (req) => {
      // Another handler may already have resolved this request.
      if (req.isInterceptResolutionHandled?.()) {
        return;
      }
      const resourceType = req.resourceType();
      const reqUrl = req.url().toLowerCase();

      const allow =
        resourceType === 'document' ||
        reqUrl.includes('cloudflare') ||
        reqUrl.includes('cf-') ||
        !shouldBlockRequest(resourceType, reqUrl);

      // continue()/abort() return promises; a rejection (e.g. the page closed
      // in the meantime) must not surface as an unhandled rejection.
      Promise.resolve(allow ? req.continue() : req.abort()).catch((err) => {
        console.log('Request interception failed:', err.message);
      });
    });

    if (options.email && options.password) {
      progressTracker?.updateProgress(12, 'authenticating', 'Logging into StuDocu...');
      console.log('🔑 Logging in to StuDocu...');

      await page.goto('https://www.studocu.com/en-us/login', { waitUntil: 'domcontentloaded', timeout: 120000 });
      await handleCloudflareChallenge(page, progressTracker);

      await page.waitForSelector('#email', { timeout: 15000 });
      await page.type('#email', options.email);
      await page.type('#password', options.password);

      try {
        // Start waiting before clicking; otherwise a fast navigation can
        // finish before waitForNavigation() is even registered.
        await Promise.all([
          page.waitForNavigation({ waitUntil: 'networkidle2', timeout: 30000 }),
          page.click('button[type="submit"]'),
        ]);
        await page.waitForSelector('.user-profile, [data-testid="user-menu"]', { timeout: 10000 });
        console.log('✅ Login successful.');
        progressTracker?.updateProgress(18, 'authenticated', 'Login successful');
      } catch (e) {
        console.error('❌ Login failed:', e.message);
        throw new Error('Login failed. Check credentials or try again.', { cause: e });
      }
    }

    progressTracker?.updateProgress(30, 'navigating', 'Navigating to document...');
    console.log(`🌐 Navigating to ${url}...`);

    const maxAttempts = 3;
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
      progressTracker?.updateProgress(30 + attempt * 5, 'navigating', `Navigation attempt ${attempt}/${maxAttempts}`);
      console.log(`Navigation attempt ${attempt}/${maxAttempts}`);
      try {
        await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 120000 });
        break;
      } catch (e) {
        console.log(`Navigation attempt ${attempt} failed:`, e.message);
        if (attempt === maxAttempts) {
          throw e;
        }
        await pause(10000);
      }
    }

    await handleCloudflareChallenge(page, progressTracker);

    progressTracker?.updateProgress(40, 'loading', 'Page loaded, waiting for content...');
    await pause(5000);

    await unblurContent(page, progressTracker);

    progressTracker?.updateProgress(45, 'loading', 'Waiting for document content...');
    console.log('⏳ Waiting for document content to load...');

    // One combined selector: the wait ends as soon as any marker shows up,
    // rather than timing out on each earlier marker first.
    const contentSelector = [
      '.document-content', '.page-content', '[data-page]', '[data-testid*="document"]',
      'img[src*="document"]', 'img[src*="page"]', '.page', 'main img', 'article img',
    ].join(', ');
    try {
      await page.waitForSelector(contentSelector, { timeout: 20000 });
      console.log('✅ Found document content.');
    } catch (e) {
      console.log('⚠️ No specific content selector found, proceeding with page content...');
    }

    progressTracker?.updateProgress(50, 'scrolling', 'Loading all document pages...');
    console.log('📜 Loading all document pages with enhanced slow scroll...');

    // Scroll top to bottom until the page height stops growing. The pass
    // count is capped so an endlessly growing page cannot hang the job.
    await page.evaluate(async (maxPasses) => {
      const delay = (ms) => new Promise((res) => setTimeout(res, ms));
      const distance = 600;
      let scrollHeight = document.body.scrollHeight;

      for (let pass = 0; pass < maxPasses; pass += 1) {
        for (let scrolled = 0; scrolled < scrollHeight; scrolled += distance) {
          window.scrollBy(0, distance);
          await delay(300);
        }
        await delay(2000);

        const newHeight = document.body.scrollHeight;
        if (newHeight === scrollHeight) {
          break;
        }
        scrollHeight = newHeight;
      }

      window.scrollTo({ top: 0, behavior: 'smooth' });
      await delay(1000);
    }, 25);

    // Content loaded during scrolling needs the same treatment.
    await unblurContent(page, progressTracker);
    await fetchClearImages(page, progressTracker);

    progressTracker?.updateProgress(75, 'loading_images', 'Loading images...');
    console.log('🖼️ Waiting for all images to load...');
    await waitForImages(page);
    await pause(5000);

    progressTracker?.updateProgress(80, 'finalizing', 'Preparing document for PDF generation...');

    // The earlier code assigned "<height>px !important" and "hidden !important"
    // to body/html inline styles here. Browsers reject such values, so those
    // assignments never took effect and have been dropped.

    const contentCheck = await page.evaluate(() => {
      const textContent = document.body.textContent || '';
      const images = Array.from(document.querySelectorAll('img'));
      const documentImages = images.filter(
        (img) =>
          img.src.includes('document') || img.src.includes('page') ||
          img.alt.includes('document') || img.alt.includes('page'),
      );
      return {
        totalText: textContent.length,
        totalImages: images.length,
        documentImages: documentImages.length,
        hasDocumentContent: documentImages.length > 0 || textContent.length > 1000,
      };
    });

    console.log('📊 Content verification:', {
      textLength: contentCheck.totalText,
      images: contentCheck.totalImages,
      documentImages: contentCheck.documentImages,
      hasContent: contentCheck.hasDocumentContent,
    });

    if (!contentCheck.hasDocumentContent) {
      console.warn('⚠️ Warning: Limited document content detected.');
    }

    await applyPrintStyles(page, progressTracker);
    await page.emulateMediaType('print');

    progressTracker?.updateProgress(90, 'generating', 'Generating PDF...');
    console.log('📄 Generating PDF...');

    const pdfBytes = await page.pdf({
      printBackground: true,
      preferCSSPageSize: true,
      displayHeaderFooter: false,
      timeout: 180000,
      scale: 1,
      omitBackground: false,
    });
    // Some Puppeteer versions return a Uint8Array; always hand back a Buffer
    // so Express sends raw bytes instead of serialising the array as JSON.
    const pdfBuffer = Buffer.from(pdfBytes);

    progressTracker?.updateProgress(100, 'completed', 'PDF generated successfully!');
    console.log(`✅ PDF generated successfully! Size: ${(pdfBuffer.length / 1024 / 1024).toFixed(2)} MB`);
    return pdfBuffer;
  } catch (error) {
    progressTracker?.updateProgress(-1, 'error', error.message);
    console.error('❌ Error during PDF generation:', error);
    throw error;
  } finally {
    if (browser) {
      console.log('🔒 Closing browser...');
      try {
        await browser.close();
      } catch (e) {
        console.log('Error closing browser:', e.message);
      }
    }
  }
};

/**
 * POST /api/request-download
 *
 * Body: { url, email?, password? }. Validates the URL, registers a new session,
 * answers immediately with { sessionId } and runs the download in the
 * background. The outcome is stored in downloadJobs under the session id.
 */
app.post('/api/request-download', (req, res) => {
  // How long a finished job (and its PDF buffer) stays available.
  const JOB_TTL_MS = 15 * 60 * 1000;

  // req.body is undefined when no JSON body was sent.
  const { url, email, password } = req.body ?? {};

  // Parse the URL instead of substring matching, so that something like
  // "http://evil.example/?studocu.com" is rejected and non-string input
  // cannot throw.
  const isStudocuUrl = (candidate) => {
    if (typeof candidate !== 'string') {
      return false;
    }
    try {
      const { protocol, hostname } = new URL(candidate);
      const isHttp = protocol === 'http:' || protocol === 'https:';
      return isHttp && (hostname === 'studocu.com' || hostname.endsWith('.studocu.com'));
    } catch {
      return false;
    }
  };

  if (!isStudocuUrl(url)) {
    return res.status(400).json({ error: 'Please provide a valid StuDocu URL.' });
  }

  // Random ids cannot collide for requests arriving in the same millisecond
  // and cannot be guessed to fetch someone else's PDF.
  const sessionId = randomUUID();
  const progressTracker = new ProgressTracker(sessionId);

  progressTrackers.set(sessionId, progressTracker);
  downloadJobs.set(sessionId, { status: 'processing' });

  console.log(`🎯 Processing request for: ${url} [Session: ${sessionId}]`);

  // Reply right away; the client polls the progress endpoint from here on.
  res.json({ sessionId });

  // Records the final state, drops the live tracker and schedules removal of
  // the job so buffers of old sessions do not pile up in memory.
  const finish = (job) => {
    downloadJobs.set(sessionId, job);
    progressTrackers.delete(sessionId);
    // unref(): a pending expiry must not keep the process alive.
    setTimeout(() => downloadJobs.delete(sessionId), JOB_TTL_MS).unref();
  };

  studocuDownloader(url, { email, password }, progressTracker)
    .then((pdfBuffer) => finish({ status: 'completed', buffer: pdfBuffer }))
    .catch((error) => finish({ status: 'error', message: error.message }));
});

/**
 * GET /api/progress/:sessionId
 *
 * Reports the live tracker state while a job is running; once the tracker is
 * gone, reports the stored outcome of the job. Unknown sessions get a 404.
 */
app.get('/api/progress/:sessionId', (req, res) => {
  const sessionId = req.params.sessionId;

  // A tracker exists only while the job is still being processed.
  const liveTracker = progressTrackers.get(sessionId);
  if (liveTracker) {
    const { progress, status, message } = liveTracker;
    return res.json({
      sessionId,
      progress,
      status,
      message,
      timestamp: new Date().toISOString(),
    });
  }

  const finishedJob = downloadJobs.get(sessionId);
  switch (finishedJob?.status) {
    case 'completed':
      return res.json({ sessionId, progress: 100, status: 'completed', message: 'PDF generated successfully!' });
    case 'error':
      return res.json({ sessionId, progress: -1, status: 'error', message: finishedJob.message });
    default:
      return res.status(404).json({ error: 'Session not found' });
  }
});

/**
 * GET /api/download/:sessionId
 *
 * Sends the generated PDF as an attachment when the job completed. Responds
 * with 404 for unknown sessions, 400 while processing and 500 for failed jobs
 * or any other unexpected state.
 */
app.get('/api/download/:sessionId', (req, res) => {
  const job = downloadJobs.get(req.params.sessionId);

  if (!job) {
    return res.status(404).json({ error: 'Download session not found or expired.' });
  }

  switch (job.status) {
    case 'processing':
      return res.status(400).json({ error: 'Download is still processing.' });
    case 'error':
      return res.status(500).json({ error: `Failed to generate PDF: ${job.message}` });
    case 'completed':
      if (job.buffer) {
        res.setHeader('Content-Type', 'application/pdf');
        res.setHeader('Content-Disposition', 'attachment; filename=studocu-document.pdf');
        res.send(job.buffer);
        return;
      }
      break;
    default:
      break;
  }

  // Completed without a buffer, or a status this handler does not know.
  res.status(500).json({ error: 'An unknown error occurred.' });
});

/**
 * GET /health
 *
 * Liveness probe: reports process uptime and the number of running jobs.
 */
app.get('/health', (req, res) => {
  const report = {
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    // Trackers exist only for jobs that are still in progress.
    activeDownloads: progressTrackers.size,
  };
  res.json(report);
});

/**
 * GET /
 *
 * Static description of the API: version, feature list and endpoints.
 */
app.get('/', (req, res) => {
  res.json({
    message: '🚀 Enhanced StuDocu Downloader API v5.3 - Real-time Progress Tracking with Cloudflare Bypass',
    version: '5.3.0',
    features: [
      '🍪 Advanced cookie banner bypass',
      '🔓 Premium content unblurring',
      '🔑 Login support for full access',
      '📊 Real-time progress tracking via polling',
      '📄 Clean PDF generation with print styles',
      '🕵️ Enhanced stealth to evade bot detection',
      '☁️ Automatic Cloudflare challenge handling',
      '🧠 Human-like behavior simulation',
    ],
    endpoints: {
      // Lists only the body fields the request handler actually reads.
      request: 'POST /api/request-download (body: {url, email?, password?})',
      progress: 'GET /api/progress/:sessionId',
      download: 'GET /api/download/:sessionId',
      health: 'GET /health',
    },
  });
});

// Start the HTTP server and keep the handle so it can be closed on shutdown.
const server = app.listen(port, () => {
  console.log(`🚀 Enhanced StuDocu Downloader v5.3.0 running on http://localhost:${port}`);
  console.log(`✨ Features: Real-time progress tracking, enhanced stealth, Cloudflare bypass, and user feedback`);
});

/**
 * Stops accepting new connections, exits once open connections have finished,
 * and exits anyway after five seconds if they have not.
 *
 * @param {string} signal - Name of the signal that triggered the shutdown.
 * @returns {void}
 */
const shutdown = (signal) => {
  console.log(`${signal} received, shutting down gracefully...`);
  server.close(() => process.exit(0));
  // Keep-alive connections can hold close() open indefinitely; unref() stops
  // this fallback timer from keeping the process alive on its own.
  setTimeout(() => process.exit(0), 5000).unref();
};

process.on('SIGTERM', () => shutdown('SIGTERM'));
process.on('SIGINT', () => shutdown('SIGINT'));