Spaces:
Build error
Build error
| # Version: 3 | |
| import streamlit as st | |
| import time | |
| from selenium import webdriver | |
| from selenium.webdriver.common.by import By | |
| from selenium.webdriver.chrome.service import Service | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.support.ui import WebDriverWait | |
| from selenium.webdriver.support import expected_conditions as EC | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| import re | |
def search_fsbo_address(location):
    """Open fsbo.com in Chrome, submit ``location`` in the search form, and return the driver.

    Args:
        location: Text typed into the home-page search box.

    Returns:
        The Selenium Chrome driver, left on whatever page the search click
        leads to, or ``None`` if any step fails. On failure the error is
        reported with ``st.error`` and the browser is closed. On success the
        caller owns the driver and must call ``quit()`` on it.
    """
    # Absolute XPaths copied from the live page; they break if the site layout changes.
    search_box_xpath = "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"
    search_button_xpath = "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button"

    options = Options()
    for argument in (
        "--incognito",
        "--disable-blink-features=AutomationControlled",
        "start-maximized",
        "--disable-gpu",
        "--log-level=3",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    ):
        options.add_argument(argument)

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        # Navigation is inside the try so a failed page load also closes the
        # browser instead of leaking the Chrome process.
        driver.get("https://fsbo.com/")
        search_box = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, search_box_xpath))
        )
        search_box.clear()
        search_box.send_keys(location)
        # Fixed pause after typing; presumably lets the page react before the click.
        time.sleep(2)
        driver.find_element(By.XPATH, search_button_xpath).click()
        # Fixed pause so the results page can start loading before the caller scrapes it.
        time.sleep(5)
    except Exception as e:
        st.error(f"Error finding FSBO URL: {e}")
        driver.quit()
        return None
    return driver
def clean_text(text):
    """Collapse a listing's raw inner text into one ``" | "``-separated line.

    Each non-blank input line is trimmed, any ``Listing ID#<digits> -`` prefix
    is removed, and lines containing ``View Listing Details`` are dropped.

    Args:
        text: Raw multi-line text of a listing; ``None`` or ``""`` is allowed.

    Returns:
        The remaining lines joined by ``" | "``, or ``""`` when nothing is left.
    """
    if not text:
        return ""

    cleaned = []
    for raw_line in text.split("\n"):
        # Strip again after the substitution: removing the ID prefix would
        # otherwise leave a leading space (" 123 Main St") in the output.
        line = re.sub(r"Listing ID#\d+\s*-", "", raw_line.strip()).strip()
        # Skip lines that were blank, or became blank once the ID was removed,
        # so the result never contains empty "|  |" segments.
        if not line or "View Listing Details" in line:
            continue
        cleaned.append(line)
    return " | ".join(cleaned)
def scrape_first_house(driver):
    """Return the cleaned details of the first listing on the results page.

    Args:
        driver: Selenium driver already showing the search results.

    Returns:
        The first listing's text passed through ``clean_text``, or the string
        ``"N/A"`` if the results container does not appear within 10 seconds
        or the listing cannot be read.
    """
    results_xpath = "/html/body/div[2]/main/div/div[2]/div[1]"
    first_listing_xpath = "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div"
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, results_xpath))
        )
        first_listing = driver.find_element(By.XPATH, first_listing_xpath)
        return clean_text(first_listing.get_attribute("innerText").strip())
    except Exception:
        # Best effort: any scraping failure means "no first house". A bare
        # ``except:`` would also swallow KeyboardInterrupt/SystemExit, so only
        # Exception subclasses are caught.
        return "N/A"
def scrape_all_houses(driver):
    """Return the cleaned details of every listing on the results page.

    Args:
        driver: Selenium driver already showing the search results.

    Returns:
        A list of Markdown strings of the form ``"**<n>.** <details>"``, where
        ``n`` is the listing's 1-based position on the page. Listings whose
        text cannot be read are skipped, so numbering may have gaps. Returns
        an empty list if the results container does not appear within 10
        seconds or the listings cannot be located.
    """
    results_xpath = "/html/body/div[2]/main/div/div[2]/div[1]"
    listings_xpath = "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div"
    houses = []
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, results_xpath))
        )
        listings = driver.find_elements(By.XPATH, listings_xpath)
        for index, listing in enumerate(listings, start=1):
            try:
                details_text = listing.get_attribute("innerText").strip()
                houses.append(f"**{index}.** {clean_text(details_text)}")
            except Exception:
                # One unreadable listing (e.g. a stale element) must not abort
                # the rest. Exception, not a bare except, so interpreter-level
                # interrupts still propagate.
                continue
    except Exception:
        # The results never appeared or could not be queried: report nothing.
        return []
    return houses
def main():
    """Render the Streamlit page and run one FSBO search when "Search" is clicked.

    Shows the first listing as soon as it is scraped, then every listing on
    the results page. The browser opened for the search is always closed,
    even if scraping or rendering raises.
    """
    st.title("FSBO House Price Finder")
    location = st.text_input("Enter Address:")
    if not st.button("Search"):
        return

    with st.spinner("Fetching house details..."):
        driver = search_fsbo_address(location)
        if not driver:
            # search_fsbo_address already reported the error and closed the browser.
            return
        try:
            first_house = scrape_first_house(driver)
            if first_house != "N/A":
                st.success("**First House Found:**")
                st.write(first_house)

            # NOTE(review): the source's indentation was lost; this notice and
            # the full scrape are assumed to run whether or not a first house
            # was found — confirm against the original file.
            st.info("Wait... getting all houses in the area")
            all_houses = scrape_all_houses(driver)
            if all_houses:
                st.success("**All Houses in the Area:**")
                for house in all_houses:
                    st.write(house)
            else:
                st.error("No additional houses found.")
        finally:
            # Release the Chrome process on every path, not only on success.
            driver.quit()


if __name__ == "__main__":
    main()
| # version: 2 | |
| # import streamlit as st | |
| # import time | |
| # from selenium import webdriver | |
| # from selenium.webdriver.common.by import By | |
| # from selenium.webdriver.chrome.service import Service | |
| # from selenium.webdriver.chrome.options import Options | |
| # from selenium.webdriver.support.ui import WebDriverWait | |
| # from selenium.webdriver.support import expected_conditions as EC | |
| # from webdriver_manager.chrome import ChromeDriverManager | |
| # def search_fsbo_address(location): | |
| # """Search FSBO for the given address and return the result page.""" | |
| # options = Options() | |
| # options.add_argument("--incognito") | |
| # options.add_argument("--disable-blink-features=AutomationControlled") | |
| # options.add_argument("start-maximized") | |
| # options.add_argument("--disable-gpu") | |
| # options.add_argument("--log-level=3") | |
| # options.add_argument( | |
| # "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" | |
| # ) | |
| # driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) | |
| # driver.get("https://fsbo.com/") | |
| # try: | |
| # search_box = WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input")) | |
| # ) | |
| # search_box.clear() | |
| # search_box.send_keys(location) | |
| # time.sleep(2) | |
| # search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button") | |
| # search_button.click() | |
| # time.sleep(5) | |
| # return driver | |
| # except Exception as e: | |
| # st.error(f"Error finding FSBO URL: {e}") | |
| # driver.quit() | |
| # return None | |
| # def format_details(details_text): | |
| # """Formats details by adding '|' after each line.""" | |
| # lines = [line.strip() for line in details_text.split("\n") if line.strip()] | |
| # return " | ".join(lines) | |
| # def scrape_first_house(driver): | |
| # """Scrape only the first house and return its details.""" | |
| # try: | |
| # WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]")) | |
| # ) | |
| # listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div") | |
| # details_text = listing.get_attribute("innerText").strip() | |
| # return format_details(details_text) | |
| # except: | |
| # return "N/A" | |
| # def scrape_all_houses(driver): | |
| # """Scrape all houses and return a list of details.""" | |
| # houses = [] | |
| # try: | |
| # WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]")) | |
| # ) | |
| # listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div") | |
| # for listing in listings: | |
| # try: | |
| # details_text = listing.get_attribute("innerText").strip() | |
| # formatted_details = format_details(details_text) | |
| # houses.append(formatted_details) | |
| # except: | |
| # continue | |
| # except: | |
| # return [] | |
| # return houses | |
| # def main(): | |
| # st.title("FSBO House Price Finder") | |
| # location = st.text_input("Enter Address:") | |
| # if st.button("Search"): | |
| # with st.spinner("Fetching house details..."): | |
| # driver = search_fsbo_address(location) | |
| # if not driver: | |
| # return | |
| # first_house = scrape_first_house(driver) | |
| # if first_house != "N/A": | |
| # st.success("First House Found:") | |
| # st.write(first_house) | |
| # st.info("Wait... getting all houses in the area") | |
| # all_houses = scrape_all_houses(driver) | |
| # if all_houses: | |
| # st.success("All Houses in the Area:") | |
| # for house in all_houses: | |
| # st.write(house) | |
| # else: | |
| # st.error("No additional houses found.") | |
| # driver.quit() | |
| # if __name__ == "__main__": | |
| # main() | |
| # Best version 1: it scrapes and displays the first house's details correctly. | |
| # import streamlit as st | |
| # import threading | |
| # from selenium import webdriver | |
| # from selenium.webdriver.common.by import By | |
| # from selenium.webdriver.chrome.service import Service | |
| # from selenium.webdriver.chrome.options import Options | |
| # from selenium.webdriver.support.ui import WebDriverWait | |
| # from selenium.webdriver.support import expected_conditions as EC | |
| # from webdriver_manager.chrome import ChromeDriverManager | |
| # import time | |
| # def search_fsbo_address(location): | |
| # """Search FSBO for the given address and return the first result's URL.""" | |
| # options = Options() | |
| # options.add_argument("--incognito") | |
| # options.add_argument("--disable-blink-features=AutomationControlled") | |
| # options.add_argument("start-maximized") | |
| # options.add_argument("--disable-gpu") | |
| # options.add_argument("--log-level=3") | |
| # options.add_argument( | |
| # "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" | |
| # ) | |
| # driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) | |
| # driver.get("https://fsbo.com/") | |
| # try: | |
| # search_box = WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input")) | |
| # ) | |
| # search_box.clear() | |
| # search_box.send_keys(location) | |
| # time.sleep(2) | |
| # search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button") | |
| # search_button.click() | |
| # time.sleep(5) | |
| # fsbo_url = driver.current_url | |
| # return driver, fsbo_url | |
| # except Exception as e: | |
| # print("Error finding correct FSBO URL:", e) | |
| # driver.quit() | |
| # return None, None | |
| # def format_details(details_text): | |
| # """Formats the details by adding '|' after each line.""" | |
| # lines = [line.strip() for line in details_text.split("\n") if line.strip()] | |
| # return " | ".join(lines) # Join lines with '|' | |
| # def scrape_fsbo_details(location): | |
| # """Find the correct FSBO URL and scrape house details.""" | |
| # driver, fsbo_url = search_fsbo_address(location) | |
| # if not fsbo_url: | |
| # return {"Details": "N/A", "Link": "N/A"} | |
| # try: | |
| # WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]")) | |
| # ) | |
| # listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div") | |
| # # Extract raw text | |
| # details_text = listing.get_attribute("innerText").strip() | |
| # # Format details by adding '|' | |
| # formatted_details = format_details(details_text) | |
| # except: | |
| # driver.quit() | |
| # return {"Details": "N/A", "Link": fsbo_url} | |
| # driver.quit() | |
| # return {"Details": formatted_details, "Link": fsbo_url} | |
| # def main(): | |
| # st.title("FSBO House Price Finder") | |
| # location = st.text_input("Enter Address:") | |
| # if st.button("Search"): | |
| # with st.spinner("Fetching house details..."): | |
| # house_data = scrape_fsbo_details(location) | |
| # if house_data: | |
| # st.success("House Details:") | |
| # st.write(house_data["Details"]) # Display formatted details | |
| # st.write(f"[View Listing]({house_data['Link']})") | |
| # threading.Thread(target=scrape_fsbo_details, args=(location,), daemon=True).start() | |
| # else: | |
| # st.error("No results found or address not recognized.") | |
| # if __name__ == "__main__": | |
| # main() | |
| ## Works up to the search step, but is not able to scrape the details. | |
| # import streamlit as st | |
| # import pandas as pd | |
| # import threading | |
| # from selenium import webdriver | |
| # from selenium.webdriver.common.by import By | |
| # from selenium.webdriver.chrome.service import Service | |
| # from selenium.webdriver.chrome.options import Options | |
| # from selenium.webdriver.support.ui import WebDriverWait | |
| # from selenium.webdriver.support import expected_conditions as EC | |
| # from webdriver_manager.chrome import ChromeDriverManager | |
| # import time | |
| # def search_redfin_address(location): | |
| # """Search Redfin for the given address and return the first result's URL.""" | |
| # options = Options() | |
| # # options.add_argument("--headless") | |
| # options.add_argument("--incognito") | |
| # options.add_argument("--disable-blink-features=AutomationControlled") | |
| # options.add_argument("start-maximized") | |
| # options.add_argument( | |
| # "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" | |
| # ) | |
| # driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) | |
| # driver.get("https://fsbo.com/") | |
| # try: | |
| # # Find and enter location into the search box | |
| # search_box = WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input")) | |
| # ) | |
| # search_box.clear() | |
| # search_box.send_keys(location) | |
| # time.sleep(2) | |
| # # Click the search button | |
| # search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button") | |
| # search_button.click() | |
| # time.sleep(5) # Allow results to load | |
| # # Return updated URL | |
| # redfin_url = driver.current_url | |
| # return driver, redfin_url | |
| # except Exception as e: | |
| # print("Error finding correct Redfin URL:", e) | |
| # driver.quit() | |
| # return None, None | |
| # def scrape_redfin_details(location): | |
| # """Find the correct Redfin URL and scrape house details.""" | |
| # driver, redfin_url = search_redfin_address(location) | |
| # if not redfin_url: | |
| # return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": "N/A"} | |
| # try: | |
| # WebDriverWait(driver, 10).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]")) | |
| # ) | |
| # listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]") | |
| # except: | |
| # driver.quit() | |
| # return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": redfin_url} | |
| # houses = [] | |
| # for listing in listings: | |
| # try: | |
| # price = listing.find_element(By.XPATH, ".//div/div/div[3]/div[1]/div[1]/span").text | |
| # except: | |
| # price = "N/A" | |
| # try: | |
| # address = listing.find_element(By.XPATH, ".//div/div/div[3]/div[3]").text | |
| # except: | |
| # address = "N/A" | |
| # try: | |
| # beds = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[1]").text | |
| # except: | |
| # beds = "N/A" | |
| # try: | |
| # baths = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[2]").text | |
| # except: | |
| # baths = "N/A" | |
| # try: | |
| # sqft = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[3]").text | |
| # except: | |
| # sqft = "N/A" | |
| # try: | |
| # link = listing.find_element(By.TAG_NAME, "a").get_attribute("href") | |
| # except: | |
| # link = "N/A" | |
| # houses.append({"Price": price, "Address": address, "Beds": beds, "Baths": baths, "Sq Ft": sqft, "Link": link}) | |
| # break # Return only the first house fast | |
| # driver.quit() | |
| # return houses[0] | |
| # def background_scraping(location): | |
| # """Scrapes additional property details in the background.""" | |
| # pass | |
| # def main(): | |
| # st.title("Redfin House Price Finder") | |
| # location = st.text_input("Enter Address:") | |
| # if st.button("Search"): | |
| # with st.spinner("Fetching house details..."): | |
| # house_data = scrape_redfin_details(location) | |
| # if house_data: | |
| # st.success(f"House Price: {house_data['Price']}") | |
| # st.write(f"**Address:** {house_data['Address']}") | |
| # st.write(f"**Beds:** {house_data['Beds']} | **Baths:** {house_data['Baths']} | **Sq Ft:** {house_data['Sq Ft']}") | |
| # # st.write(f"[View Listing on Redfin]({house_data['Link']})") | |
| # threading.Thread(target=background_scraping, args=(location,), daemon=True).start() | |
| # else: | |
| # st.error("No results found or address not recognized.") | |
| # if __name__ == "__main__": | |
| # main() |