Krish-Upgrix committed on
Commit 71601e8 · verified · 1 Parent(s): 4d4a356

Upload 2 files

Files changed (2)
  1. FSBO_app.py +488 -0
  2. requirements.txt +9 -0
FSBO_app.py ADDED
@@ -0,0 +1,488 @@
+ # Version: 3
+ import streamlit as st
+ import time
+ from selenium import webdriver
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from webdriver_manager.chrome import ChromeDriverManager
+ import re
+
+ def search_fsbo_address(location):
+     """Search FSBO for the given address and return the result page."""
+     options = Options()
+     options.add_argument("--incognito")
+     options.add_argument("--disable-blink-features=AutomationControlled")
+     options.add_argument("start-maximized")
+     options.add_argument("--disable-gpu")
+     options.add_argument("--log-level=3")
+     options.add_argument(
+         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+     )
+
+     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+     driver.get("https://fsbo.com/")
+
+     try:
+         search_box = WebDriverWait(driver, 10).until(
+             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
+         )
+         search_box.clear()
+         search_box.send_keys(location)
+         time.sleep(2)
+
+         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
+         search_button.click()
+         time.sleep(5)
+
+         return driver
+     except Exception as e:
+         st.error(f"Error finding FSBO URL: {e}")
+         driver.quit()
+         return None
+
+ def clean_text(text):
+     """Cleans extracted text by removing the Listing ID, unnecessary content, and 'View Listing Details'."""
+     lines = [line.strip() for line in text.split("\n") if line.strip()]
+
+     # Remove Listing ID (e.g., "Listing ID#541799 - ")
+     lines = [re.sub(r"Listing ID#\d+\s*-", "", line) for line in lines]
+
+     # Remove "View Listing Details"
+     lines = [line for line in lines if "View Listing Details" not in line]
+
+     return " | ".join(lines)
+
+ def scrape_first_house(driver):
+     """Scrape only the first house and return its details properly formatted."""
+     try:
+         WebDriverWait(driver, 10).until(
+             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
+         )
+         first_listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
+
+         details_text = first_listing.get_attribute("innerText").strip()
+         formatted_details = clean_text(details_text)
+
+         return formatted_details
+     except Exception:
+         return "N/A"
+
+ def scrape_all_houses(driver):
+     """Scrape all houses and return a list of details properly formatted."""
+     houses = []
+     try:
+         WebDriverWait(driver, 10).until(
+             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
+         )
+         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
+
+         for index, listing in enumerate(listings, start=1):
+             try:
+                 details_text = listing.get_attribute("innerText").strip()
+                 formatted_details = clean_text(details_text)
+                 houses.append(f"**{index}.** {formatted_details}")
+             except Exception:
+                 continue
+     except Exception:
+         return []
+
+     return houses
+
+ def main():
+     st.title("FSBO House Price Finder")
+     location = st.text_input("Enter Address:")
+
+     if st.button("Search"):
+         with st.spinner("Fetching house details..."):
+             driver = search_fsbo_address(location)
+             if not driver:
+                 return
+
+             first_house = scrape_first_house(driver)
+             if first_house != "N/A":
+                 st.success("**First House Found:**")
+                 st.write(first_house)
+
+             st.info("Wait... getting all houses in the area")
+             all_houses = scrape_all_houses(driver)
+
+             if all_houses:
+                 st.success("**All Houses in the Area:**")
+                 for house in all_houses:
+                     st.write(house)
+             else:
+                 st.error("No additional houses found.")
+
+             driver.quit()
+
+ if __name__ == "__main__":
+     main()
+
+
+
+
+
+
+
+
+
+
+
+
+
+ # version: 2
+
+ # import streamlit as st
+ # import time
+ # from selenium import webdriver
+ # from selenium.webdriver.common.by import By
+ # from selenium.webdriver.chrome.service import Service
+ # from selenium.webdriver.chrome.options import Options
+ # from selenium.webdriver.support.ui import WebDriverWait
+ # from selenium.webdriver.support import expected_conditions as EC
+ # from webdriver_manager.chrome import ChromeDriverManager
+
+ # def search_fsbo_address(location):
+ #     """Search FSBO for the given address and return the result page."""
+ #     options = Options()
+ #     options.add_argument("--incognito")
+ #     options.add_argument("--disable-blink-features=AutomationControlled")
+ #     options.add_argument("start-maximized")
+ #     options.add_argument("--disable-gpu")
+ #     options.add_argument("--log-level=3")
+ #     options.add_argument(
+ #         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ #     )
+
+ #     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+ #     driver.get("https://fsbo.com/")
+
+ #     try:
+ #         search_box = WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
+ #         )
+ #         search_box.clear()
+ #         search_box.send_keys(location)
+ #         time.sleep(2)
+
+ #         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
+ #         search_button.click()
+ #         time.sleep(5)
+
+ #         return driver
+ #     except Exception as e:
+ #         st.error(f"Error finding FSBO URL: {e}")
+ #         driver.quit()
+ #         return None
+
+ # def format_details(details_text):
+ #     """Formats details by adding '|' after each line."""
+ #     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
+ #     return " | ".join(lines)
+
+ # def scrape_first_house(driver):
+ #     """Scrape only the first house and return its details."""
+ #     try:
+ #         WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
+ #         )
+ #         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
+
+ #         details_text = listing.get_attribute("innerText").strip()
+ #         return format_details(details_text)
+ #     except:
+ #         return "N/A"
+
+ # def scrape_all_houses(driver):
+ #     """Scrape all houses and return a list of details."""
+ #     houses = []
+ #     try:
+ #         WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
+ #         )
+ #         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div")
+
+ #         for listing in listings:
+ #             try:
+ #                 details_text = listing.get_attribute("innerText").strip()
+ #                 formatted_details = format_details(details_text)
+ #                 houses.append(formatted_details)
+ #             except:
+ #                 continue
+ #     except:
+ #         return []
+
+ #     return houses
+
+ # def main():
+ #     st.title("FSBO House Price Finder")
+ #     location = st.text_input("Enter Address:")
+
+ #     if st.button("Search"):
+ #         with st.spinner("Fetching house details..."):
+ #             driver = search_fsbo_address(location)
+ #             if not driver:
+ #                 return
+
+ #             first_house = scrape_first_house(driver)
+ #             if first_house != "N/A":
+ #                 st.success("First House Found:")
+ #                 st.write(first_house)
+
+ #             st.info("Wait... getting all houses in the area")
+ #             all_houses = scrape_all_houses(driver)
+
+ #             if all_houses:
+ #                 st.success("All Houses in the Area:")
+ #                 for house in all_houses:
+ #                     st.write(house)
+ #             else:
+ #                 st.error("No additional houses found.")
+
+ #             driver.quit()
+
+ # if __name__ == "__main__":
+ #     main()
+
+
+
+
+
+
+
+
+
+
+
+ # Best version 1: it is scraping and displaying the first house details correctly.
+
+ # import streamlit as st
+ # import threading
+ # from selenium import webdriver
+ # from selenium.webdriver.common.by import By
+ # from selenium.webdriver.chrome.service import Service
+ # from selenium.webdriver.chrome.options import Options
+ # from selenium.webdriver.support.ui import WebDriverWait
+ # from selenium.webdriver.support import expected_conditions as EC
+ # from webdriver_manager.chrome import ChromeDriverManager
+ # import time
+
+ # def search_fsbo_address(location):
+ #     """Search FSBO for the given address and return the first result's URL."""
+ #     options = Options()
+ #     options.add_argument("--incognito")
+ #     options.add_argument("--disable-blink-features=AutomationControlled")
+ #     options.add_argument("start-maximized")
+ #     options.add_argument("--disable-gpu")
+ #     options.add_argument("--log-level=3")
+ #     options.add_argument(
+ #         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ #     )
+
+ #     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+ #     driver.get("https://fsbo.com/")
+
+ #     try:
+ #         search_box = WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
+ #         )
+ #         search_box.clear()
+ #         search_box.send_keys(location)
+ #         time.sleep(2)
+
+ #         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
+ #         search_button.click()
+ #         time.sleep(5)
+
+ #         fsbo_url = driver.current_url
+ #         return driver, fsbo_url
+ #     except Exception as e:
+ #         print("Error finding correct FSBO URL:", e)
+ #         driver.quit()
+ #         return None, None
+
+ # def format_details(details_text):
+ #     """Formats the details by adding '|' after each line."""
+ #     lines = [line.strip() for line in details_text.split("\n") if line.strip()]
+ #     return " | ".join(lines)  # Join lines with '|'
+
+ # def scrape_fsbo_details(location):
+ #     """Find the correct FSBO URL and scrape house details."""
+ #     driver, fsbo_url = search_fsbo_address(location)
+ #     if not fsbo_url:
+ #         return {"Details": "N/A", "Link": "N/A"}
+
+ #     try:
+ #         WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
+ #         )
+ #         listing = driver.find_element(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]/div[1]/div[1]/div")
+
+ #         # Extract raw text
+ #         details_text = listing.get_attribute("innerText").strip()
+
+ #         # Format details by adding '|'
+ #         formatted_details = format_details(details_text)
+ #     except:
+ #         driver.quit()
+ #         return {"Details": "N/A", "Link": fsbo_url}
+
+ #     driver.quit()
+ #     return {"Details": formatted_details, "Link": fsbo_url}
+
+ # def main():
+ #     st.title("FSBO House Price Finder")
+ #     location = st.text_input("Enter Address:")
+
+ #     if st.button("Search"):
+ #         with st.spinner("Fetching house details..."):
+ #             house_data = scrape_fsbo_details(location)
+
+ #             if house_data:
+ #                 st.success("House Details:")
+ #                 st.write(house_data["Details"])  # Display formatted details
+ #                 st.write(f"[View Listing]({house_data['Link']})")
+ #                 threading.Thread(target=scrape_fsbo_details, args=(location,), daemon=True).start()
+ #             else:
+ #                 st.error("No results found or address not recognized.")
+
+ # if __name__ == "__main__":
+ #     main()
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ## working up to the search step, but not yet able to scrape the details
+
+
+ # import streamlit as st
+ # import pandas as pd
+ # import threading
+ # from selenium import webdriver
+ # from selenium.webdriver.common.by import By
+ # from selenium.webdriver.chrome.service import Service
+ # from selenium.webdriver.chrome.options import Options
+ # from selenium.webdriver.support.ui import WebDriverWait
+ # from selenium.webdriver.support import expected_conditions as EC
+ # from webdriver_manager.chrome import ChromeDriverManager
+ # import time
+
+ # def search_redfin_address(location):
+ #     """Search Redfin for the given address and return the first result's URL."""
+ #     options = Options()
+ #     # options.add_argument("--headless")
+ #     options.add_argument("--incognito")
+ #     options.add_argument("--disable-blink-features=AutomationControlled")
+ #     options.add_argument("start-maximized")
+ #     options.add_argument(
+ #         "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+ #     )
+
+ #     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+ #     driver.get("https://fsbo.com/")
+
+ #     try:
+ #         # Find and enter location into the search box
+ #         search_box = WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/input"))
+ #         )
+ #         search_box.clear()
+ #         search_box.send_keys(location)
+ #         time.sleep(2)
+
+ #         # Click the search button
+ #         search_button = driver.find_element(By.XPATH, "/html/body/div[2]/main/div[1]/div[6]/div/div/div[4]/form/div/div/button")
+ #         search_button.click()
+ #         time.sleep(5)  # Allow results to load
+
+ #         # Return updated URL
+ #         redfin_url = driver.current_url
+ #         return driver, redfin_url
+ #     except Exception as e:
+ #         print("Error finding correct Redfin URL:", e)
+ #         driver.quit()
+ #         return None, None
+
+ # def scrape_redfin_details(location):
+ #     """Find the correct Redfin URL and scrape house details."""
+ #     driver, redfin_url = search_redfin_address(location)
+ #     if not redfin_url:
+ #         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": "N/A"}
+
+ #     try:
+ #         WebDriverWait(driver, 10).until(
+ #             EC.presence_of_element_located((By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]"))
+ #         )
+ #         listings = driver.find_elements(By.XPATH, "/html/body/div[2]/main/div/div[2]/div[1]/div[2]")
+ #     except:
+ #         driver.quit()
+ #         return {"Price": "N/A", "Address": "N/A", "Beds": "N/A", "Baths": "N/A", "Sq Ft": "N/A", "Link": redfin_url}
+
+ #     houses = []
+ #     for listing in listings:
+ #         try:
+ #             price = listing.find_element(By.XPATH, ".//div/div/div[3]/div[1]/div[1]/span").text
+ #         except:
+ #             price = "N/A"
+ #         try:
+ #             address = listing.find_element(By.XPATH, ".//div/div/div[3]/div[3]").text
+ #         except:
+ #             address = "N/A"
+ #         try:
+ #             beds = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[1]").text
+ #         except:
+ #             beds = "N/A"
+ #         try:
+ #             baths = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[2]").text
+ #         except:
+ #             baths = "N/A"
+ #         try:
+ #             sqft = listing.find_element(By.XPATH, ".//div/div/div[3]/div[2]/span[3]").text
+ #         except:
+ #             sqft = "N/A"
+ #         try:
+ #             link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
+ #         except:
+ #             link = "N/A"
+
+ #         houses.append({"Price": price, "Address": address, "Beds": beds, "Baths": baths, "Sq Ft": sqft, "Link": link})
+ #         break  # Return only the first house fast
+
+ #     driver.quit()
+ #     return houses[0]
+
+ # def background_scraping(location):
+ #     """Scrapes additional property details in the background."""
+ #     pass
+
+ # def main():
+ #     st.title("Redfin House Price Finder")
+ #     location = st.text_input("Enter Address:")
+
+ #     if st.button("Search"):
+ #         with st.spinner("Fetching house details..."):
+ #             house_data = scrape_redfin_details(location)
+
+ #             if house_data:
+ #                 st.success(f"House Price: {house_data['Price']}")
+ #                 st.write(f"**Address:** {house_data['Address']}")
+ #                 st.write(f"**Beds:** {house_data['Beds']} | **Baths:** {house_data['Baths']} | **Sq Ft:** {house_data['Sq Ft']}")
+ #                 # st.write(f"[View Listing on Redfin]({house_data['Link']})")
+ #                 threading.Thread(target=background_scraping, args=(location,), daemon=True).start()
+ #             else:
+ #                 st.error("No results found or address not recognized.")
+
+ # if __name__ == "__main__":
+ #     main()
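
A quick way to sanity-check the clean_text helper in the active version above, without touching the browser flow, is a snippet along these lines. The sample innerText string is made up for illustration (real fsbo.com listing text may differ), and it assumes FSBO_app.py is importable with its dependencies installed:

    # Hypothetical sample input; not part of the committed file.
    from FSBO_app import clean_text

    raw = "Listing ID#541799 - 123 Main St\n$350,000\n3 bd | 2 ba\nView Listing Details"
    print(clean_text(raw))
    # prints: " 123 Main St | $350,000 | 3 bd | 2 ba"
    # (the leading space is left behind where the "Listing ID#... -" prefix was stripped)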
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ selenium
+ pandas
+ webdriver-manager
+ undetected-chromedriver
+ setuptools
+ selenium-stealth
+ streamlit
+ playwright
+ asyncio
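
To try the app locally, the usual flow would be pip install -r requirements.txt followed by streamlit run FSBO_app.py. One caveat: the Options in FSBO_app.py open a visible Chrome window, so on a machine without a display a headless configuration roughly like the following is typically needed. This is a sketch of standard Chrome flags, not part of the committed file:

    # Hedged sketch: extra arguments for running Chrome without a display.
    from selenium.webdriver.chrome.options import Options

    options = Options()
    options.add_argument("--headless")               # run Chrome without a visible window
    options.add_argument("--no-sandbox")             # commonly required inside containers
    options.add_argument("--disable-dev-shm-usage")  # avoid /dev/shm size limits in containers

These flags would be added alongside the existing add_argument calls before webdriver.Chrome(...) is created.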