Quote from @David D.:
Quote from @Austin Bright:
Quote from @David D.:
Yeah, that's just a one-liner like this:
url = "https://www.redfin.com/TX/Fort-Worth/10725-Lone-Pine-Ln-7610..."
the_property_id = url.split("/")[-1]
I don't have the URL yet; this was just an example. I mean, given the address, can I:
1. Pull the coordinates with python
2. Return the property ID
3. Using Power Query and the Property ID, create a custom URL for each address
4. Pull back the html and parse through it to get beds, baths etc.
Can the code be modified to do step 1/2?
And can it handle more than just a few addresses? My list is in the 1,000s.
Sorry for the 20 questions, you've been super helpful!
I know I can do 3/4; I'm curious about 1/2.
The code already does this. To get the property ID from the URLs the code generates, you can set up a new column in Excel that splits the URL and keeps the last part (the property ID). Or you can keep the URL as it is and use Power Query to pull data from that property's page.
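In case it's useful, here's a minimal sketch of that same split done in pandas instead of an Excel formula; the workbook file name and the "redfin_url" column are placeholders for whatever your spreadsheet actually uses:
import pandas as pd
# Hypothetical workbook with one Redfin URL per row in a "redfin_url" column
df = pd.read_excel("addresses.xlsx")
# Everything after the last "/" in the URL is the property ID
df["property_id"] = df["redfin_url"].astype(str).str.split("/").str[-1]
df.to_excel("addresses_with_ids.xlsx", index=False)
In Power Query, the equivalent is a Split Column by Delimiter step on "/", keeping the right-most piece.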
Hi David,
Thanks for writing that code. To get it to run, do I need to replace that first text with my API Key? Is this the right "Key" (screenshot attached)? There's a bunch of them. When I push run, it looks like nothing is coming back (see screenshot below). Where does Google Colab return the Property IDs? I'm testing with the California addresses you put in there at the bottom.
![](https://bpimg.biggerpockets.com/no_overlay/uploads/uploaded_images/1717963730-python_output.PNG?twic=v1/output=image/quality=55/contain=800x800)
import requests
from geopy.geocoders import Nominatim
import json
import time
APIFY_API_URL = ''  # Plug in the provided API URL
def geocode_address(address):
    geolocator = Nominatim(user_agent="redfin_scraper", timeout=10)  # Increased timeout to 10 seconds
    try:
        location = geolocator.geocode(address)
        if location:
            return location.latitude, location.longitude
    except Exception as e:
        print(f"Error geocoding address {address}: {e}")
    return None, None
def search_redfin(lat, lng):
    search_url = f"https://www.redfin.com/stingray/do/location-autocomplete?location={lat},{lng}"
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(search_url, headers=headers)
    if response.status_code == 200:
        try:
            # The response body carries a prefix before the JSON, so keep only the part after '&&'
            json_text = response.text.split('&&')[1]
            return json.loads(json_text)
        except (json.JSONDecodeError, IndexError) as e:
            print("Error decoding JSON response:", e)
            print("Response text:", response.text)
            return None
    return None
def extract_home_id(search_results):
    try:
        payload = search_results.get('payload')
        if not payload:
            return None
        region_views = payload.get('regionViews')
        if not region_views:
            return None
        root = region_views.get('__root')
        if not root:
            return None
        for result in root:
            if 'home' in result:
                return result['home']['id']
    except KeyError as e:
        print(f"KeyError: {e}")
    return None
def construct_redfin_detail_url(state, city, street, zip_code, home_id):
    street_formatted = street.replace(' ', '-')
    city_formatted = city.replace(' ', '-')
    return f"https://www.redfin.com/{state}/{city_formatted}/{street_formatted}/home/{home_id}"
def query_apify_redfin_scraper(detail_url):
    api_url = APIFY_API_URL
    payload = {
        "detailUrls": [{"url": detail_url}],
        "debugLog": False
    }
    headers = {
        "Content-Type": "application/json"
    }
    response = requests.post(api_url, headers=headers, data=json.dumps(payload))
    if response.status_code == 201:
        try:
            # A 201 means the scraper run was started; 'data.id' is the run id used to fetch results
            return response.json()['data']['id']
        except (json.JSONDecodeError, KeyError) as e:
            print("Error decoding JSON response:", e)
            print("Response text:", response.text)
            return None
    return None
def get_apify_scraper_results(run_id):
    api_url = f"https://api.apify.com/v2/acts/tri_angle~redfin-detail/runs/{run_id}/dataset/items?token=***"
    while True:
        response = requests.get(api_url)
        if response.status_code == 200:
            try:
                data = response.json()
                if data:
                    return data
            except json.JSONDecodeError as e:
                print("Error decoding JSON response:", e)
                print("Response text:", response.text)
                return None
        time.sleep(10)  # Wait for 10 seconds before retrying
    return None
def main(addresses):
    results = []
    for address in addresses:
        lat, lng = geocode_address(address)
        if lat and lng:
            search_results = search_redfin(lat, lng)
            if search_results:
                home_id = extract_home_id(search_results)
                if home_id:
                    # Assumes addresses are formatted "street, city, ST zip"
                    parts = address.split(',')
                    street = parts[0].strip()
                    city = parts[1].strip()
                    state_zip = parts[2].strip().split(' ')
                    state = state_zip[0]
                    zip_code = state_zip[1]
                    detail_url = construct_redfin_detail_url(state, city, street, zip_code, home_id)
                    run_id = query_apify_redfin_scraper(detail_url)
                    if run_id:
                        scraper_results = get_apify_scraper_results(run_id)
                        results.append(scraper_results)
    return results
addresses = [
    "123 Main St, Los Angeles, CA 90001",
    "456 Elm St, San Francisco, CA 94102"
    # Add more addresses here
]
results = main(addresses)
print(json.dumps(results, indent=2))  # The scraper output (property details) prints here