Skip to Content
Guides / Real estate data parsing

Real Estate Data Parsing Guide

Best practices for working with PropAPIS real estate data.

Data Structure

Property Object

Standard property data structure:

# Example of the standard property data structure, one field per line.
{
    'address': '123 Main St, Austin, TX 78701',
    'price': 450000,
    'bedrooms': 3,
    'bathrooms': 2.5,
    'sqft': 2100,
    'lot_size': 0.25,  # acres
    'year_built': 1995,
    'property_type': 'Single Family',
    'zestimate': 465000,
    'rent_zestimate': 2500,
    'status': 'active',
    'days_on_market': 15,
    'photos': ['url1', 'url2', ...],
    'coordinates': {'lat': 30.267, 'lng': -97.743},
}

Error Handling

Handling Missing Data

Always check for None values:

prop = api.platforms.zillow.get_property(address)

# Safe access with defaults: `or` falls back to 0 when the value is None
# (or otherwise falsy).
bedrooms = prop.bedrooms or 0
price = prop.price or 0

# Or use getattr. Its default only applies when the attribute is absent, so
# `or 0` is added to also cover an attribute that exists but is set to None.
sqft = getattr(prop, 'sqft', None) or 0

Try-Except Patterns

Handle API errors gracefully:

import time

from propapis import PropAPIS, APIError

api = PropAPIS(api_key='your_api_key')

try:
    prop = api.platforms.zillow.get_property(address)
except APIError as e:
    if e.status_code == 404:
        print(f"Property not found: {address}")
    elif e.status_code == 429:
        print("Rate limit exceeded, retrying...")
        # Back off before issuing further requests. This snippet only
        # pauses; it does not re-issue the failed call itself.
        time.sleep(60)
    else:
        print(f"API error: {e}")

Retry Logic

Implement exponential backoff:

import time


def get_property_with_retry(address, max_retries=3):
    """Fetch a property, retrying failed calls with exponential backoff.

    Args:
        address: Address passed through to ``get_property``.
        max_retries: Total number of attempts to make; must be at least 1.

    Returns:
        Whatever ``api.platforms.zillow.get_property(address)`` returns.

    Raises:
        ValueError: If ``max_retries`` is less than 1 (previously this case
            silently returned ``None`` without making any request).
        APIError: Re-raised from the last attempt once all attempts are
            used up, or immediately for a client error that a retry
            cannot fix.
    """
    if max_retries < 1:
        raise ValueError("max_retries must be at least 1")

    for attempt in range(max_retries):
        try:
            return api.platforms.zillow.get_property(address)
        except APIError as e:
            status = getattr(e, 'status_code', None)
            # A 4xx response other than 429 (rate limited) describes a
            # problem with the request itself, e.g. 404 "property not
            # found"; waiting and retrying would only burn quota.
            non_retryable = (
                isinstance(status, int)
                and 400 <= status < 500
                and status != 429
            )
            if non_retryable or attempt == max_retries - 1:
                raise
            # Waits between attempts grow as 1s, 2s, 4s, ...
            time.sleep(2 ** attempt)

Data Validation

Validate Property Data

Check data quality before using:

def validate_property(prop):
    """Run basic sanity checks on a property and report any problems.

    Args:
        prop: Object exposing ``price``, ``bedrooms``, ``sqft`` and
            ``address`` attributes.

    Returns:
        True when no check fails; otherwise prints each problem and
        returns False.
    """
    errors = []

    # A missing, zero or negative price is never usable.
    if not prop.price or prop.price <= 0:
        errors.append("Invalid price")

    # Zero bedrooms is a legitimate value (e.g. a studio), so only a missing
    # or negative count is rejected.
    if prop.bedrooms is None or prop.bedrooms < 0:
        errors.append("Invalid bedrooms")

    # sqft is optional; it is only checked when a non-zero value is present.
    if prop.sqft and prop.sqft < 100:
        errors.append("Suspiciously small sqft")

    if errors:
        print(f"Validation errors for {prop.address}:")
        for error in errors:
            print(f" - {error}")
        return False

    return True


# Use validation
prop = api.platforms.zillow.get_property(address)

if validate_property(prop):
    # Process property
    pass

Data Cleaning

Clean and normalize data:

def clean_property_data(prop):
    """Return a plain dict of normalized fields taken from ``prop``.

    Args:
        prop: Object exposing ``address``, ``price``, ``bedrooms``,
            ``bathrooms`` and ``sqft`` attributes.

    Returns:
        A dict with keys ``address``, ``price``, ``bedrooms``,
        ``bathrooms`` and ``sqft``. Missing values are ``None``. The key
        ``price_per_sqft`` is added only when both price and sqft are
        present and non-zero.
    """
    address = prop.address
    bedrooms = prop.bedrooms
    bathrooms = prop.bathrooms

    cleaned = {
        # A missing address stays None instead of crashing on .strip().
        'address': address.strip() if address is not None else None,
        # A zero price or area carries no information, so it is treated
        # as missing.
        'price': int(prop.price) if prop.price else None,
        # Zero is a real count for rooms (e.g. a studio has 0 bedrooms), so
        # only None is treated as missing here.
        'bedrooms': int(bedrooms) if bedrooms is not None else None,
        'bathrooms': float(bathrooms) if bathrooms is not None else None,
        'sqft': int(prop.sqft) if prop.sqft else None,
    }

    # Calculate derived fields (the truthiness test also guards the division).
    if cleaned['price'] and cleaned['sqft']:
        cleaned['price_per_sqft'] = cleaned['price'] / cleaned['sqft']

    return cleaned

Batch Processing

Process Multiple Properties

Efficiently process large datasets:

def process_properties_batch(addresses):
    """Fetch, validate and clean each address, skipping any that fail.

    Args:
        addresses: Sized sequence of address strings.

    Returns:
        List of cleaned property dicts for the addresses that were fetched
        successfully and passed validation, in input order.
    """
    results = []
    total = len(addresses)

    for position, address in enumerate(addresses, start=1):
        try:
            prop = api.platforms.zillow.get_property(address)

            if validate_property(prop):
                results.append(clean_property_data(prop))
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next
            # address rather than aborting the whole run.
            print(f"Error processing {address}: {e}")

        # Progress indicator. It lives outside the try block so the count is
        # still reported when the address at a multiple of 10 fails.
        if position % 10 == 0:
            print(f"Processed {position}/{total}")

    return results


# Process list
addresses = ['123 Main St, Austin, TX', ...]
results = process_properties_batch(addresses)

Parallel Processing

Speed up with concurrent requests:

from concurrent.futures import ThreadPoolExecutor


def fetch_property(address):
    """Fetch one property; on any error print it and return None."""
    try:
        fetched = api.platforms.zillow.get_property(address)
    except Exception as exc:
        print(f"Error: {address} - {exc}")
        return None
    return fetched


# Process in parallel
addresses = ['addr1', 'addr2', 'addr3', ...]

with ThreadPoolExecutor(max_workers=5) as executor:
    # executor.map yields results in input order; failed lookups come back
    # as None and are filtered out while collecting.
    properties = [
        p for p in executor.map(fetch_property, addresses) if p is not None
    ]

Data Export

Export to CSV

import csv


def export_to_csv(properties, filename):
    """Write one CSV row per property to ``filename``.

    Args:
        properties: Iterable of objects exposing the attributes listed in
            ``fieldnames`` below.
        filename: Path of the CSV file to create or overwrite.
    """
    # Single source of truth for both the header and the row contents, so
    # the two cannot drift apart.
    fieldnames = [
        'address', 'price', 'bedrooms', 'bathrooms',
        'sqft', 'zestimate', 'rent_zestimate',
    ]

    # An explicit encoding keeps non-ASCII addresses from depending on the
    # platform's locale; newline='' is what the csv module expects.
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {name: getattr(prop, name) for name in fieldnames}
            for prop in properties
        )


# Export
export_to_csv(properties, 'properties.csv')

Export to JSON

import json


def export_to_json(properties, filename):
    """Clean every property and dump the resulting list to ``filename``.

    The output is a JSON array, indented by two spaces, holding one
    ``clean_property_data`` result per input property.
    """
    records = list(map(clean_property_data, properties))

    with open(filename, 'w') as out_file:
        json.dump(records, out_file, indent=2)


# Export
export_to_json(properties, 'properties.json')

Performance Optimization

Caching

Cache frequently accessed data:

from functools import lru_cache
import hashlib


@lru_cache(maxsize=1000)
def get_property_cached(address):
    """Fetch a property, reusing earlier results for the same address.

    Results are memoized per exact ``address`` string; at most 1000
    distinct addresses are kept, with the least recently used evicted first.
    """
    zillow = api.platforms.zillow
    return zillow.get_property(address)


# Use cached version
prop = get_property_cached('123 Main St, Austin, TX')

Rate Limit Management

Respect rate limits:

import time


def rate_limited_requests(addresses, requests_per_minute=100):
    """Yield one property per address, pausing between requests.

    Args:
        addresses: Iterable of address strings.
        requests_per_minute: Maximum request rate; must be positive.

    Yields:
        The result of ``api.platforms.zillow.get_property`` for each
        address, in input order.

    Raises:
        ValueError: If ``requests_per_minute`` is not positive. Because this
            is a generator, the error surfaces on first iteration.
    """
    if requests_per_minute <= 0:
        raise ValueError("requests_per_minute must be positive")

    delay = 60.0 / requests_per_minute

    for index, address in enumerate(addresses):
        # Pause before every request except the first, so no time is wasted
        # sleeping after the final item has been delivered.
        if index:
            time.sleep(delay)
        yield api.platforms.zillow.get_property(address)


# Use generator
for prop in rate_limited_requests(addresses):
    process(prop)

Quick Start

from propapis import PropAPIS

api = PropAPIS(api_key='your_api_key')

# Basic usage with error handling
try:
    prop = api.platforms.zillow.get_property('123 Main St, Austin, TX')

    # Validate data. Zero bedrooms is a real value (e.g. a studio), so only
    # a missing bedroom count is treated as incomplete.
    if prop.price and prop.bedrooms is not None:
        print(f"{prop.address} - ${prop.price:,}")
    else:
        print("Incomplete data")
except Exception as e:
    # Top-level boundary of the example: report any failure instead of
    # crashing.
    print(f"Error: {e}")

For complete API documentation, see our API Reference.