The following content is a read-only preview of an executable Jupyter notebook.To run this notebook interactively:
- Go to Wherobots Cloud.
- Start a runtime.
- Open the notebook.
- In the Jupyter Launcher:
- Click File > Open Path.
- Paste the following path to access this notebook:
examples/Analyzing_Data/California_Coastal_Flood_Risk_Analysis.ipynb - Click Enter.
wherobots_open_data.partner_samples.regrid_parcelsCoverage: ~6.2M California parcels with FEMA flood zone designations
Output: City-level parcel counts, property value exposure, and land-use breakdowns by flood risk tier
FEMA Flood Zone Reference
| Tier | Zones | Meaning |
|---|---|---|
| Extreme — Coastal Wave | VE, V | Coastal high hazard: base flood + wave action |
| High — 100-Year Floodplain | A, AE, AO, AH, A99 | 1% annual chance flood |
| Moderate — 500-Year Floodplain | X (0.2% subtype) | 0.2% annual chance flood |
| Levee-Protected | X (levee subtype) | Reduced risk due to levee |
| Minimal | X (minimal subtype) | Outside mapped flood hazard |
| Undetermined | D | Not studied |
1. Setup
from sedona.spark import *
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np
config = SedonaContext.builder().getOrCreate()
sedona = SedonaContext.create(config)
print("SedonaContext ready.")
2. Configuration
AdjustCOASTAL_CITIES and HIGH_RISK_ZONES to expand or refine the analysis scope.
# ── Target cities (scity field in Regrid, uppercase) ──────────────────────────
COASTAL_CITIES = [
'LOS ANGELES', 'SAN DIEGO', 'LONG BEACH', 'HUNTINGTON BEACH',
'NEWPORT BEACH', 'VENICE', 'MALIBU', 'SANTA MONICA',
'SANTA BARBARA', 'OXNARD', 'SANTA CRUZ', 'MONTEREY',
'HALF MOON BAY', 'PACIFICA', 'RICHMOND', 'ALAMEDA',
'OAKLAND', 'EUREKA', 'CRESCENT CITY'
]
# ── FEMA zones treated as high or extreme risk ────────────────────────────────
HIGH_RISK_ZONES = ('A', 'AE', 'AO', 'AH', 'A99', 'VE', 'V')
COASTAL_WAVE_ZONES = ('VE', 'V') # highest-severity subset
MODERATE_RISK_SUBTYPE = '0.2 PCT ANNUAL CHANCE FLOOD HAZARD'
# ── Table reference ───────────────────────────────────────────────────────────
PARCEL_TABLE = 'wherobots_open_data.partner_samples.regrid_parcels'
cities_sql = ', '.join(f"'{c}'" for c in COASTAL_CITIES)
zones_sql = ', '.join(f"'{z}'" for z in HIGH_RISK_ZONES)
print(f"Analyzing {len(COASTAL_CITIES)} coastal cities across {len(HIGH_RISK_ZONES)} high-risk zone types.")
3. Statewide Flood Zone Distribution
Baseline view of all California parcels by FEMA zone tier.statewide_df = sedona.sql(f"""
SELECT
fema_flood_zone,
fema_flood_zone_subtype,
COUNT(*) AS parcel_count
FROM {PARCEL_TABLE}
WHERE state2 = 'CA'
AND fema_flood_zone != ''
GROUP BY fema_flood_zone, fema_flood_zone_subtype
ORDER BY parcel_count DESC
""").toPandas()
print(f"Total CA parcels with FEMA zone data: {statewide_df['parcel_count'].sum():,.0f}")
statewide_df
# Assign risk tier labels for charting
def assign_tier(row):
z = row['fema_flood_zone']
sub = row['fema_flood_zone_subtype']
if z in ('VE', 'V'): return 'Extreme (VE/V)'
if z in ('A', 'AE', 'AO', 'AH', 'A99'): return 'High (A/AE/AO/AH)'
if z == 'X' and sub == '0.2 PCT ANNUAL CHANCE FLOOD HAZARD': return 'Moderate (500-yr)'
if z == 'X' and 'LEVEE' in sub: return 'Levee-Protected'
if z == 'X': return 'Minimal (X)'
if z == 'D': return 'Undetermined (D)'
return 'Other'
statewide_df['tier'] = statewide_df.apply(assign_tier, axis=1)
tier_summary = statewide_df.groupby('tier')['parcel_count'].sum().sort_values(ascending=False)
tier_colors = {
'Minimal (X)': '#D3D1C7',
'Moderate (500-yr)': '#B5D4F4',
'Levee-Protected': '#B4B2A9',
'High (A/AE/AO/AH)': '#EF9F27',
'Extreme (VE/V)': '#E24B4A',
'Undetermined (D)': '#888780',
'Other': '#cccccc',
}
fig, ax = plt.subplots(figsize=(7, 7))
wedge_colors = [tier_colors.get(t, '#cccccc') for t in tier_summary.index]
wedges, texts, autotexts = ax.pie(
tier_summary.values,
labels=tier_summary.index,
colors=wedge_colors,
autopct=lambda p: f'{p:.1f}%' if p > 1 else '',
startangle=140,
wedgeprops=dict(width=0.55)
)
for t in texts: t.set_fontsize(10)
for t in autotexts: t.set_fontsize(9)
ax.set_title('California parcels by FEMA flood risk tier', fontsize=13, pad=16)
plt.tight_layout()
plt.savefig('ca_statewide_flood_tiers.png', dpi=150, bbox_inches='tight')
plt.show()
print(tier_summary.to_string())
4. City-Level Parcel Count by Flood Zone
city_zone_df = sedona.sql(f"""
SELECT
scity AS city,
fema_flood_zone AS zone,
COUNT(*) AS parcel_count,
SUM(CAST(parval AS DOUBLE)) AS total_parcel_value,
SUM(CAST(improvval AS DOUBLE)) AS total_improvement_value,
AVG(CAST(parval AS DOUBLE)) AS avg_parcel_value
FROM {PARCEL_TABLE}
WHERE state2 = 'CA'
AND fema_flood_zone IN ({zones_sql})
AND scity IN ({cities_sql})
GROUP BY scity, fema_flood_zone
ORDER BY parcel_count DESC
""").toPandas()
print(f"Rows returned: {len(city_zone_df)}")
city_zone_df.head(15)
# City totals (all high-risk zones combined)
city_totals = (
city_zone_df
.groupby('city')
.agg(
high_risk_parcels = ('parcel_count', 'sum'),
total_parcel_value = ('total_parcel_value', 'sum'),
total_improv_value = ('total_improvement_value','sum'),
)
.reset_index()
.sort_values('high_risk_parcels', ascending=False)
)
city_totals['avg_parcel_value'] = city_totals['total_parcel_value'] / city_totals['high_risk_parcels']
city_totals['value_B'] = city_totals['total_parcel_value'] / 1e9
print(f"Total high-risk parcels across analyzed cities: {city_totals['high_risk_parcels'].sum():,.0f}")
print(f"Total assessed parcel value at risk: ${city_totals['total_parcel_value'].sum()/1e9:.1f}B")
city_totals[['city','high_risk_parcels','total_parcel_value','avg_parcel_value']]
# ── Stacked bar: parcel count by city and zone type ───────────────────────────
band_colors = {'VE/V': '#E24B4A', 'AE': '#378ADD', 'A': '#63C1A0', 'AO/AH': '#EF9F27'}
pivot = city_zone_df.pivot_table(
index='city', columns='zone', values='parcel_count', aggfunc='sum', fill_value=0
)
pivot['VE/V'] = pivot.get('VE', 0) + pivot.get('V', 0)
pivot['AO/AH'] = pivot.get('AO', 0) + pivot.get('AH', 0) + pivot.get('A99', 0)
plot_cols = [c for c in ['VE/V','AE','A','AO/AH'] if c in pivot.columns]
pivot = pivot[plot_cols].sort_values('AE', ascending=False)
fig, ax = plt.subplots(figsize=(12, 5))
bottom = np.zeros(len(pivot))
for col in plot_cols:
ax.bar(pivot.index, pivot[col], bottom=bottom,
label=col, color=band_colors[col], width=0.65)
bottom += pivot[col].values
ax.set_title('High-risk parcels by city — stacked by FEMA zone type', fontsize=13)
ax.set_ylabel('Parcel count')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
ax.legend(title='Zone', bbox_to_anchor=(1.01, 1), loc='upper left')
ax.tick_params(axis='x', rotation=30)
plt.tight_layout()
plt.savefig('ca_parcel_count_by_city_zone.png', dpi=150, bbox_inches='tight')
plt.show()
5. Property Value at Risk by City
# ── Stacked bar: assessed value at risk by city and zone ─────────────────────
val_pivot = city_zone_df.copy()
val_pivot['zone_group'] = val_pivot['zone'].map(
lambda z: 'VE/V' if z in ('VE','V') else
'AE' if z == 'AE' else
'A' if z == 'A' else 'AO/AH'
)
val_pivot = (
val_pivot.groupby(['city','zone_group'])['total_parcel_value']
.sum().reset_index()
.pivot(index='city', columns='zone_group', values='total_parcel_value')
.fillna(0)
.div(1e9) # → $B
)
val_plot_cols = [c for c in ['VE/V','AE','A','AO/AH'] if c in val_pivot.columns]
val_pivot['_total'] = val_pivot[val_plot_cols].sum(axis=1)
val_pivot = val_pivot.sort_values('_total', ascending=False).drop(columns='_total')
fig, ax = plt.subplots(figsize=(12, 5))
bottom = np.zeros(len(val_pivot))
for col in val_plot_cols:
ax.bar(val_pivot.index, val_pivot[col], bottom=bottom,
label=col, color=band_colors[col], width=0.65)
bottom += val_pivot[col].values
ax.set_title('Total assessed parcel value at risk by city ($B)', fontsize=13)
ax.set_ylabel('Assessed value ($B)')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}B'))
ax.legend(title='Zone', bbox_to_anchor=(1.01, 1), loc='upper left')
ax.tick_params(axis='x', rotation=30)
plt.tight_layout()
plt.savefig('ca_value_at_risk_by_city.png', dpi=150, bbox_inches='tight')
plt.show()
# ── Average assessed value per parcel in high-risk zones ─────────────────────
avg_val = city_totals.sort_values('avg_parcel_value', ascending=False)
fig, ax = plt.subplots(figsize=(11, 4))
bars = ax.bar(avg_val['city'], avg_val['avg_parcel_value'] / 1e6,
color='#378ADD', width=0.65)
ax.set_title('Average assessed parcel value in high-risk zones ($M)', fontsize=13)
ax.set_ylabel('Avg value per parcel ($M)')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}M'))
ax.tick_params(axis='x', rotation=30)
for bar in bars:
h = bar.get_height()
ax.text(bar.get_x() + bar.get_width() / 2, h + 0.05,
f'${h:.1f}M', ha='center', va='bottom', fontsize=8.5)
plt.tight_layout()
plt.savefig('ca_avg_parcel_value.png', dpi=150, bbox_inches='tight')
plt.show()
6. Coastal Wave Zone (VE/V) Deep-Dive
VE and V zones carry the highest structural risk — base flooding combined with wave action. These parcels are typically excluded from standard flood insurance at standard rates.ve_df = sedona.sql(f"""
SELECT
scity AS city,
COUNT(*) AS ve_parcel_count,
SUM(CAST(parval AS DOUBLE)) AS total_parcel_value,
AVG(CAST(parval AS DOUBLE)) AS avg_parcel_value,
SUM(CAST(improvval AS DOUBLE)) AS total_improv_value
FROM {PARCEL_TABLE}
WHERE state2 = 'CA'
AND fema_flood_zone IN ('VE', 'V')
AND scity IN ({cities_sql})
GROUP BY scity
ORDER BY ve_parcel_count DESC
""").toPandas()
ve_df = ve_df[ve_df['ve_parcel_count'] > 0]
fig, axes = plt.subplots(1, 2, figsize=(13, 4))
axes[0].barh(ve_df['city'], ve_df['ve_parcel_count'], color='#E24B4A')
axes[0].set_title('VE/V zone parcel count', fontsize=12)
axes[0].xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
axes[0].invert_yaxis()
axes[1].barh(ve_df['city'], ve_df['total_parcel_value'] / 1e9, color='#E24B4A')
axes[1].set_title('VE/V zone total assessed value ($B)', fontsize=12)
axes[1].xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}B'))
axes[1].invert_yaxis()
plt.suptitle('Coastal wave-action zone (VE/V) exposure by city', fontsize=13, y=1.02)
plt.tight_layout()
plt.savefig('ca_ve_zone_exposure.png', dpi=150, bbox_inches='tight')
plt.show()
print(f"\nTotal VE/V parcels: {ve_df['ve_parcel_count'].sum():,.0f}")
print(f"Total VE/V value: ${ve_df['total_parcel_value'].sum()/1e9:.2f}B")
7. Land Use Mix in High-Risk Zones
Understanding which property types are exposed informs both insurance risk and policy response.landuse_df = sedona.sql(f"""
SELECT
CASE
WHEN usedesc LIKE '%Single%' OR usedesc LIKE '%Residential Single%'
THEN 'Single-Family Residential'
WHEN usedesc LIKE '%Multi%' OR usedesc LIKE '%Apartment%'
THEN 'Multi-Family Residential'
WHEN usedesc LIKE '%Two Units%' OR usedesc LIKE '%Three Units%'
OR usedesc LIKE '%Four Units%'
THEN 'Small Multi-Unit (2-4)'
WHEN usedesc LIKE '%Mobile Home%'
THEN 'Mobile Home'
WHEN usedesc LIKE '%Commercial%' OR usedesc LIKE '%Retail%'
OR usedesc LIKE '%Store%'
THEN 'Commercial'
WHEN usedesc LIKE '%Industrial%' OR usedesc LIKE '%Warehouse%'
THEN 'Industrial'
WHEN usedesc LIKE '%Vacant%' OR usedesc LIKE '%Undeveloped%'
THEN 'Vacant / Undeveloped'
WHEN usedesc = '' OR usedesc IS NULL
THEN 'Unclassified'
ELSE 'Other'
END AS land_use,
COUNT(*) AS parcel_count,
SUM(CAST(parval AS DOUBLE)) AS total_value
FROM {PARCEL_TABLE}
WHERE state2 = 'CA'
AND fema_flood_zone IN ({zones_sql})
AND scity IN ({cities_sql})
GROUP BY land_use
ORDER BY parcel_count DESC
""").toPandas()
landuse_df
lu_colors = {
'Single-Family Residential': '#378ADD',
'Multi-Family Residential': '#63C1A0',
'Small Multi-Unit (2-4)': '#EF9F27',
'Mobile Home': '#E24B4A',
'Commercial': '#AFA9EC',
'Industrial': '#888780',
'Vacant / Undeveloped': '#D3D1C7',
'Unclassified': '#cccccc',
'Other': '#b0b0b0',
}
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
c1 = [lu_colors.get(lu, '#cccccc') for lu in landuse_df['land_use']]
axes[0].bar(landuse_df['land_use'], landuse_df['parcel_count'], color=c1)
axes[0].set_title('Parcel count by land use\n(high-risk zones, coastal CA)', fontsize=11)
axes[0].set_ylabel('Parcel count')
axes[0].yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
axes[0].tick_params(axis='x', rotation=35)
axes[1].bar(landuse_df['land_use'], landuse_df['total_value'] / 1e9, color=c1)
axes[1].set_title('Total assessed value by land use ($B)\n(high-risk zones, coastal CA)', fontsize=11)
axes[1].set_ylabel('Assessed value ($B)')
axes[1].yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}B'))
axes[1].tick_params(axis='x', rotation=35)
plt.tight_layout()
plt.savefig('ca_landuse_flood_risk.png', dpi=150, bbox_inches='tight')
plt.show()
8. Moderate Risk: 500-Year Floodplain
Zone X (0.2% annual chance) parcels represent the next tier of risk — not mapped as high-hazard but still relevant for comprehensive exposure modeling.moderate_df = sedona.sql(f"""
SELECT
scity AS city,
COUNT(*) AS parcel_count,
SUM(CAST(parval AS DOUBLE)) AS total_parcel_value
FROM {PARCEL_TABLE}
WHERE state2 = 'CA'
AND fema_flood_zone = 'X'
AND fema_flood_zone_subtype = '{MODERATE_RISK_SUBTYPE}'
AND scity IN ({cities_sql})
GROUP BY scity
ORDER BY parcel_count DESC
""").toPandas()
moderate_df = moderate_df[moderate_df['parcel_count'] > 0]
fig, ax = plt.subplots(figsize=(11, 4))
ax.bar(moderate_df['city'], moderate_df['parcel_count'], color='#B5D4F4', width=0.65)
ax.set_title('Moderate-risk parcels (Zone X — 500-year floodplain) by city', fontsize=12)
ax.set_ylabel('Parcel count')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
ax.tick_params(axis='x', rotation=30)
plt.tight_layout()
plt.savefig('ca_moderate_risk.png', dpi=150, bbox_inches='tight')
plt.show()
print(moderate_df[['city','parcel_count','total_parcel_value']].to_string(index=False))
9. Summary Table Export
ve_counts = ve_df.set_index('city')[['ve_parcel_count']].rename(
columns={'ve_parcel_count': 've_v_parcels'})
mod_counts = moderate_df.set_index('city')[['parcel_count']].rename(
columns={'parcel_count': 'moderate_risk_parcels'})
summary = (
city_totals
.set_index('city')
.join(ve_counts, how='left')
.join(mod_counts, how='left')
.fillna(0)
.astype({'ve_v_parcels': int, 'moderate_risk_parcels': int})
[['high_risk_parcels','ve_v_parcels','moderate_risk_parcels',
'total_parcel_value','total_improv_value','avg_parcel_value']]
.sort_values('high_risk_parcels', ascending=False)
)
summary.columns = [
'High-Risk Parcels (A/AE/AO/VE)',
'Extreme Coastal (VE/V)',
'Moderate Risk (500-yr)',
'Total Parcel Value ($)',
'Total Improvement Value ($)',
'Avg Parcel Value ($)'
]
summary.to_csv('ca_coastal_flood_risk_summary.csv')
pd.options.display.float_format = '${:,.0f}'.format
summary
10. (Optional) Spatial Join: Buildings per High-Risk Parcel
If you want building-level counts rather than parcel-level counts, uncomment and run this cell. It joins Overture Maps building footprints to high-risk parcels usingST_INTERSECTS. This is a large spatial join — run on a medium or large Wherobots runtime.
# OPTIONAL — building-level join (requires medium+ runtime)
# Uncomment to execute.
# building_join_df = sedona.sql(f"""
# SELECT
# p.scity AS city,
# p.fema_flood_zone AS zone,
# COUNT(DISTINCT p.ll_uuid) AS parcel_count,
# COUNT(b.id) AS building_count,
# SUM(CAST(p.parval AS DOUBLE)) AS total_parcel_value
# FROM {PARCEL_TABLE} p
# JOIN wherobots_open_data.overture_maps_foundation.buildings_building b
# ON ST_INTERSECTS(p.geometry, b.geometry)
# WHERE p.state2 = 'CA'
# AND p.fema_flood_zone IN ({zones_sql})
# AND p.scity IN ({cities_sql})
# GROUP BY p.scity, p.fema_flood_zone
# ORDER BY building_count DESC
# """).toPandas()
# building_join_df
Outputs
| File | Contents |
|---|---|
ca_statewide_flood_tiers.png | Donut chart: CA parcels by FEMA risk tier |
ca_parcel_count_by_city_zone.png | Stacked bar: parcel count by city and zone |
ca_value_at_risk_by_city.png | Stacked bar: assessed value at risk by city |
ca_avg_parcel_value.png | Bar: avg assessed value per parcel by city |
ca_ve_zone_exposure.png | Dual horizontal bar: VE/V zone exposure |
ca_landuse_flood_risk.png | Bar: parcel count and value by land use |
ca_moderate_risk.png | Bar: 500-year floodplain parcel counts |
ca_coastal_flood_risk_summary.csv | Full city-level summary table |

