Skip to main content
The following content is a read-only preview of an executable Jupyter notebook.To run this notebook interactively:
  1. Go to Wherobots Cloud.
  2. Start a runtime.
  3. Open the notebook.
  4. In the Jupyter Launcher:
    1. Click File > Open Path.
    2. Paste the following path to access this notebook: examples/Analyzing_Data/California_Coastal_Flood_Risk_Analysis.ipynb
    3. Click Enter.
Purpose: Quantify the number of properties facing flood risk across California coastal cities using FEMA flood zone designations embedded in parcel-level data from Wherobots Open Data (Regrid). Data source: wherobots_open_data.partner_samples.regrid_parcels
Coverage: ~6.2M California parcels with FEMA flood zone designations
Output: City-level parcel counts, property value exposure, and land-use breakdowns by flood risk tier

FEMA Flood Zone Reference

TierZonesMeaning
Extreme — Coastal WaveVE, VCoastal high hazard: base flood + wave action
High — 100-Year FloodplainA, AE, AO, AH, A991% annual chance flood
Moderate — 500-Year FloodplainX (0.2% subtype)0.2% annual chance flood
Levee-ProtectedX (levee subtype)Reduced risk due to levee
MinimalX (minimal subtype)Outside mapped flood hazard
UndeterminedDNot studied

1. Setup

from sedona.spark import *
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import numpy as np

config = SedonaContext.builder().getOrCreate()
sedona = SedonaContext.create(config)

print("SedonaContext ready.")

2. Configuration

Adjust COASTAL_CITIES and HIGH_RISK_ZONES to expand or refine the analysis scope.
# ── Target cities (scity field in Regrid, uppercase) ──────────────────────────
COASTAL_CITIES = [
    'LOS ANGELES', 'SAN DIEGO', 'LONG BEACH', 'HUNTINGTON BEACH',
    'NEWPORT BEACH', 'VENICE', 'MALIBU', 'SANTA MONICA',
    'SANTA BARBARA', 'OXNARD', 'SANTA CRUZ', 'MONTEREY',
    'HALF MOON BAY', 'PACIFICA', 'RICHMOND', 'ALAMEDA',
    'OAKLAND', 'EUREKA', 'CRESCENT CITY'
]

# ── FEMA zones treated as high or extreme risk ────────────────────────────────
HIGH_RISK_ZONES    = ('A', 'AE', 'AO', 'AH', 'A99', 'VE', 'V')
COASTAL_WAVE_ZONES = ('VE', 'V')     # highest-severity subset
MODERATE_RISK_SUBTYPE = '0.2 PCT ANNUAL CHANCE FLOOD HAZARD'

# ── Table reference ───────────────────────────────────────────────────────────
PARCEL_TABLE = 'wherobots_open_data.partner_samples.regrid_parcels'

cities_sql = ', '.join(f"'{c}'" for c in COASTAL_CITIES)
zones_sql  = ', '.join(f"'{z}'" for z in HIGH_RISK_ZONES)

print(f"Analyzing {len(COASTAL_CITIES)} coastal cities across {len(HIGH_RISK_ZONES)} high-risk zone types.")

3. Statewide Flood Zone Distribution

Baseline view of all California parcels by FEMA zone tier.
statewide_df = sedona.sql(f"""
    SELECT
        fema_flood_zone,
        fema_flood_zone_subtype,
        COUNT(*) AS parcel_count
    FROM {PARCEL_TABLE}
    WHERE state2 = 'CA'
      AND fema_flood_zone != ''
    GROUP BY fema_flood_zone, fema_flood_zone_subtype
    ORDER BY parcel_count DESC
""").toPandas()

print(f"Total CA parcels with FEMA zone data: {statewide_df['parcel_count'].sum():,.0f}")
statewide_df
# Assign risk tier labels for charting
def assign_tier(row):
    z   = row['fema_flood_zone']
    sub = row['fema_flood_zone_subtype']
    if z in ('VE', 'V'):                                          return 'Extreme (VE/V)'
    if z in ('A', 'AE', 'AO', 'AH', 'A99'):                      return 'High (A/AE/AO/AH)'
    if z == 'X' and sub == '0.2 PCT ANNUAL CHANCE FLOOD HAZARD':  return 'Moderate (500-yr)'
    if z == 'X' and 'LEVEE' in sub:                               return 'Levee-Protected'
    if z == 'X':                                                  return 'Minimal (X)'
    if z == 'D':                                                  return 'Undetermined (D)'
    return 'Other'

statewide_df['tier'] = statewide_df.apply(assign_tier, axis=1)
tier_summary = statewide_df.groupby('tier')['parcel_count'].sum().sort_values(ascending=False)

tier_colors = {
    'Minimal (X)':        '#D3D1C7',
    'Moderate (500-yr)':  '#B5D4F4',
    'Levee-Protected':    '#B4B2A9',
    'High (A/AE/AO/AH)':  '#EF9F27',
    'Extreme (VE/V)':     '#E24B4A',
    'Undetermined (D)':   '#888780',
    'Other':              '#cccccc',
}

fig, ax = plt.subplots(figsize=(7, 7))
wedge_colors = [tier_colors.get(t, '#cccccc') for t in tier_summary.index]
wedges, texts, autotexts = ax.pie(
    tier_summary.values,
    labels=tier_summary.index,
    colors=wedge_colors,
    autopct=lambda p: f'{p:.1f}%' if p > 1 else '',
    startangle=140,
    wedgeprops=dict(width=0.55)
)
for t in texts:      t.set_fontsize(10)
for t in autotexts:  t.set_fontsize(9)
ax.set_title('California parcels by FEMA flood risk tier', fontsize=13, pad=16)
plt.tight_layout()
plt.savefig('ca_statewide_flood_tiers.png', dpi=150, bbox_inches='tight')
plt.show()
print(tier_summary.to_string())

4. City-Level Parcel Count by Flood Zone

city_zone_df = sedona.sql(f"""
    SELECT
        scity                              AS city,
        fema_flood_zone                    AS zone,
        COUNT(*)                           AS parcel_count,
        SUM(CAST(parval    AS DOUBLE))     AS total_parcel_value,
        SUM(CAST(improvval AS DOUBLE))     AS total_improvement_value,
        AVG(CAST(parval    AS DOUBLE))     AS avg_parcel_value
    FROM {PARCEL_TABLE}
    WHERE state2 = 'CA'
      AND fema_flood_zone IN ({zones_sql})
      AND scity           IN ({cities_sql})
    GROUP BY scity, fema_flood_zone
    ORDER BY parcel_count DESC
""").toPandas()

print(f"Rows returned: {len(city_zone_df)}")
city_zone_df.head(15)
# City totals (all high-risk zones combined)
city_totals = (
    city_zone_df
    .groupby('city')
    .agg(
        high_risk_parcels  = ('parcel_count',          'sum'),
        total_parcel_value = ('total_parcel_value',     'sum'),
        total_improv_value = ('total_improvement_value','sum'),
    )
    .reset_index()
    .sort_values('high_risk_parcels', ascending=False)
)
city_totals['avg_parcel_value'] = city_totals['total_parcel_value'] / city_totals['high_risk_parcels']
city_totals['value_B'] = city_totals['total_parcel_value'] / 1e9

print(f"Total high-risk parcels across analyzed cities: {city_totals['high_risk_parcels'].sum():,.0f}")
print(f"Total assessed parcel value at risk:           ${city_totals['total_parcel_value'].sum()/1e9:.1f}B")
city_totals[['city','high_risk_parcels','total_parcel_value','avg_parcel_value']]
# ── Stacked bar: parcel count by city and zone type ───────────────────────────
band_colors = {'VE/V': '#E24B4A', 'AE': '#378ADD', 'A': '#63C1A0', 'AO/AH': '#EF9F27'}

pivot = city_zone_df.pivot_table(
    index='city', columns='zone', values='parcel_count', aggfunc='sum', fill_value=0
)
pivot['VE/V']  = pivot.get('VE', 0) + pivot.get('V', 0)
pivot['AO/AH'] = pivot.get('AO', 0) + pivot.get('AH', 0) + pivot.get('A99', 0)
plot_cols = [c for c in ['VE/V','AE','A','AO/AH'] if c in pivot.columns]
pivot = pivot[plot_cols].sort_values('AE', ascending=False)

fig, ax = plt.subplots(figsize=(12, 5))
bottom = np.zeros(len(pivot))
for col in plot_cols:
    ax.bar(pivot.index, pivot[col], bottom=bottom,
           label=col, color=band_colors[col], width=0.65)
    bottom += pivot[col].values

ax.set_title('High-risk parcels by city — stacked by FEMA zone type', fontsize=13)
ax.set_ylabel('Parcel count')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
ax.legend(title='Zone', bbox_to_anchor=(1.01, 1), loc='upper left')
ax.tick_params(axis='x', rotation=30)
plt.tight_layout()
plt.savefig('ca_parcel_count_by_city_zone.png', dpi=150, bbox_inches='tight')
plt.show()

5. Property Value at Risk by City

# ── Stacked bar: assessed value at risk by city and zone ─────────────────────
val_pivot = city_zone_df.copy()
val_pivot['zone_group'] = val_pivot['zone'].map(
    lambda z: 'VE/V'  if z in ('VE','V')  else
              'AE'    if z == 'AE'         else
              'A'     if z == 'A'          else 'AO/AH'
)
val_pivot = (
    val_pivot.groupby(['city','zone_group'])['total_parcel_value']
    .sum().reset_index()
    .pivot(index='city', columns='zone_group', values='total_parcel_value')
    .fillna(0)
    .div(1e9)   # → $B
)
val_plot_cols = [c for c in ['VE/V','AE','A','AO/AH'] if c in val_pivot.columns]
val_pivot['_total'] = val_pivot[val_plot_cols].sum(axis=1)
val_pivot = val_pivot.sort_values('_total', ascending=False).drop(columns='_total')

fig, ax = plt.subplots(figsize=(12, 5))
bottom = np.zeros(len(val_pivot))
for col in val_plot_cols:
    ax.bar(val_pivot.index, val_pivot[col], bottom=bottom,
           label=col, color=band_colors[col], width=0.65)
    bottom += val_pivot[col].values

ax.set_title('Total assessed parcel value at risk by city ($B)', fontsize=13)
ax.set_ylabel('Assessed value ($B)')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}B'))
ax.legend(title='Zone', bbox_to_anchor=(1.01, 1), loc='upper left')
ax.tick_params(axis='x', rotation=30)
plt.tight_layout()
plt.savefig('ca_value_at_risk_by_city.png', dpi=150, bbox_inches='tight')
plt.show()
# ── Average assessed value per parcel in high-risk zones ─────────────────────
avg_val = city_totals.sort_values('avg_parcel_value', ascending=False)

fig, ax = plt.subplots(figsize=(11, 4))
bars = ax.bar(avg_val['city'], avg_val['avg_parcel_value'] / 1e6,
              color='#378ADD', width=0.65)
ax.set_title('Average assessed parcel value in high-risk zones ($M)', fontsize=13)
ax.set_ylabel('Avg value per parcel ($M)')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}M'))
ax.tick_params(axis='x', rotation=30)

for bar in bars:
    h = bar.get_height()
    ax.text(bar.get_x() + bar.get_width() / 2, h + 0.05,
            f'${h:.1f}M', ha='center', va='bottom', fontsize=8.5)

plt.tight_layout()
plt.savefig('ca_avg_parcel_value.png', dpi=150, bbox_inches='tight')
plt.show()

6. Coastal Wave Zone (VE/V) Deep-Dive

VE and V zones carry the highest structural risk — base flooding combined with wave action. These parcels are typically excluded from standard flood insurance at standard rates.
ve_df = sedona.sql(f"""
    SELECT
        scity                          AS city,
        COUNT(*)                       AS ve_parcel_count,
        SUM(CAST(parval AS DOUBLE))    AS total_parcel_value,
        AVG(CAST(parval AS DOUBLE))    AS avg_parcel_value,
        SUM(CAST(improvval AS DOUBLE)) AS total_improv_value
    FROM {PARCEL_TABLE}
    WHERE state2 = 'CA'
      AND fema_flood_zone IN ('VE', 'V')
      AND scity IN ({cities_sql})
    GROUP BY scity
    ORDER BY ve_parcel_count DESC
""").toPandas()

ve_df = ve_df[ve_df['ve_parcel_count'] > 0]

fig, axes = plt.subplots(1, 2, figsize=(13, 4))

axes[0].barh(ve_df['city'], ve_df['ve_parcel_count'], color='#E24B4A')
axes[0].set_title('VE/V zone parcel count', fontsize=12)
axes[0].xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
axes[0].invert_yaxis()

axes[1].barh(ve_df['city'], ve_df['total_parcel_value'] / 1e9, color='#E24B4A')
axes[1].set_title('VE/V zone total assessed value ($B)', fontsize=12)
axes[1].xaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}B'))
axes[1].invert_yaxis()

plt.suptitle('Coastal wave-action zone (VE/V) exposure by city', fontsize=13, y=1.02)
plt.tight_layout()
plt.savefig('ca_ve_zone_exposure.png', dpi=150, bbox_inches='tight')
plt.show()

print(f"\nTotal VE/V parcels: {ve_df['ve_parcel_count'].sum():,.0f}")
print(f"Total VE/V value:   ${ve_df['total_parcel_value'].sum()/1e9:.2f}B")

7. Land Use Mix in High-Risk Zones

Understanding which property types are exposed informs both insurance risk and policy response.
landuse_df = sedona.sql(f"""
    SELECT
        CASE
            WHEN usedesc LIKE '%Single%' OR usedesc LIKE '%Residential Single%'
                THEN 'Single-Family Residential'
            WHEN usedesc LIKE '%Multi%' OR usedesc LIKE '%Apartment%'
                THEN 'Multi-Family Residential'
            WHEN usedesc LIKE '%Two Units%' OR usedesc LIKE '%Three Units%'
                OR usedesc LIKE '%Four Units%'
                THEN 'Small Multi-Unit (2-4)'
            WHEN usedesc LIKE '%Mobile Home%'
                THEN 'Mobile Home'
            WHEN usedesc LIKE '%Commercial%' OR usedesc LIKE '%Retail%'
                OR usedesc LIKE '%Store%'
                THEN 'Commercial'
            WHEN usedesc LIKE '%Industrial%' OR usedesc LIKE '%Warehouse%'
                THEN 'Industrial'
            WHEN usedesc LIKE '%Vacant%' OR usedesc LIKE '%Undeveloped%'
                THEN 'Vacant / Undeveloped'
            WHEN usedesc = '' OR usedesc IS NULL
                THEN 'Unclassified'
            ELSE 'Other'
        END                             AS land_use,
        COUNT(*)                        AS parcel_count,
        SUM(CAST(parval AS DOUBLE))     AS total_value
    FROM {PARCEL_TABLE}
    WHERE state2 = 'CA'
      AND fema_flood_zone IN ({zones_sql})
      AND scity IN ({cities_sql})
    GROUP BY land_use
    ORDER BY parcel_count DESC
""").toPandas()

landuse_df
lu_colors = {
    'Single-Family Residential': '#378ADD',
    'Multi-Family Residential':  '#63C1A0',
    'Small Multi-Unit (2-4)':    '#EF9F27',
    'Mobile Home':               '#E24B4A',
    'Commercial':                '#AFA9EC',
    'Industrial':                '#888780',
    'Vacant / Undeveloped':      '#D3D1C7',
    'Unclassified':              '#cccccc',
    'Other':                     '#b0b0b0',
}

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
c1 = [lu_colors.get(lu, '#cccccc') for lu in landuse_df['land_use']]

axes[0].bar(landuse_df['land_use'], landuse_df['parcel_count'], color=c1)
axes[0].set_title('Parcel count by land use\n(high-risk zones, coastal CA)', fontsize=11)
axes[0].set_ylabel('Parcel count')
axes[0].yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
axes[0].tick_params(axis='x', rotation=35)

axes[1].bar(landuse_df['land_use'], landuse_df['total_value'] / 1e9, color=c1)
axes[1].set_title('Total assessed value by land use ($B)\n(high-risk zones, coastal CA)', fontsize=11)
axes[1].set_ylabel('Assessed value ($B)')
axes[1].yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'${x:.1f}B'))
axes[1].tick_params(axis='x', rotation=35)

plt.tight_layout()
plt.savefig('ca_landuse_flood_risk.png', dpi=150, bbox_inches='tight')
plt.show()

8. Moderate Risk: 500-Year Floodplain

Zone X (0.2% annual chance) parcels represent the next tier of risk — not mapped as high-hazard but still relevant for comprehensive exposure modeling.
moderate_df = sedona.sql(f"""
    SELECT
        scity                          AS city,
        COUNT(*)                       AS parcel_count,
        SUM(CAST(parval AS DOUBLE))    AS total_parcel_value
    FROM {PARCEL_TABLE}
    WHERE state2 = 'CA'
      AND fema_flood_zone = 'X'
      AND fema_flood_zone_subtype = '{MODERATE_RISK_SUBTYPE}'
      AND scity IN ({cities_sql})
    GROUP BY scity
    ORDER BY parcel_count DESC
""").toPandas()

moderate_df = moderate_df[moderate_df['parcel_count'] > 0]

fig, ax = plt.subplots(figsize=(11, 4))
ax.bar(moderate_df['city'], moderate_df['parcel_count'], color='#B5D4F4', width=0.65)
ax.set_title('Moderate-risk parcels (Zone X — 500-year floodplain) by city', fontsize=12)
ax.set_ylabel('Parcel count')
ax.yaxis.set_major_formatter(mticker.FuncFormatter(lambda x, _: f'{x:,.0f}'))
ax.tick_params(axis='x', rotation=30)
plt.tight_layout()
plt.savefig('ca_moderate_risk.png', dpi=150, bbox_inches='tight')
plt.show()

print(moderate_df[['city','parcel_count','total_parcel_value']].to_string(index=False))

9. Summary Table Export

ve_counts  = ve_df.set_index('city')[['ve_parcel_count']].rename(
    columns={'ve_parcel_count': 've_v_parcels'})
mod_counts = moderate_df.set_index('city')[['parcel_count']].rename(
    columns={'parcel_count': 'moderate_risk_parcels'})

summary = (
    city_totals
    .set_index('city')
    .join(ve_counts,  how='left')
    .join(mod_counts, how='left')
    .fillna(0)
    .astype({'ve_v_parcels': int, 'moderate_risk_parcels': int})
    [['high_risk_parcels','ve_v_parcels','moderate_risk_parcels',
      'total_parcel_value','total_improv_value','avg_parcel_value']]
    .sort_values('high_risk_parcels', ascending=False)
)

summary.columns = [
    'High-Risk Parcels (A/AE/AO/VE)',
    'Extreme Coastal (VE/V)',
    'Moderate Risk (500-yr)',
    'Total Parcel Value ($)',
    'Total Improvement Value ($)',
    'Avg Parcel Value ($)'
]

summary.to_csv('ca_coastal_flood_risk_summary.csv')

pd.options.display.float_format = '${:,.0f}'.format
summary

10. (Optional) Spatial Join: Buildings per High-Risk Parcel

If you want building-level counts rather than parcel-level counts, uncomment and run this cell. It joins Overture Maps building footprints to high-risk parcels using ST_INTERSECTS. This is a large spatial join — run on a medium or large Wherobots runtime.
# OPTIONAL — building-level join (requires medium+ runtime)
# Uncomment to execute.

# building_join_df = sedona.sql(f"""
#     SELECT
#         p.scity                         AS city,
#         p.fema_flood_zone               AS zone,
#         COUNT(DISTINCT p.ll_uuid)       AS parcel_count,
#         COUNT(b.id)                     AS building_count,
#         SUM(CAST(p.parval AS DOUBLE))   AS total_parcel_value
#     FROM {PARCEL_TABLE} p
#     JOIN wherobots_open_data.overture_maps_foundation.buildings_building b
#         ON ST_INTERSECTS(p.geometry, b.geometry)
#     WHERE p.state2 = 'CA'
#       AND p.fema_flood_zone IN ({zones_sql})
#       AND p.scity IN ({cities_sql})
#     GROUP BY p.scity, p.fema_flood_zone
#     ORDER BY building_count DESC
# """).toPandas()

# building_join_df

Outputs

FileContents
ca_statewide_flood_tiers.pngDonut chart: CA parcels by FEMA risk tier
ca_parcel_count_by_city_zone.pngStacked bar: parcel count by city and zone
ca_value_at_risk_by_city.pngStacked bar: assessed value at risk by city
ca_avg_parcel_value.pngBar: avg assessed value per parcel by city
ca_ve_zone_exposure.pngDual horizontal bar: VE/V zone exposure
ca_landuse_flood_risk.pngBar: parcel count and value by land use
ca_moderate_risk.pngBar: 500-year floodplain parcel counts
ca_coastal_flood_risk_summary.csvFull city-level summary table