Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Understanding Population Data (Exposure)

This tutorial helps you understand population data using the Global Human Settlement (GHS) datasets. You’ll explore and compare two different datasets to understand their characteristics and differences.

We’ll work with:

  • GHS-POP: Historical and projected population (1975-2030)

  • GHS-WUP: Urban population projections (1975-2100)

Prerequisites: We’ll use preprocessed data for the Molise region in Italy (NUTS2 region “ITF2”)). If you change the region, please download and process the data population data for your region using the Population GHS how-to guide.

What you’ll learn:

  • Load and visualize population timeseries from CSV files

  • Explore spatial population distributions from raster data

  • Compare different population datasets

  • Understand temporal and spatial patterns in population dynamics

Setup

User settings

admin_id = "ITF2"  # Molise, Italy
workdir = "/home/nejk/code/extreme_precip_exposure"

Load libraries

import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import regionmask
from pathlib import Path
import rioxarray as rxr
import os
import re

# Set working directory
os.chdir(workdir)
print(f"Working directory: {os.getcwd()}")

# Set up data directories
data_dir = Path("./data")
ghs_pop_dir = data_dir / "population" / "GHS_POP"
ghs_wup_dir = data_dir / "population" / "GHS_WUP_POP"
output_dir = data_dir / admin_id / "ghs_population"

print(f"Data directories:")
print(f"  GHS-POP: {ghs_pop_dir}")
print(f"  GHS-WUP: {ghs_wup_dir}")
print(f"  Output: {output_dir}")
Working directory: /etc/ecmwf/nfs/dh2_home_a/nejk/code/extreme_precip_exposure
Data directories:
  GHS-POP: data/population/GHS_POP
  GHS-WUP: data/population/GHS_WUP_POP
  Output: data/ITF2/ghs_population

Load region shapefile

# Read NUTS shapefiles
regions_dir = data_dir / 'regions'
nuts_shp = regions_dir / 'NUTS_RG_20M_2024_4326' / 'NUTS_RG_20M_2024_4326.shp'
nuts_gdf = gpd.read_file(nuts_shp)

# Select the region of interest
sel_gdf = nuts_gdf[nuts_gdf['NUTS_ID'] == admin_id]
print(f"Region: {sel_gdf['NUTS_NAME'].values[0]} ({admin_id})")
print(f"Country: {sel_gdf['CNTR_CODE'].values[0]}")
lon_min, lat_min, lon_max, lat_max = sel_gdf.geometry.total_bounds

# Create a regionmask from the admin region geometry
admin_mask = regionmask.from_geopandas(sel_gdf, names='NUTS_ID')
Region: Molise (ITF2)
Country: IT

Load Population Timeseries

First, let’s load the processed CSV files created by the population_ghs how-to guide.

# Load CSV files
csv_ghs_pop = output_dir / f"population_ghs_pop_{admin_id}.csv"
csv_ghs_wup = output_dir / f"population_ghs_wup_{admin_id}.csv"

# Read the data
df_ghs_pop = pd.read_csv(csv_ghs_pop)
df_ghs_wup = pd.read_csv(csv_ghs_wup)

print("GHS-POP data:")
print(f"  Years: {df_ghs_pop['year'].min():.0f} - {df_ghs_pop['year'].max():.0f}")
print(f"  Records: {len(df_ghs_pop)}")
print(f"\nGHS-WUP data:")
print(f"  Years: {df_ghs_wup['year'].min():.0f} - {df_ghs_wup['year'].max():.0f}")
print(f"  Records: {len(df_ghs_wup)}")

# Display first few rows
print(f"\nGHS-POP sample:")
print(df_ghs_pop.head())
print(f"\nGHS-WUP sample:")
print(df_ghs_wup.head())
GHS-POP data:
  Years: 1975 - 2030
  Records: 12

GHS-WUP data:
  Years: 1975 - 2100
  Records: 26

GHS-POP sample:
   year     population
0  1975  324997.084086
1  1980  322792.686845
2  1985  315148.696077
3  1990  307101.380304
4  1995  300026.743534

GHS-WUP sample:
   year     population
0  1975  326933.709961
1  1980  324847.772566
2  1985  317922.344295
3  1990  310127.078795
4  1995  304151.088004

Compare Population Timeseries

Let’s visualize and compare the two datasets to understand their differences.

Visualize both datasets

fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(df_ghs_pop['year'], df_ghs_pop['population'], 
        marker='o', linewidth=2, markersize=8, label='GHS-POP (1975-2030)', color='#2E86AB')
ax.plot(df_ghs_wup['year'], df_ghs_wup['population'], 
        marker='s', linewidth=2, markersize=6, label='GHS-WUP (1975-2100)', color='#A23B72', alpha=0.8)

ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Population', fontsize=12)
ax.set_title(f'Population Change in {admin_id} - Dataset Comparison', fontsize=14, fontweight='bold')
ax.legend(fontsize=11, loc='lower left')
ax.grid(True, alpha=0.3)

# Format y-axis
from matplotlib.ticker import FuncFormatter
def millions(x, pos):
    return f'{x/1e6:.1f}M' if x >= 1e6 else f'{x/1e3:.0f}K'
ax.yaxis.set_major_formatter(FuncFormatter(millions))

plt.tight_layout()
plt.show()
<Figure size 1400x600 with 1 Axes>

Quantify differences

For overlapping years (1975-2030), let’s calculate the differences between the two datasets.

# Merge datasets on year for overlapping period
df_comparison = df_ghs_pop.merge(df_ghs_wup, on='year', suffixes=('_pop', '_wup'))

# Calculate differences
df_comparison['difference'] = df_comparison['population_wup'] - df_comparison['population_pop']
df_comparison['percent_diff'] = (df_comparison['difference'] / df_comparison['population_pop']) * 100

print("Dataset Comparison (1975-2030):")
print("="*80)
print(f"{'Year':<8} {'GHS-POP':>15} {'GHS-WUP':>15} {'Difference':>15} {'Diff %':>10}")
print("-"*80)

for _, row in df_comparison.iterrows():
    print(f"{row['year']:<8.0f} {row['population_pop']:>15,.0f} {row['population_wup']:>15,.0f} "
          f"{row['difference']:>15,.0f} {row['percent_diff']:>9.2f}%")

print("\nSummary Statistics:")
print(f"  Mean absolute difference: {df_comparison['difference'].abs().mean():,.0f} people")
print(f"  Mean percentage difference: {df_comparison['percent_diff'].abs().mean():.2f}%")
print(f"  Max difference: {df_comparison['difference'].abs().max():,.0f} in {df_comparison.loc[df_comparison['difference'].abs().idxmax(), 'year']:.0f}")
Dataset Comparison (1975-2030):
================================================================================
Year             GHS-POP         GHS-WUP      Difference     Diff %
--------------------------------------------------------------------------------
1975             324,997         326,934           1,937      0.60%
1980             322,793         324,848           2,055      0.64%
1985             315,149         317,922           2,774      0.88%
1990             307,101         310,127           3,026      0.99%
1995             300,027         304,151           4,124      1.37%
2000             292,010         294,884           2,873      0.98%
2005             290,995         293,907           2,911      1.00%
2010             291,741         294,722           2,981      1.02%
2015             277,825         280,672           2,847      1.02%
2020             254,974         257,922           2,949      1.16%
2025             236,998         241,395           4,397      1.86%
2030             219,359         223,097           3,738      1.70%

Summary Statistics:
  Mean absolute difference: 3,051 people
  Mean percentage difference: 1.10%
  Max difference: 4,397 in 2025

Explore Spatial Population Distribution

Now let’s load and visualize the actual spatial raster data to understand population distribution patterns.

Load GHS-POP raster data

Interactive map of present-day population

First, let’s create an interactive map of the current population distribution that you can zoom and explore.

# Load 2020 data for interactive map
!pip install folium
import folium
from folium import plugins

# Load GHS-POP files
ghs_pop_files = sorted(list(ghs_pop_dir.glob("*/GHS_POP_E*_GLOBE_R2023A_4326_30ss_V1_0.tif")))

# Load GHS-POP 2020
present_year = 2020
ghs_pop_2020_file = [f for f in ghs_pop_files if f"E{present_year}" in f.name]

if ghs_pop_2020_file:
    tif_file = ghs_pop_2020_file[0]
    ds_2020 = rxr.open_rasterio(tif_file)
    # Clip to region
    ds_2020_clipped = ds_2020.sel(x=slice(lon_min, lon_max), y=slice(lat_max, lat_min))
    ds_2020_clipped = ds_2020_clipped.where(ds_2020_clipped >= 0)
    pop_2020 = ds_2020_clipped.squeeze("band", drop=True)
    
    # Apply mask
    mask_2020 = admin_mask.mask(pop_2020.x, pop_2020.y)
    pop_2020_masked = pop_2020.where(~np.isnan(mask_2020.values))
    
    print(f"Loaded GHS-POP {present_year} for interactive map")
    print(f"Data shape: {pop_2020_masked.shape}")
    print(f"Value range: {float(pop_2020_masked.min()):.2f} - {float(pop_2020_masked.max()):.2f}")
else:
    print(f"Could not find GHS-POP {present_year} file")
Defaulting to user installation because normal site-packages is not writeable
Requirement already satisfied: folium in /home/nejk/.local/lib/python3.12/site-packages (0.20.0)
Requirement already satisfied: branca>=0.6.0 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from folium) (0.8.2)
Requirement already satisfied: jinja2>=2.9 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from folium) (3.1.6)
Requirement already satisfied: numpy in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from folium) (2.3.3)
Requirement already satisfied: requests in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from folium) (2.32.5)
Requirement already satisfied: xyzservices in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from folium) (2025.4.0)
Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from jinja2>=2.9->folium) (3.0.3)
Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from requests->folium) (3.4.4)
Requirement already satisfied: idna<4,>=2.5 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from requests->folium) (3.11)
Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from requests->folium) (2.5.0)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/apps/python3/3.12.11-01/lib/python3.12/site-packages (from requests->folium) (2025.10.5)

[notice] A new release of pip is available: 25.1.1 -> 26.0.1
[notice] To update, run: pip install --upgrade pip
Loaded GHS-POP 2020 for interactive map
Data shape: (82, 144)
Value range: 0.00 - 4444.41
# Create interactive folium map
# Calculate center of region
center_lat = (lat_min + lat_max) / 2
center_lon = (lon_min + lon_max) / 2

# Create base map with a nice satellite/terrain basemap
m = folium.Map(
    location=[center_lat, center_lon],
    zoom_start=9,
    tiles=None  # We'll add custom tiles
)

# Add multiple basemap options
folium.TileLayer('OpenStreetMap', name='OpenStreetMap').add_to(m)
folium.TileLayer(
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr='Esri World Imagery',
    name='Satellite',
    overlay=False,
    control=True
).add_to(m)
folium.TileLayer(
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Topo_Map/MapServer/tile/{z}/{y}/{x}',
    attr='Esri World Topo',
    name='Topographic',
    overlay=False,
    control=True
).add_to(m)

# Add region boundary
folium.GeoJson(
    sel_gdf.geometry,
    name=f'Region {admin_id}',
    style_function=lambda x: {
        'fillColor': 'none',
        'color': 'red',
        'weight': 3,
        'fillOpacity': 0
    }
).add_to(m)

# Add population data as a heatmap overlay
# Convert to numpy array and get coordinates
pop_array = pop_2020_masked.values
y_coords = pop_2020_masked.y.values
x_coords = pop_2020_masked.x.values

# Create heat map data points (sample for performance)
heat_data = []
step = max(1, len(y_coords) // 100)  # Sample every nth point for performance

for i in range(0, len(y_coords), step):
    for j in range(0, len(x_coords), step):
        val = pop_array[i, j]
        if not np.isnan(val) and val > 0:
            heat_data.append([y_coords[i], x_coords[j], float(val)])

# Add heatmap
plugins.HeatMap(
    heat_data,
    name='Population Heatmap',
    min_opacity=0.3,
    max_zoom=13,
    radius=15,
    blur=20,
    gradient={0.4: 'yellow', 0.6: 'orange', 0.8: 'red', 1.0: 'darkred'}
).add_to(m)

# Add layer control
folium.LayerControl().add_to(m)

# Add title
title_html = f'''
<div style="position: fixed; 
     top: 10px; left: 50px; width: 400px; height: 60px; 
     background-color: white; border:2px solid grey; z-index:9999; 
     font-size:16px; padding: 10px; box-shadow: 2px 2px 6px rgba(0,0,0,0.3);">
     <b>Population Distribution {admin_id} ({present_year})</b><br>
     <i>Toggle layers and zoom to explore</i>
</div>
'''
m.get_root().html.add_child(folium.Element(title_html))

# Display map
m
Loading...
# Load GHS-POP files for selected years
ghs_pop_files = sorted(list(ghs_pop_dir.glob("*/GHS_POP_E*_GLOBE_R2023A_4326_30ss_V1_0.tif")))
print(f"Found {len(ghs_pop_files)} GHS-POP files")

# Load selected years: 1990, 2000, 2010, 2020, 2030
selected_years_pop = [1990, 2000, 2010, 2020, 2030]
pop_data_dict = {}

for year in selected_years_pop:
    matching_files = [f for f in ghs_pop_files if f"E{year}" in f.name]
    if matching_files:
        tif_file = matching_files[0]
        ds = rxr.open_rasterio(tif_file)
        # Clip to region
        ds_clipped = ds.sel(x=slice(lon_min, lon_max), y=slice(lat_max, lat_min))
        ds_clipped = ds_clipped.where(ds_clipped >= 0)  # Remove fill values
        pop_data_dict[year] = ds_clipped.squeeze("band", drop=True)
        print(f"  Loaded {year}: shape {ds_clipped.shape}")

print(f"\nLoaded {len(pop_data_dict)} years of GHS-POP data")
Found 12 GHS-POP files
  Loaded 1990: shape (1, 82, 144)
  Loaded 2000: shape (1, 82, 144)
  Loaded 2010: shape (1, 82, 144)
  Loaded 2020: shape (1, 82, 144)
  Loaded 2030: shape (1, 82, 144)

Loaded 5 years of GHS-POP data

Load GHS-WUP raster data

# Load GHS-WUP files for the same years
ghs_wup_files = sorted(list(ghs_wup_dir.glob("*/GHS_WUP_POP_E*_GLOBE_R2025A_4326_30ss_V1_0.tif")))
print(f"Found {len(ghs_wup_files)} GHS-WUP files")

wup_data_dict = {}

for year in selected_years_pop:
    matching_files = [f for f in ghs_wup_files if f"E{year}" in f.name]
    if matching_files:
        tif_file = matching_files[0]
        ds = rxr.open_rasterio(tif_file)
        # Clip to region
        ds_clipped = ds.sel(x=slice(lon_min, lon_max), y=slice(lat_max, lat_min))
        ds_clipped = ds_clipped.where(ds_clipped >= 0)  # Remove fill values
        wup_data_dict[year] = ds_clipped.squeeze("band", drop=True)
        print(f"  Loaded {year}: shape {ds_clipped.shape}")

print(f"\nLoaded {len(wup_data_dict)} years of GHS-WUP data")
Found 26 GHS-WUP files
  Loaded 1990: shape (1, 82, 144)
  Loaded 2000: shape (1, 82, 144)
  Loaded 2010: shape (1, 82, 144)
  Loaded 2020: shape (1, 82, 144)
  Loaded 2030: shape (1, 82, 144)

Loaded 5 years of GHS-WUP data

Visualize spatial distribution: GHS-POP

Let’s visualize how population is distributed spatially across the region for different time periods.

# Create region mask (use first available year to create mask)
first_year = selected_years_pop[0]
pop_admin_mask_raw = admin_mask.mask(pop_data_dict[first_year].x, pop_data_dict[first_year].y)

# Plot GHS-POP for selected years
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.flatten()

for idx, year in enumerate(selected_years_pop):
    if year in pop_data_dict:
        # Create mask for this specific year's coordinates
        mask_raw = admin_mask.mask(pop_data_dict[year].x, pop_data_dict[year].y)
        pop_mask = xr.DataArray(
            (~np.isnan(mask_raw.values)).astype(float),
            coords={'y': pop_data_dict[year].y, 'x': pop_data_dict[year].x},
            dims=['y', 'x']
        )
        
        masked_pop = pop_data_dict[year].where(pop_mask == 1)
        
        im = masked_pop.plot(
            ax=axes[idx],
            cmap='YlOrRd',
            add_colorbar=True,
            cbar_kwargs={'label': 'Population per cell', 'shrink': 0.8},
            vmin=0,
            vmax=100
        )
        
        axes[idx].set_title(f'GHS-POP {year}', fontsize=12, fontweight='bold')
        axes[idx].set_xlabel('Longitude')
        axes[idx].set_ylabel('Latitude')
        sel_gdf.boundary.plot(ax=axes[idx], color='black', linewidth=2)

# Remove the last subplot if odd number
if len(selected_years_pop) < 6:
    fig.delaxes(axes[5])

plt.suptitle(f'Population Distribution in {admin_id} - GHS-POP', fontsize=16, fontweight='bold', y=0.98)
plt.tight_layout()
plt.show()
<Figure size 1800x1000 with 10 Axes>

Visualize spatial distribution: GHS-WUP

# Plot GHS-WUP for selected years
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
axes = axes.flatten()

for idx, year in enumerate(selected_years_pop):
    if year in wup_data_dict:
        # Create mask for this specific year's coordinates
        mask_raw = admin_mask.mask(wup_data_dict[year].x, wup_data_dict[year].y)
        wup_mask = xr.DataArray(
            (~np.isnan(mask_raw.values)).astype(float),
            coords={'y': wup_data_dict[year].y, 'x': wup_data_dict[year].x},
            dims=['y', 'x']
        )
        
        masked_wup = wup_data_dict[year].where(wup_mask == 1)
        
        im = masked_wup.plot(
            ax=axes[idx],
            cmap='YlOrRd',
            add_colorbar=True,
            cbar_kwargs={'label': 'Population per cell', 'shrink': 0.8},
            vmin=0,
            vmax=100
        )
        
        axes[idx].set_title(f'GHS-WUP {year}', fontsize=12, fontweight='bold')
        axes[idx].set_xlabel('Longitude')
        axes[idx].set_ylabel('Latitude')
        sel_gdf.boundary.plot(ax=axes[idx], color='black', linewidth=2)

# Remove the last subplot if odd number
if len(selected_years_pop) < 6:
    fig.delaxes(axes[5])

plt.suptitle(f'Population Distribution in {admin_id} - GHS-WUP', fontsize=16, fontweight='bold', y=0.98)
plt.tight_layout()
plt.show()
<Figure size 1800x1000 with 10 Axes>

Compare spatial patterns: Side-by-side

Let’s compare GHS-POP and GHS-WUP for the same year to understand the differences.

# Compare 2020 for both datasets
comparison_year = 2020

fig, axes = plt.subplots(1, 3, figsize=(20, 6))

# GHS-POP 2020
# Create mask for POP dataset
mask_pop_raw = admin_mask.mask(pop_data_dict[comparison_year].x, pop_data_dict[comparison_year].y)
masked_pop = pop_data_dict[comparison_year].where(~np.isnan(mask_pop_raw.values))

im1 = masked_pop.plot(
    ax=axes[0],
    cmap='YlOrRd',
    add_colorbar=True,
    cbar_kwargs={'label': 'Population per cell'},
    vmin=0,
    vmax=100
)
axes[0].set_title(f'GHS-POP {comparison_year}', fontsize=12, fontweight='bold')
axes[0].set_xlabel('Longitude')
axes[0].set_ylabel('Latitude')
sel_gdf.boundary.plot(ax=axes[0], color='black', linewidth=2)

# GHS-WUP 2020
# Create mask for WUP dataset
mask_wup_raw = admin_mask.mask(wup_data_dict[comparison_year].x, wup_data_dict[comparison_year].y)
masked_wup = wup_data_dict[comparison_year].where(~np.isnan(mask_wup_raw.values))

im2 = masked_wup.plot(
    ax=axes[1],
    cmap='YlOrRd',
    add_colorbar=True,
    cbar_kwargs={'label': 'Population per cell'},
    vmin=0,
    vmax=100
)
axes[1].set_title(f'GHS-WUP {comparison_year}', fontsize=12, fontweight='bold')
axes[1].set_xlabel('Longitude')
axes[1].set_ylabel('Latitude')
sel_gdf.boundary.plot(ax=axes[1], color='black', linewidth=2)

# Difference - ensure both are on same coordinates
# Interpolate WUP to POP coordinates if needed, or vice versa
try:
    # Try direct subtraction first
    difference = masked_wup - masked_pop
    # Check if result has any valid data
    if difference.isnull().all():
        # If all NaN, interpolate WUP to POP grid
        masked_wup_interp = masked_wup.interp_like(masked_pop)
        difference = masked_wup_interp - masked_pop
except:
    # If there's an error, try interpolation
    masked_wup_interp = masked_wup.interp_like(masked_pop)
    difference = masked_wup_interp - masked_pop

im3 = difference.plot(
    ax=axes[2],
    cmap='RdBu_r',
    add_colorbar=True,
    cbar_kwargs={'label': 'Difference (WUP - POP)'},
    center=0,
    vmin=-50,
    vmax=50

)
axes[2].set_title(f'Difference {comparison_year}', fontsize=12, fontweight='bold')
axes[2].set_xlabel('Longitude')
axes[2].set_ylabel('Latitude')
sel_gdf.boundary.plot(ax=axes[2], color='black', linewidth=2)

plt.suptitle(f'Dataset Comparison for {admin_id} ({comparison_year})', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()
<Figure size 2000x600 with 6 Axes>

Future population change: GHS-WUP 2025 vs 2100

Let’s explore how the GHS-WUP dataset projects population change by comparing present day (2025) with end-of-century (2100).

# Load 2025 and 2100 from GHS-WUP
future_years = [2025, 2100]
wup_future_dict = {}

for year in future_years:
    matching_files = [f for f in ghs_wup_files if f"E{year}" in f.name]
    if matching_files:
        tif_file = matching_files[0]
        ds = rxr.open_rasterio(tif_file)
        # Clip to region
        ds_clipped = ds.sel(x=slice(lon_min, lon_max), y=slice(lat_max, lat_min))
        ds_clipped = ds_clipped.where(ds_clipped >= 0)  # Remove fill values
        wup_future_dict[year] = ds_clipped.squeeze("band", drop=True)
        print(f"Loaded GHS-WUP {year}: shape {ds_clipped.shape}")

print(f"\nLoaded {len(wup_future_dict)} years for future comparison")
Loaded GHS-WUP 2025: shape (1, 82, 144)
Loaded GHS-WUP 2100: shape (1, 82, 144)

Loaded 2 years for future comparison
# Compare 2025 vs 2100
fig, axes = plt.subplots(1, 3, figsize=(20, 6))

# 2025 (Present)
mask_2025 = admin_mask.mask(wup_future_dict[2025].x, wup_future_dict[2025].y)
masked_2025 = wup_future_dict[2025].where(~np.isnan(mask_2025.values))

im1 = masked_2025.plot(
    ax=axes[0],
    cmap='YlOrRd',
    add_colorbar=True,
    cbar_kwargs={'label': 'Population per cell'},
    vmin=0,
    vmax=100
)
axes[0].set_title('GHS-WUP 2025 (Present)', fontsize=12, fontweight='bold')
axes[0].set_xlabel('Longitude')
axes[0].set_ylabel('Latitude')
sel_gdf.boundary.plot(ax=axes[0], color='black', linewidth=2)

# 2100 (Future)
mask_2100 = admin_mask.mask(wup_future_dict[2100].x, wup_future_dict[2100].y)
masked_2100 = wup_future_dict[2100].where(~np.isnan(mask_2100.values))

im2 = masked_2100.plot(
    ax=axes[1],
    cmap='YlOrRd',
    add_colorbar=True,
    cbar_kwargs={'label': 'Population per cell'},
    vmin=0,
    vmax=100
)
axes[1].set_title('GHS-WUP 2100 (Future)', fontsize=12, fontweight='bold')
axes[1].set_xlabel('Longitude')
axes[1].set_ylabel('Latitude')
sel_gdf.boundary.plot(ax=axes[1], color='black', linewidth=2)

# Change (2100 - 2025)
# Interpolate if needed
try:
    future_change = masked_2100 - masked_2025
    if future_change.isnull().all():
        masked_2100_interp = masked_2100.interp_like(masked_2025)
        future_change = masked_2100_interp - masked_2025
except:
    masked_2100_interp = masked_2100.interp_like(masked_2025)
    future_change = masked_2100_interp - masked_2025

im3 = future_change.plot(
    ax=axes[2],
    cmap='RdBu_r',
    add_colorbar=True,
    cbar_kwargs={'label': 'Population change (2100 - 2025)'},
    center=0,
    vmin=-50,
    vmax=50
)
axes[2].set_title('Projected Change (2100 - 2025)', fontsize=12, fontweight='bold')
axes[2].set_xlabel('Longitude')
axes[2].set_ylabel('Latitude')
sel_gdf.boundary.plot(ax=axes[2], color='black', linewidth=2)

plt.suptitle(f'Future Population Projection for {admin_id} - GHS-WUP', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

# Print summary statistics
print(f"\nPopulation change summary (2025 → 2100):")
total_2025 = masked_2025.sum().values
total_2100 = masked_2100.sum().values if future_change.isnull().all() == False else masked_2100_interp.sum().values
change = total_2100 - total_2025
percent_change = (change / total_2025) * 100

print(f"  2025 total: {total_2025:,.0f}")
print(f"  2100 total: {total_2100:,.0f}")
print(f"  Absolute change: {change:+,.0f}")
print(f"  Percent change: {percent_change:+.1f}%")
<Figure size 2000x600 with 6 Axes>

Population change summary (2025 → 2100):
  2025 total: 241,395
  2100 total: 59,564
  Absolute change: -181,831
  Percent change: -75.3%

Key Takeaways

What have we learned about population exposure data?

  1. Temporal Coverage: GHS-POP covers 1975-2030, while GHS-WUP extends to 2100

  2. Dataset Differences: The two datasets show variations in population estimates, reflecting different methodologies

  3. Spatial Patterns: Population is not uniformly distributed - urban centers show higher densities

  4. Temporal Trends: Population changes over time can be quantified and visualized

For risk assessment: These differences matter when calculating drought exposure. Choose the appropriate dataset based on your analysis timeframe and the specific population aspect you’re studying (total vs. urban).