Skip to content

Commit

Permalink
Add satellite image processing benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
jrbourbeau committed Sep 16, 2024
1 parent 98ed1d2 commit e534038
Showing 1 changed file with 117 additions and 0 deletions.
117 changes: 117 additions & 0 deletions tests/geospatial/test_satellite_filtering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import datetime

import fsspec
import geojson
import planetary_computer
import pystac_client
import stackstac
import xarray as xr
from distributed import wait


def harmonize_to_old(data):
"""
Harmonize new Sentinel-2 data to the old baseline.
Parameters
----------
data: xarray.DataArray
A DataArray with four dimensions: time, band, y, x
Returns
-------
harmonized: xarray.DataArray
A DataArray with all values harmonized to the old
processing baseline.
"""
cutoff = datetime.datetime(2022, 1, 25)
offset = 1000
bands = [
"B01",
"B02",
"B03",
"B04",
"B05",
"B06",
"B07",
"B08",
"B8A",
"B09",
"B10",
"B11",
"B12",
]

old = data.sel(time=slice(cutoff))

to_process = list(set(bands) & set(data.band.data.tolist()))
new = data.sel(time=slice(cutoff, None)).drop_sel(band=to_process)

new_harmonized = data.sel(time=slice(cutoff, None), band=to_process).clip(offset)
new_harmonized -= offset

new = xr.concat([new, new_harmonized], "band").sel(band=data.band.data.tolist())
return xr.concat([old, new], dim="time")


def test_satellite_filtering(
scale,
client_factory,
cluster_kwargs={
"workspace": "dask-engineering-azure",
"region": "westeurope",
"wait_for_workers": True,
},
scale_kwargs={
"small": {"n_workers": 10},
"large": {"n_workers": 100},
},
):
with client_factory(
**scale_kwargs[scale], **cluster_kwargs
) as client: # noqa: F841
catalog = pystac_client.Client.open(
"https://planetarycomputer.microsoft.com/api/stac/v1",
modifier=planetary_computer.sign_inplace,
)

# GeoJSON for region of interest is from https://github.com/isellsoap/deutschlandGeoJSON/tree/main/1_deutschland
with fsspec.open(
"https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/1_deutschland/3_mittel.geo.json"
) as f:
gj = geojson.load(f)

# Flatten MultiPolygon to single Polygon
coordinates = []
for x in gj.features[0]["geometry"]["coordinates"]:
coordinates.extend(x)
area_of_interest = {
"type": "Polygon",
"coordinates": coordinates,
}

# Get stack items
if scale == "small":
time_of_interest = "2024-01-01/2024-09-01"
else:
time_of_interest = "2015-01-01/2024-09-01"

search = catalog.search(
collections=["sentinel-2-l2a"],
intersects=area_of_interest,
datetime=time_of_interest,
)
items = search.item_collection()

# Construct Xarray Dataset from stack items
ds = stackstac.stack(items, assets=["B08", "B11"], chunksize=(400, 1, 256, 256))
# See https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a#Baseline-Change
ds = harmonize_to_old(ds)

# Compute humidity index
humidity = (ds.sel(band="B08") - ds.sel(band="B11")) / (
ds.sel(band="B08") + ds.sel(band="B11")
)
result = humidity.groupby("time.month").mean()
result = result.persist()
wait(result)

0 comments on commit e534038

Please sign in to comment.