diff --git a/tests/geospatial/test_satellite_filtering.py b/tests/geospatial/test_satellite_filtering.py new file mode 100644 index 0000000000..efbd38f98e --- /dev/null +++ b/tests/geospatial/test_satellite_filtering.py @@ -0,0 +1,117 @@ +import datetime + +import fsspec +import geojson +import planetary_computer +import pystac_client +import stackstac +import xarray as xr +from distributed import wait + + +def harmonize_to_old(data): + """ + Harmonize new Sentinel-2 data to the old baseline. + + Parameters + ---------- + data: xarray.DataArray + A DataArray with four dimensions: time, band, y, x + + Returns + ------- + harmonized: xarray.DataArray + A DataArray with all values harmonized to the old + processing baseline. + """ + cutoff = datetime.datetime(2022, 1, 25) + offset = 1000 + bands = [ + "B01", + "B02", + "B03", + "B04", + "B05", + "B06", + "B07", + "B08", + "B8A", + "B09", + "B10", + "B11", + "B12", + ] + + old = data.sel(time=slice(cutoff)) + + to_process = list(set(bands) & set(data.band.data.tolist())) + new = data.sel(time=slice(cutoff, None)).drop_sel(band=to_process) + + new_harmonized = data.sel(time=slice(cutoff, None), band=to_process).clip(offset) + new_harmonized -= offset + + new = xr.concat([new, new_harmonized], "band").sel(band=data.band.data.tolist()) + return xr.concat([old, new], dim="time") + + +def test_satellite_filtering( + scale, + client_factory, + cluster_kwargs={ + "workspace": "dask-engineering-azure", + "region": "westeurope", + "wait_for_workers": True, + }, + scale_kwargs={ + "small": {"n_workers": 10}, + "large": {"n_workers": 100}, + }, +): + with client_factory( + **scale_kwargs[scale], **cluster_kwargs + ) as client: # noqa: F841 + catalog = pystac_client.Client.open( + "https://planetarycomputer.microsoft.com/api/stac/v1", + modifier=planetary_computer.sign_inplace, + ) + + # GeoJSON for region of interest is from https://github.com/isellsoap/deutschlandGeoJSON/tree/main/1_deutschland + with fsspec.open( + "https://raw.githubusercontent.com/isellsoap/deutschlandGeoJSON/main/1_deutschland/3_mittel.geo.json" + ) as f: + gj = geojson.load(f) + + # Flatten MultiPolygon to single Polygon + coordinates = [] + for x in gj.features[0]["geometry"]["coordinates"]: + coordinates.extend(x) + area_of_interest = { + "type": "Polygon", + "coordinates": coordinates, + } + + # Get stack items + if scale == "small": + time_of_interest = "2024-01-01/2024-09-01" + else: + time_of_interest = "2015-01-01/2024-09-01" + + search = catalog.search( + collections=["sentinel-2-l2a"], + intersects=area_of_interest, + datetime=time_of_interest, + ) + items = search.item_collection() + + # Construct Xarray Dataset from stack items + ds = stackstac.stack(items, assets=["B08", "B11"], chunksize=(400, 1, 256, 256)) + # See https://planetarycomputer.microsoft.com/dataset/sentinel-2-l2a#Baseline-Change + ds = harmonize_to_old(ds) + + # Compute humidity index + humidity = (ds.sel(band="B08") - ds.sel(band="B11")) / ( + ds.sel(band="B08") + ds.sel(band="B11") + ) + result = humidity.groupby("time.month").mean() + result = result.persist() + wait(result)