Resampling with ODC-geo (S3 storage, NetCDF file, Zarr reader, icechunk virtualization)

import argparse

import xarray as xr
from icechunk import IcechunkStore, StorageConfig
from odc.geo.geobox import GeoBox
from odc.geo.geom import Geometry
from odc.geo.xr import crop, xr_reproject
from shapely.geometry import box
def warp_resample(dataset, zoom=0):
    from common import earthaccess_args, target_extent

    te = target_extent[zoom]

    # Define filepath, driver, and variable information
    args = earthaccess_args[dataset]
    # Open dataset
    storage = StorageConfig.s3_from_env(
        bucket="nasa-veda-scratch",
        prefix=f"resampling/icechunk/{dataset}-reference",
        region="us-west-2",
    )
    store = IcechunkStore.open_existing(storage=storage, mode="r")
    da = xr.open_zarr(store, zarr_format=3, consolidated=False)[args["variable"]]
    # Define source and target projection
    dstSRS = "EPSG:3857"
    srcSRS = "EPSG:4326"
    width = height = 256
    # Define ODC geobox for target tile
    gbox = GeoBox.from_bbox(te, dstSRS, shape=(height, width))
    if dataset == "gpm_imerg":
        # Transpose and rename dataset dims to align with GDAL expectations
        da = da.rename({"lon": "x", "lat": "y"}).transpose("time", "y", "x").squeeze()
    # Assign input projection
    da = da.odc.assign_crs(srcSRS)
    # Crop dataset to tile bounds
    bbox = box(*te)
    geom = Geometry(bbox, "EPSG:3857")
    da = crop(da, geom)
    # Load into memory to avoid topology error
    da.load()
    # Reproject dataset
    return xr_reproject(da, gbox, tight=True).load()
if __name__ == "__main__":
    if "get_ipython" in dir():
        # Just call warp_resample if running as a Jupyter Notebook
        da = warp_resample("mursst")
    else:
        # Configure dataset via argpase if running via CLI
        parser = argparse.ArgumentParser(description="Set environment for the script.")
        parser.add_argument(
            "--dataset",
            default="mursst",
            help="Dataset to resample.",
            choices=["gpm_imerg", "mursst"],
        )
        parser.add_argument(
            "--zoom",
            default=0,
            help="Zoom level for tile extent.",
        )
        user_args = parser.parse_args()
        da = warp_resample(user_args.dataset, int(user_args.zoom))