Resampling with ODC-geo (s3 storage, NetCDF File, H5NetCDF driver, earthaccess auth)

import argparse

import earthaccess
import xarray as xr
from odc.geo.geobox import GeoBox
from odc.geo.geom import Geometry
from odc.geo.xr import crop, xr_reproject
from shapely.geometry import box
def warp_resample(dataset, zoom=0):
    from common import earthaccess_args, target_extent

    te = target_extent[zoom]

    # Define filepath, driver, and variable information
    args = earthaccess_args[dataset]
    input_uri = f'{args["folder"]}/{args["filename"]}'
    src = f's3://{args["bucket"]}/{input_uri}'
    # Define source and target projection
    dstSRS = "EPSG:3857"
    srcSRS = "EPSG:4326"
    width = height = 256
    # Authentical via earthaccess
    earthaccess.login()
    fs = earthaccess.get_s3_filesystem(daac=args["daac"])
    # Specify fsspec caching since default options don't work well for raster data
    fsspec_caching = {
        "cache_type": "none",
    }
    with fs.open(src, **fsspec_caching) as f:
        # Define ODC geobox for target tile
        gbox = GeoBox.from_bbox(te, dstSRS, shape=(height, width))
        # Open dataset
        da = xr.open_dataset(f, engine="h5netcdf", chunks={})[args["variable"]]
        if dataset == "gpm_imerg":
            # Transpose and rename dataset dims to align with GDAL expectations
            da = (
                da.rename({"lon": "x", "lat": "y"})
                .transpose("time", "y", "x")
                .squeeze()
            )
        # Assign input projection
        da = da.odc.assign_crs(srcSRS)
        # Crop dataset to tile bounds
        bbox = box(*te)
        geom = Geometry(bbox, "EPSG:3857")
        da = crop(da, geom)
        # Load into memory to avoid topology error
        da.load()
        # Reproject dataset
        return xr_reproject(da, gbox, tight=True).load()
if __name__ == "__main__":
    if "get_ipython" in dir():
        # Just call warp_resample if running as a Jupyter Notebook
        da = warp_resample("gpm_imerg")
    else:
        # Configure dataset via argpase if running via CLI
        parser = argparse.ArgumentParser(description="Set environment for the script.")
        parser.add_argument(
            "--dataset",
            default="mursst",
            help="Dataset to resample.",
            choices=["gpm_imerg", "mursst"],
        )
        parser.add_argument(
            "--zoom",
            default=0,
            help="Zoom level for tile extent.",
        )
        user_args = parser.parse_args()
        da = warp_resample(user_args.dataset, int(user_args.zoom))