Resampling with Rioxarray (S3 storage, Web-Optimized Zarr V3 store, Zarr reader with icechunk)

WARNING: This notebook is intented to show the potential for web-optimized Zarrs that contain overviews, but the approach should not be used in production due to the lack a robust approach following a metadata specification.

import argparse

from icechunk import IcechunkStore, StorageConfig
from rasterio.crs import CRS
from rasterio.transform import from_bounds
from rasterio.warp import calculate_default_transform
from xarray import open_zarr
def get_overview_level(
    dataset_bounds,
    dataset_shape,
    target_bounds: tuple,
    overviews: list,
    height: int = 256,
    width: int = 256,
    srcSRS: CRS = CRS.from_string("EPSG:4326"),
    dstSRS: CRS = CRS.from_string("EPSG:3857"),
) -> int:
    """Return the overview level corresponding to the tile resolution.

    Freely adapted from rio-tiler, which freely adapted from https://github.com/OSGeo/gdal/blob/41993f127e6e1669fbd9e944744b7c9b2bd6c400/gdal/apps/gdalwarp_lib.cpp#L2293-L2362

    Args:
        src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset.
        bounds (tuple): Bounding box coordinates in target crs (**dstSRS**).
        overviews (list): List of overview decimation levels.
        height (int): Desired output height of the array for the input bounds.
        width (int): Desired output width of the array for the input bounds.
        srcSRS (rasterio.crs.CRS, optional): Source Coordinate Reference System. Defaults to `epsg:4326`.
        dstSRS (rasterio.crs.CRS, optional): Target Coordinate Reference System. Defaults to `epsg:3857`.

    Returns:
        int: Overview level.

    """

    dst_transform, _, _ = calculate_default_transform(
        srcSRS, dstSRS, dataset_shape[1], dataset_shape[0], *dataset_bounds
    )
    src_res = dst_transform.a

    # Compute what the "natural" output resolution
    # (in pixels) would be for this input dataset
    vrt_transform = from_bounds(*target_bounds, width, height)
    target_res = vrt_transform.a

    ovr_idx = -1
    if target_res > src_res:
        res = [src_res * decim for decim in overviews]

        for idx in range(ovr_idx, len(res) - 1):
            ovr_idx = idx
            ovrRes = src_res if ovr_idx < 0 else res[ovr_idx]
            nextRes = res[ovr_idx + 1]

            if (ovrRes < target_res) and (nextRes > target_res):
                break

            if abs(ovrRes - target_res) < 1e-1:
                break

        else:
            print("else")
            ovr_idx = len(res) - 1
    return overviews[ovr_idx - 1]
def warp_resample(dataset, zoom=0):
    from common import target_extent

    te = target_extent[zoom]

    # Open dataset
    storage = StorageConfig.s3_from_env(
        bucket="nasa-veda-scratch",
        prefix=f"resampling/icechunk/{dataset}-overviews.zarr",
        region="us-west-2",
    )
    store = IcechunkStore.open_or_create(storage=storage)

    # Define source and target projection
    dstSRS = "EPSG:3857"
    srcSRS = "EPSG:4326"
    width = height = 256
    # Hard code some metadata that could be included in a GeoZarr spec
    overviews = [2, 4, 8, 16, 32, 64]
    bounds = [-179.995, -89.99499999999999, 180.005, 89.99499999999999]
    shape = (17999, 36000)
    # Get overview level for associated zoom level
    level = get_overview_level(bounds, shape, te, overviews)
    # Open overview
    da = open_zarr(store, group=str(level), zarr_format=3, consolidated=False)["var"]
    # Set input dataset projection
    da = da.rio.write_crs(srcSRS)
    da = da.rio.clip_box(
        *te,
        crs=dstSRS,
    )
    # Define affine transformation from input to output dataset
    dst_transform, w, h = calculate_default_transform(
        srcSRS,
        dstSRS,
        da.rio.width,
        da.rio.height,
        *da.rio.bounds(),
        dst_width=width,
        dst_height=height,
    )
    # Reproject dataset
    return da.rio.reproject(dstSRS, shape=(h, w), transform=dst_transform).load()
if __name__ == "__main__":
    if "get_ipython" in dir():
        # Just call warp_resample if running as a Jupyter Notebook
        da = warp_resample("mursst")
    else:
        # Configure dataset via argpase if running via CLI
        parser = argparse.ArgumentParser(description="Set environment for the script.")
        parser.add_argument(
            "--dataset",
            default="mursst",
            help="Dataset to resample.",
            choices=["mursst"],
        )
        parser.add_argument(
            "--zoom",
            default=0,
            help="Zoom level for tile extent.",
        )
        user_args = parser.parse_args()
        da = warp_resample(user_args.dataset, int(user_args.zoom))