Resampling with Rasterio (S3 storage, Web-Optimized Zarr V3 store, Zarr reader with icechunk)

WARNING: This notebook is intented to show the potential for web-optimized Zarrs that contain overviews, but the approach should not be used in production due to the lack a robust approach following a metadata specification.

import argparse
import os

import numpy as np
import zarr
from icechunk import IcechunkStore, StorageConfig
from rasterio.crs import CRS
from rasterio.transform import from_bounds
from rasterio.warp import calculate_default_transform, reproject
# Define filepath, driver, and variable information
dataset = "mursst"
dst = f"{os.getcwd()}/earthaccess_data/{dataset}-overviews.zarr"
# Open dataset
storage_config = StorageConfig.filesystem(dst)
store = IcechunkStore.open_or_create(storage=storage_config)
def get_overview_level(
    dataset_bounds,
    dataset_shape,
    target_bounds: tuple,
    overviews: list,
    height: int = 256,
    width: int = 256,
    srcSRS: CRS = CRS.from_string("EPSG:4326"),
    dstSRS: CRS = CRS.from_string("EPSG:3857"),
) -> int:
    """Return the overview level corresponding to the tile resolution.

    Freely adapted from rio-tiler, which freely adapted from https://github.com/OSGeo/gdal/blob/41993f127e6e1669fbd9e944744b7c9b2bd6c400/gdal/apps/gdalwarp_lib.cpp#L2293-L2362

    Args:
        src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset.
        bounds (tuple): Bounding box coordinates in target crs (**dstSRS**).
        overviews (list): List of overview decimation levels.
        height (int): Desired output height of the array for the input bounds.
        width (int): Desired output width of the array for the input bounds.
        srcSRS (rasterio.crs.CRS, optional): Source Coordinate Reference System. Defaults to `epsg:4326`.
        dstSRS (rasterio.crs.CRS, optional): Target Coordinate Reference System. Defaults to `epsg:3857`.

    Returns:
        int: Overview level.

    """

    dst_transform, _, _ = calculate_default_transform(
        srcSRS, dstSRS, dataset_shape[1], dataset_shape[0], *dataset_bounds
    )
    src_res = dst_transform.a

    # Compute what the "natural" output resolution
    # (in pixels) would be for this input dataset
    vrt_transform = from_bounds(*target_bounds, width, height)
    target_res = vrt_transform.a

    ovr_idx = -1
    if target_res > src_res:
        res = [src_res * decim for decim in overviews]

        for idx in range(ovr_idx, len(res) - 1):
            ovr_idx = idx
            ovrRes = src_res if ovr_idx < 0 else res[ovr_idx]
            nextRes = res[ovr_idx + 1]

            if (ovrRes < target_res) and (nextRes > target_res):
                break

            if abs(ovrRes - target_res) < 1e-1:
                break

        else:
            print("else")
            ovr_idx = len(res) - 1
    return overviews[ovr_idx - 1]
def warp_resample(dataset, zoom=0):
    from common import target_extent

    te = target_extent[zoom]

    # Open dataset
    storage = StorageConfig.s3_from_env(
        bucket="nasa-veda-scratch",
        prefix=f"resampling/icechunk/{dataset}-overviews.zarr",
        region="us-west-2",
    )
    store = IcechunkStore.open_or_create(storage=storage)

    # Define source and target projection
    dstSRS = "EPSG:3857"
    srcSRS = "EPSG:4326"
    width = height = 256
    # Hard code some metadata that could be included in a GeoZarr spec

    bounds = [-179.995, -89.99499999999999, 180.005, 89.99499999999999]
    shape = (17999, 36000)
    overviews = [2, 4, 8, 16, 32, 64]
    affine = {
        2: {
            "bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
            "shape": (9000, 18000),
        },
        4: {
            "bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
            "shape": (4500, 9000),
        },
        8: {
            "bounds": (-179.995, -89.99499999999999, 180.005, 89.99499999999999),
            "shape": (2250, 4500),
        },
        16: {
            "bounds": (
                -179.995,
                -89.99500000000002,
                180.00500000000005,
                89.99499999999999,
            ),
            "shape": (1125, 2250),
        },
        32: {
            "bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
            "shape": (563, 1125),
        },
        64: {
            "bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
            "shape": (282, 563),
        },
    }
    # Get overview level for associated zoom level
    level = get_overview_level(bounds, shape, te, overviews)
    # Open overview
    data = zarr.open(store, mode="r", path=f"{level}/var")
    # Define affine transformation from input to output dataset
    src_transform = from_bounds(
        *affine[level]["bounds"], affine[level]["shape"][1], affine[level]["shape"][0]
    )
    dst_transform = from_bounds(*te, width, height)
    # Create array to host results
    destination = np.zeros((height, width), np.float32)
    # Reproject dataset
    _, transform = reproject(
        np.squeeze(data),
        destination,
        src_crs=srcSRS,
        src_transform=src_transform,
        dst_crs=dstSRS,
        dst_transform=dst_transform,
    )
    return destination
if __name__ == "__main__":
    if "get_ipython" in dir():
        # Just call warp_resample if running as a Jupyter Notebook
        da = warp_resample("mursst")
    else:
        # Configure dataset via argpase if running via CLI
        parser = argparse.ArgumentParser(description="Set environment for the script.")
        parser.add_argument(
            "--dataset",
            default="mursst",
            help="Dataset to resample.",
            choices=["mursst"],
        )
        parser.add_argument(
            "--zoom",
            default=0,
            help="Zoom level for tile extent.",
        )
        user_args = parser.parse_args()
        da = warp_resample(user_args.dataset, int(user_args.zoom))