import argparse
import os
import numpy as np
import zarr
from icechunk import IcechunkStore, StorageConfig
from rasterio.crs import CRS
from rasterio.transform import from_bounds
from rasterio.warp import calculate_default_transform, reproject
Resampling with Rasterio (S3 storage, Web-Optimized Zarr V3 store, Zarr reader with icechunk)
WARNING: This notebook is intented to show the potential for web-optimized Zarrs that contain overviews, but the approach should not be used in production due to the lack a robust approach following a metadata specification.
# Define filepath, driver, and variable information
= "mursst"
dataset = f"{os.getcwd()}/earthaccess_data/{dataset}-overviews.zarr"
dst # Open dataset
= StorageConfig.filesystem(dst)
storage_config = IcechunkStore.open_or_create(storage=storage_config) store
def get_overview_level(
dataset_bounds,
dataset_shape,tuple,
target_bounds: list,
overviews: int = 256,
height: int = 256,
width: = CRS.from_string("EPSG:4326"),
srcSRS: CRS = CRS.from_string("EPSG:3857"),
dstSRS: CRS -> int:
) """Return the overview level corresponding to the tile resolution.
Freely adapted from rio-tiler, which freely adapted from https://github.com/OSGeo/gdal/blob/41993f127e6e1669fbd9e944744b7c9b2bd6c400/gdal/apps/gdalwarp_lib.cpp#L2293-L2362
Args:
src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset.
bounds (tuple): Bounding box coordinates in target crs (**dstSRS**).
overviews (list): List of overview decimation levels.
height (int): Desired output height of the array for the input bounds.
width (int): Desired output width of the array for the input bounds.
srcSRS (rasterio.crs.CRS, optional): Source Coordinate Reference System. Defaults to `epsg:4326`.
dstSRS (rasterio.crs.CRS, optional): Target Coordinate Reference System. Defaults to `epsg:3857`.
Returns:
int: Overview level.
"""
= calculate_default_transform(
dst_transform, _, _ 1], dataset_shape[0], *dataset_bounds
srcSRS, dstSRS, dataset_shape[
)= dst_transform.a
src_res
# Compute what the "natural" output resolution
# (in pixels) would be for this input dataset
= from_bounds(*target_bounds, width, height)
vrt_transform = vrt_transform.a
target_res
= -1
ovr_idx if target_res > src_res:
= [src_res * decim for decim in overviews]
res
for idx in range(ovr_idx, len(res) - 1):
= idx
ovr_idx = src_res if ovr_idx < 0 else res[ovr_idx]
ovrRes = res[ovr_idx + 1]
nextRes
if (ovrRes < target_res) and (nextRes > target_res):
break
if abs(ovrRes - target_res) < 1e-1:
break
else:
print("else")
= len(res) - 1
ovr_idx return overviews[ovr_idx - 1]
def warp_resample(dataset, zoom=0):
from common import target_extent
= target_extent[zoom]
te
# Open dataset
= StorageConfig.s3_from_env(
storage ="nasa-veda-scratch",
bucket=f"resampling/icechunk/{dataset}-overviews.zarr",
prefix="us-west-2",
region
)= IcechunkStore.open_or_create(storage=storage)
store
# Define source and target projection
= "EPSG:3857"
dstSRS = "EPSG:4326"
srcSRS = height = 256
width # Hard code some metadata that could be included in a GeoZarr spec
= [-179.995, -89.99499999999999, 180.005, 89.99499999999999]
bounds = (17999, 36000)
shape = [2, 4, 8, 16, 32, 64]
overviews = {
affine 2: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (9000, 18000),
},4: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (4500, 9000),
},8: {
"bounds": (-179.995, -89.99499999999999, 180.005, 89.99499999999999),
"shape": (2250, 4500),
},16: {
"bounds": (
-179.995,
-89.99500000000002,
180.00500000000005,
89.99499999999999,
),"shape": (1125, 2250),
},32: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (563, 1125),
},64: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (282, 563),
},
}# Get overview level for associated zoom level
= get_overview_level(bounds, shape, te, overviews)
level # Open overview
= zarr.open(store, mode="r", path=f"{level}/var")
data # Define affine transformation from input to output dataset
= from_bounds(
src_transform *affine[level]["bounds"], affine[level]["shape"][1], affine[level]["shape"][0]
)= from_bounds(*te, width, height)
dst_transform # Create array to host results
= np.zeros((height, width), np.float32)
destination # Reproject dataset
= reproject(
_, transform
np.squeeze(data),
destination,=srcSRS,
src_crs=src_transform,
src_transform=dstSRS,
dst_crs=dst_transform,
dst_transform
)return destination
if __name__ == "__main__":
if "get_ipython" in dir():
# Just call warp_resample if running as a Jupyter Notebook
= warp_resample("mursst")
da else:
# Configure dataset via argpase if running via CLI
= argparse.ArgumentParser(description="Set environment for the script.")
parser
parser.add_argument("--dataset",
="mursst",
defaulthelp="Dataset to resample.",
=["mursst"],
choices
)
parser.add_argument("--zoom",
=0,
defaulthelp="Zoom level for tile extent.",
)= parser.parse_args()
user_args = warp_resample(user_args.dataset, int(user_args.zoom)) da