import argparse
import os
import numpy as np
import zarr
from icechunk import IcechunkStore, StorageConfig
from rasterio.crs import CRS
from rasterio.transform import from_bounds
from rasterio.warp import calculate_default_transform, reprojectResampling with Rasterio (S3 storage, Web-Optimized Zarr V3 store, Zarr reader with icechunk)
WARNING: This notebook is intented to show the potential for web-optimized Zarrs that contain overviews, but the approach should not be used in production due to the lack a robust approach following a metadata specification.
# Define filepath, driver, and variable information
dataset = "mursst"
dst = f"{os.getcwd()}/earthaccess_data/{dataset}-overviews.zarr"
# Open dataset
storage_config = StorageConfig.filesystem(dst)
store = IcechunkStore.open_or_create(storage=storage_config)def get_overview_level(
dataset_bounds,
dataset_shape,
target_bounds: tuple,
overviews: list,
height: int = 256,
width: int = 256,
srcSRS: CRS = CRS.from_string("EPSG:4326"),
dstSRS: CRS = CRS.from_string("EPSG:3857"),
) -> int:
"""Return the overview level corresponding to the tile resolution.
Freely adapted from rio-tiler, which freely adapted from https://github.com/OSGeo/gdal/blob/41993f127e6e1669fbd9e944744b7c9b2bd6c400/gdal/apps/gdalwarp_lib.cpp#L2293-L2362
Args:
src_dst (rasterio.io.DatasetReader or rasterio.io.DatasetWriter or rasterio.vrt.WarpedVRT): Rasterio dataset.
bounds (tuple): Bounding box coordinates in target crs (**dstSRS**).
overviews (list): List of overview decimation levels.
height (int): Desired output height of the array for the input bounds.
width (int): Desired output width of the array for the input bounds.
srcSRS (rasterio.crs.CRS, optional): Source Coordinate Reference System. Defaults to `epsg:4326`.
dstSRS (rasterio.crs.CRS, optional): Target Coordinate Reference System. Defaults to `epsg:3857`.
Returns:
int: Overview level.
"""
dst_transform, _, _ = calculate_default_transform(
srcSRS, dstSRS, dataset_shape[1], dataset_shape[0], *dataset_bounds
)
src_res = dst_transform.a
# Compute what the "natural" output resolution
# (in pixels) would be for this input dataset
vrt_transform = from_bounds(*target_bounds, width, height)
target_res = vrt_transform.a
ovr_idx = -1
if target_res > src_res:
res = [src_res * decim for decim in overviews]
for idx in range(ovr_idx, len(res) - 1):
ovr_idx = idx
ovrRes = src_res if ovr_idx < 0 else res[ovr_idx]
nextRes = res[ovr_idx + 1]
if (ovrRes < target_res) and (nextRes > target_res):
break
if abs(ovrRes - target_res) < 1e-1:
break
else:
print("else")
ovr_idx = len(res) - 1
return overviews[ovr_idx - 1]def warp_resample(dataset, zoom=0):
from common import target_extent
te = target_extent[zoom]
# Open dataset
storage = StorageConfig.s3_from_env(
bucket="nasa-veda-scratch",
prefix=f"resampling/icechunk/{dataset}-overviews.zarr",
region="us-west-2",
)
store = IcechunkStore.open_or_create(storage=storage)
# Define source and target projection
dstSRS = "EPSG:3857"
srcSRS = "EPSG:4326"
width = height = 256
# Hard code some metadata that could be included in a GeoZarr spec
bounds = [-179.995, -89.99499999999999, 180.005, 89.99499999999999]
shape = (17999, 36000)
overviews = [2, 4, 8, 16, 32, 64]
affine = {
2: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (9000, 18000),
},
4: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (4500, 9000),
},
8: {
"bounds": (-179.995, -89.99499999999999, 180.005, 89.99499999999999),
"shape": (2250, 4500),
},
16: {
"bounds": (
-179.995,
-89.99500000000002,
180.00500000000005,
89.99499999999999,
),
"shape": (1125, 2250),
},
32: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (563, 1125),
},
64: {
"bounds": (-179.995, -89.99500000000002, 180.005, 89.99499999999999),
"shape": (282, 563),
},
}
# Get overview level for associated zoom level
level = get_overview_level(bounds, shape, te, overviews)
# Open overview
data = zarr.open(store, mode="r", path=f"{level}/var")
# Define affine transformation from input to output dataset
src_transform = from_bounds(
*affine[level]["bounds"], affine[level]["shape"][1], affine[level]["shape"][0]
)
dst_transform = from_bounds(*te, width, height)
# Create array to host results
destination = np.zeros((height, width), np.float32)
# Reproject dataset
_, transform = reproject(
np.squeeze(data),
destination,
src_crs=srcSRS,
src_transform=src_transform,
dst_crs=dstSRS,
dst_transform=dst_transform,
)
return destinationif __name__ == "__main__":
if "get_ipython" in dir():
# Just call warp_resample if running as a Jupyter Notebook
da = warp_resample("mursst")
else:
# Configure dataset via argpase if running via CLI
parser = argparse.ArgumentParser(description="Set environment for the script.")
parser.add_argument(
"--dataset",
default="mursst",
help="Dataset to resample.",
choices=["mursst"],
)
parser.add_argument(
"--zoom",
default=0,
help="Zoom level for tile extent.",
)
user_args = parser.parse_args()
da = warp_resample(user_args.dataset, int(user_args.zoom))