import argparse
import earthaccess
import fsspec
import numpy as np
import xarray as xr
import xesmf as xe
Resampling with XESMF (S3 storage, NetCDF file, Zarr reader, kerchunk virtualization, and pre-generated weights)
def _reconstruct_xesmf_weights(ds_w):
"""
Reconstruct weights into format that xESMF understands
Notes
-----
From ndpyramid - https://github.com/carbonplan/ndpyramid
"""
import sparse
import xarray as xr
= ds_w["col"].values - 1
col = ds_w["row"].values - 1
row = ds_w["S"].values
s = ds_w.attrs["n_out"], ds_w.attrs["n_in"]
n_out, n_in = np.stack([row, col])
crds return xr.DataArray(
=("out_dim", "in_dim"), name="weights"
sparse.COO(crds, s, (n_out, n_in)), dims
)
def reconstruct_weights(weights_fp):
"""
Reconstruct weights into format that xESMF understands
Notes
-----
From ndpyramid - https://github.com/carbonplan/ndpyramid
"""
return _reconstruct_xesmf_weights(xr.open_zarr(weights_fp))
def regrid(dataset, zoom=0):
from common import earthaccess_args # noqa: 402
= earthaccess_args[dataset]
args # Load pre-generated weights and target dataset
= f"s3://nasa-veda-scratch/resampling/test-weight-caching/{dataset}-weights-{zoom}.zarr"
weights_fp = f"s3://nasa-veda-scratch/resampling/test-weight-caching/{dataset}-target-{zoom}.zarr"
target_grid_fp = reconstruct_weights(weights_fp)
weights = xr.open_zarr(target_grid_fp)
grid if dataset == "gpm_imerg":
= f'earthaccess_data/{args["filename"][:-4]}.json'
src else:
= f'earthaccess_data/{args["filename"][:-3]}.json'
src # Authenticate with earthaccess
= earthaccess.get_s3fs_session(daac=args["daac"])
s3_fs = s3_fs.storage_options.copy()
storage_options # Specify fsspec caching since default options don't work well for raster data
= {
fsspec_caching "cache_type": "none",
}# Open dataset using kerchunk
= fsspec.filesystem("reference", fo=src, **fsspec_caching)
fs = fs.get_mapper("")
m = xr.open_dataset(
da
m,="kerchunk",
engine={},
chunks=storage_options,
storage_options"variable"]]
)[args[# Create XESMF regridder
= xe.Regridder(
regridder
da,
grid,"nearest_s2d",
=True,
periodic="nearest_s2d",
extrap_method=True,
ignore_degenerate=True,
reuse_weights=weights,
weights
)# Regrid dataset
return regridder(da).load()
if __name__ == "__main__":
if "get_ipython" in dir():
# Just call warp_resample if running as a Jupyter Notebook
= regrid("gpm_imerg")
da else:
# Configure dataset via argpase if running via CLI
= argparse.ArgumentParser(description="Set environment for the script.")
parser
parser.add_argument("--dataset",
="gpm_imerg",
defaulthelp="Dataset to resample.",
=["gpm_imerg", "mursst"],
choices
)
parser.add_argument("--zoom",
=0,
defaulthelp="Zoom level for tile extent.",
)= parser.parse_args()
user_args = regrid(user_args.dataset, int(user_args.zoom)) da