Resampling with rasterio (S3 storage, NetCDF File, NetCDF4 driver, VSIS3 virtualization, earthaccess auth)

import argparse
import json

import geopandas as gpd
import numpy as np
import rasterio
from rasterio.mask import mask
from rasterio.session import AWSSession
from rasterio.warp import reproject
from shapely.geometry import box
def configure_auth(daac):
    """Authenticate using earthaccess"""
    import boto3
    import earthaccess

    auth = earthaccess.login()
    s3_credentials = auth.get_s3_credentials(daac)
    session = boto3.Session(
        aws_access_key_id=s3_credentials["accessKeyId"],
        aws_secret_access_key=s3_credentials["secretAccessKey"],
        aws_session_token=s3_credentials["sessionToken"],
        region_name="us-west-2",
    )
    rio_env = rasterio.Env(
        AWSSession(session),
    )
    rio_env.__enter__()
    return rio_env
def warp_resample(dataset, zoom=0):
    from common import earthaccess_args, target_extent

    te = target_extent[zoom]

    # Define filepath, driver, and variable information
    args = earthaccess_args[dataset]
    src = f'NETCDF:/vsis3/{args["bucket"]}/{args["folder"]}/{args["filename"]}:{args["variable"]}'
    # Authenticate with earthaccess
    with configure_auth(args["daac"]):
        # Define source and target projection
        dstSRS = "EPSG:3857"
        srcSRS = "EPSG:4326"
        width = height = 256
        with rasterio.open(src) as da:
            # Clip dataset to bounds of Web Mercator tile
            bbox = box(*te)
            geo = gpd.GeoDataFrame(
                {"geometry": bbox}, index=[0], crs=int(dstSRS.split(":")[1])
            )
            geo = geo.to_crs(crs=srcSRS)
            coords = [json.loads(geo.to_json())["features"][0]["geometry"]]
            arr, src_transform = mask(da, shapes=coords, crop=True)
            # Mask and fill array
            ma = arr.astype("float32", casting="unsafe")
            np.multiply(ma, da.scales[0], out=ma, casting="unsafe")
            np.add(ma, da.offsets[0], out=ma, casting="unsafe")
            # Define affine transformation from input to output dataset
            dst_transform = rasterio.transform.from_bounds(*te, width, height)
            # Create array to host results
            destination = np.zeros((height, width), np.float32)
            # Reproject dataset
            _, transform = reproject(
                ma.squeeze(),
                destination,
                src_crs=srcSRS,
                src_transform=src_transform,
                dst_crs=dstSRS,
                dst_transform=dst_transform,
            )
            return destination
if __name__ == "__main__":
    if "get_ipython" in dir():
        # Just call warp_resample if running as a Jupyter Notebook
        da = warp_resample("mursst")
    else:
        # Configure dataset via argpase if running via CLI
        parser = argparse.ArgumentParser(description="Set environment for the script.")
        parser.add_argument(
            "--dataset",
            default="mursst",
            help="Dataset to resample.",
            choices=["mursst"],
        )
        parser.add_argument(
            "--zoom",
            default=0,
            help="Zoom level for tile extent.",
        )
        user_args = parser.parse_args()
        da = warp_resample(user_args.dataset, int(user_args.zoom))