Download and virtualize MUR-SST data

from pathlib import Path

import earthaccess
import fsspec
import xarray as xr
from virtualizarr import open_virtual_dataset

Authenticate via earthaccess

# Authenticate with earthaccess before searching for and downloading data.
earthaccess.login()

Download MUR-SST dataset

# Search the MUR-SST collection for 2002-06-01 (count=1), then download the
# search results into the local "earthaccess_data" directory and keep the
# first downloaded file path.
results = earthaccess.search_data(
    concept_id="C1996881146-POCLOUD",
    temporal=("2002-06-01", "2002-06-01"),
    count=1,
)
downloaded_files = earthaccess.download(results, "earthaccess_data")
fp = downloaded_files[0]

Virtualize MUR-SST dataset

def virtualize_dataset(local_fp, s3_uri=None):
    """Create a kerchunk JSON reference file for a locally downloaded file.

    The file at ``local_fp`` is opened as a virtual dataset, every chunk
    path is rewritten from the local copy to the corresponding S3 location,
    and the references are written to ``earthaccess_data/<name>.json``,
    where ``<name>`` is the last component of ``s3_uri`` without its
    ``.nc`` suffix.

    Parameters
    ----------
    local_fp
        Path of the downloaded file to virtualize.
    s3_uri : str, optional
        S3 URI of the same file. When omitted, the first direct-access data
        link of the module-level ``results[0]`` is used.

    Returns
    -------
    str
        Path of the JSON reference file that was written.
    """
    if s3_uri is None:
        s3_uri = results[0].data_links(access="direct")[0]

    # Split the URI with plain string operations: pathlib is meant for
    # filesystem paths and would change the separators on Windows.
    s3_prefix, _, remote_name = s3_uri.rpartition("/")

    def local_to_s3_url(old_local_path: str) -> str:
        """Replace a local chunk path with its S3 URI, keeping the file name."""
        return f"{s3_prefix}/{Path(old_local_path).name}"

    # str.strip(".nc") would remove any run of the characters ".", "n" and
    # "c" from BOTH ends of the name; only the literal ".nc" suffix must go.
    if remote_name.endswith(".nc"):
        stem = remote_name[: -len(".nc")]
    else:
        stem = remote_name
    output_fp = f"earthaccess_data/{stem}.json"
    print(output_fp)

    virtual_ds = open_virtual_dataset(str(local_fp), indexes={})
    virtual_ds = virtual_ds.virtualize.rename_paths(local_to_s3_url)
    virtual_ds.virtualize.to_kerchunk(output_fp, format="json")
    return output_fp


output_fp = virtualize_dataset(fp)

Load results

# Path of the kerchunk reference JSON to load. It is hard-coded here, so this
# assignment replaces whatever value output_fp held before.
output_fp = (
    "earthaccess_data/20020601090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.json"
)
# Log in and obtain an S3 filesystem session for the PODAAC DAAC.
earthaccess.login()
s3_fs = earthaccess.get_s3fs_session(daac="PODAAC")
# Copy the session's storage options so they can be passed to xarray below.
storage_options = s3_fs.storage_options.copy()
# Open the reference JSON through fsspec's "reference" filesystem.
# NOTE(review): no remote_protocol/remote_options are given here; confirm the
# S3 chunks are reachable with only the storage_options passed to xarray.
fs = fsspec.filesystem("reference", fo=output_fp)

# Mapper over the root ("") of the reference filesystem.
m = fs.get_mapper("")
ds = xr.open_dataset(
    m, engine="kerchunk", chunks={}, storage_options=storage_options
)  # normal xarray.Dataset object, wrapping dask/numpy arrays etc.