from pathlib import Path
import earthaccess
import fsspec
import xarray as xr
from virtualizarr import open_virtual_dataset
# Download and virtualize MUR-SST data

# Authenticate via earthaccess
earthaccess.login()

# Download MUR-SST dataset
# Search for a single granule of the collection for 2002-06-01 ...
results = earthaccess.search_data(
    concept_id="C1996881146-POCLOUD", count=1, temporal=("2002-06-01", "2002-06-01")
)
# ... and download it into ./earthaccess_data, keeping the first local path.
fp = earthaccess.download(results, "earthaccess_data")[0]
# Virtualize MUR-SST dataset
def virtualize_dataset(local_fp):
    """Create a virtual reference file for a dataset.

    Opens the local file with ``open_virtual_dataset``, rewrites every chunk
    path so that it points at the granule's direct-access S3 location, and
    writes the references as kerchunk JSON under ``earthaccess_data/``.

    Relies on the module-level ``results`` (the earthaccess search results)
    to look up the direct-access link of the first granule.

    Args:
        local_fp: Path of the locally downloaded file.

    Returns:
        Path of the JSON reference file that was written.
    """
    s3_uri = results[0].data_links(access="direct")[0]

    def local_to_s3_url(old_local_path: str) -> str:
        """Replace the local path with the S3 URI for all chunks."""
        # Everything between the scheme and the file name, e.g. "/bucket/prefix".
        # Joined as plain strings so the separator is always "/", regardless
        # of the platform's native path separator.
        bucket_path = "/".join(s3_uri.split("/")[1:-1])
        filename = Path(old_local_path).name
        return f"s3:/{bucket_path}/{filename}"

    # Name the reference file after the granule, dropping only a trailing
    # ".nc" extension. (str.strip('.nc') would remove any of the characters
    # '.', 'n', 'c' from both ends of the name, not the suffix.)
    granule_name = s3_uri.split("/")[-1]
    if granule_name.endswith(".nc"):
        granule_name = granule_name[: -len(".nc")]
    output_fp = f"earthaccess_data/{granule_name}.json"
    print(output_fp)

    virtual_ds = open_virtual_dataset(str(local_fp), indexes={})
    virtual_ds = virtual_ds.virtualize.rename_paths(local_to_s3_url)
    virtual_ds.virtualize.to_kerchunk(output_fp, format="json")
    return output_fp
output_fp = virtualize_dataset(fp)

# Load results
# The reference file path is set explicitly here, overriding the value
# returned above, so this part can be run without redoing the virtualization.
output_fp = (
    "earthaccess_data/20020601090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v02.0-fv04.1.json"
)
earthaccess.login()
s3_fs = earthaccess.get_s3fs_session(daac="PODAAC")
# Copy the session's storage options so they can be handed to xarray.
storage_options = s3_fs.storage_options.copy()
# Expose the reference file as a filesystem and get a mapping over its root.
fs = fsspec.filesystem("reference", fo=output_fp)
m = fs.get_mapper("")
ds = xr.open_dataset(
    m, engine="kerchunk", chunks={}, storage_options=storage_options
)
# normal xarray.Dataset object, wrapping dask/numpy arrays etc.