%load_ext autoreload
%autoreload
# External modules
# hvplot.pandas is imported for its side effect: it registers the `.hvplot`
# accessor on pandas objects, which the plotting cell relies on.
import hvplot.pandas
import holoviews as hv
import json
import pandas as pd
import warnings

# Route pandas' built-in `.plot` API through holoviews as well.
pd.options.plotting.backend = 'holoviews'
# Silence library warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')

# Local modules
import sys
# Make the parent directory importable so the local helper modules resolve.
sys.path.append('..')
import helpers.eodc_hub_role as eodc_hub_role
import helpers.dataframe as dataframe_helpers
from xarray_tile_test import XarrayTileTest
Tile Generation Benchmarks for a Zarr Pyramid
Explanation
In this notebook we return to the CMIP6 data to compare the performance of tiling the original data against tiling a pyramid. This helps us understand the performance improvements at lower zoom levels when a pyramid is available.
Setup
# Fetch credentials via the hub-role helper; they are passed to the
# result-storage and result-loading calls later in the notebook.
credentials = eodc_hub_role.fetch_and_set_credentials()
We load the pyramid datasets and the Zarr dataset that has the same chunk shape as the original dataset. We expect the performance of this dataset and of kerchunk to be about the same.
# Number of tile requests issued per zoom level in each batch.
iterations = 10
# Zoom levels to benchmark: 0 through 3.
zooms = range(4)

# Load the dataset specifications written by the dataset-generation step.
# `with` ensures the file handles are closed once the JSON has been parsed.
with open('../01-generate-datasets/cmip6-zarr-datasets.json') as zarr_file:
    cmip6_zarr_datasets = json.load(zarr_file)

# Select the first Zarr dataset whose id contains '600_1440_1'.
# Indexing [0] raises IndexError if no dataset matches, as before.
matching_zarr_datasets = [
    (dataset_id, dataset)
    for dataset_id, dataset in cmip6_zarr_datasets.items()
    if '600_1440_1' in dataset_id
]
zarr_dataset_id, zarr_dataset = matching_zarr_datasets[0]

with open('../01-generate-datasets/cmip6-pyramid-datasets.json') as pyramid_file:
    pyramid_datasets = json.load(pyramid_file)
Run Tests
# Collects the value returned by each test's `store_results` call; the list is
# later handed to `load_all_into_dataframe`.
results = []

# Benchmark the non-pyramid Zarr dataset.
zarr_tile_test = XarrayTileTest(
    dataset_id=zarr_dataset_id,
    **zarr_dataset
)

# Run it multiple times for each zoom level
for zoom in zooms:
    zarr_tile_test.run_batch({'zoom': zoom}, batch_size=iterations)

results.append(zarr_tile_test.store_results(credentials))
Wrote instance data to s3://nasa-eodc-data-store/test-results/20230911155452_XarrayTileTest_600_1440_1_CMIP6_daily_GISS-E2-1-G_tas.zarr.json
# Benchmark every pyramid dataset, storing one result record per dataset.
for pyramid_dataset_id, pyramid_dataset in pyramid_datasets.items():
    pyramid_tile_test = XarrayTileTest(
        dataset_id=pyramid_dataset_id,
        **pyramid_dataset
    )

    # Run it multiple times for each zoom level
    for zoom in zooms:
        pyramid_tile_test.run_batch({'zoom': zoom}, batch_size=iterations)

    results.append(pyramid_tile_test.store_results(credentials))
Wrote instance data to s3://nasa-eodc-data-store/test-results/20230911155458_XarrayTileTest_cmip6-pyramid-reprojected.json
Wrote instance data to s3://nasa-eodc-data-store/test-results/20230911155506_XarrayTileTest_cmip6-pyramid-coarsened.json
Read and Plot Results
# Load every stored result into one dataframe, then expand the timings
# via the project helper.
all_df = dataframe_helpers.load_all_into_dataframe(credentials, results)
expanded_df = dataframe_helpers.expand_timings(all_df)

# Attach a human-readable label per dataset for use in the plots.
expanded_df.loc[expanded_df['dataset_id'] == zarr_dataset_id, 'data_format'] = 'Raw'
expanded_df.loc[expanded_df['dataset_id'] == 'cmip6-pyramid-reprojected', 'data_format'] = 'Reprojected Pyramid'
expanded_df.loc[expanded_df['dataset_id'] == 'cmip6-pyramid-coarsened', 'data_format'] = 'Coarsened Pyramid'
# Colours for the box plots.
# NOTE(review): two colours are listed but three data formats are labelled
# ('Raw', 'Reprojected Pyramid', 'Coarsened Pyramid') — confirm this is intended.
cmap = ["#994F00", "#006CD1"]
# Size options shared by every per-zoom plot.
plt_opts = {"width": 400, "height": 300}

# Build one box plot of render time per zoom level, grouped by data format.
plts = []
for zoom_level in zooms:
    df_level = expanded_df[expanded_df["zoom"] == zoom_level]
    plts.append(
        df_level.hvplot.box(
            y="time",
            by=["data_format"],
            c="data_format",
            cmap=cmap,
            ylabel="Time to render (ms)",
            xlabel="Data Format",
            legend=False,
            title=f"Zoom level {zoom_level}",
        ).opts(**plt_opts)
    )

# Arrange the per-zoom plots in a two-column grid.
hv.Layout(plts).cols(2)

# Persist the labelled results.
expanded_df.to_csv('results-csvs/05-cmip6-pyramid-results.csv')