%load_ext autoreload
%autoreload
# External modules
import hvplot.pandas
import holoviews as hv
import json
import pandas as pd
pd.options.plotting.backend = 'holoviews'
import warnings
warnings.filterwarnings('ignore')
# Local modules
import sys; sys.path.append('..')
import helpers.eodc_hub_role as eodc_hub_role
import helpers.dataframe as dataframe_helpers
from xarray_tile_test import XarrayTileTest
Tile Generation Benchmarks for a Zarr Pyramid
Explanation
In this notebook we return to the CMIP6 data to compare the performance of tiling the original data with the performance of tiling a pyramid built from it. This helps us understand the performance improvements at lower zoom levels when a pyramid is available.
Setup
# Obtain credentials through the eodc_hub_role helper; they are handed to
# store_results() and load_all_into_dataframe() further down.
credentials = eodc_hub_role.fetch_and_set_credentials()

# We load the pyramid and the zarr dataset with the same chunk shape as the
# original dataset. We expect this dataset and the kerchunk performance to be
# about the same.
iterations = 10  # passed as batch_size to every run_batch() call
zooms = range(4)  # zoom levels 0 through 3

# Read the dataset specifications. The context managers close each file as
# soon as it has been parsed instead of leaving the handle open.
with open('../01-generate-datasets/cmip6-zarr-datasets.json') as zarr_specs_file:
    cmip6_zarr_datasets = json.load(zarr_specs_file)

# Pick the first dataset whose id contains the '600_1440_1' chunk-shape tag.
zarr_dataset_id, zarr_dataset = next(
    (k, v) for k, v in cmip6_zarr_datasets.items() if '600_1440_1' in k
)

with open('../01-generate-datasets/cmip6-pyramid-datasets.json') as pyramid_specs_file:
    pyramid_datasets = json.load(pyramid_specs_file)

# Run Tests
# Accumulates whatever store_results() returns for each test; the list is
# later passed to load_all_into_dataframe().
results = []

# Test instance for the Zarr dataset chosen during setup; the dataset's JSON
# entry supplies the remaining constructor keyword arguments.
zarr_tile_test = XarrayTileTest(dataset_id=zarr_dataset_id, **zarr_dataset)

# Run it multiple times for each zoom level
for zoom in zooms:
    zarr_tile_test.run_batch({'zoom': zoom}, batch_size=iterations)

# Store the results once, after every zoom level has been run.
results.append(zarr_tile_test.store_results(credentials))
# Output:
# Wrote instance data to s3://nasa-eodc-data-store/test-results/20230911155452_XarrayTileTest_600_1440_1_CMIP6_daily_GISS-E2-1-G_tas.zarr.json
# Repeat the same benchmark for every pyramid dataset listed in the JSON file.
for pyramid_dataset_id, pyramid_dataset in pyramid_datasets.items():
    pyramid_tile_test = XarrayTileTest(dataset_id=pyramid_dataset_id, **pyramid_dataset)

    # Run it multiple times for each zoom level
    for zoom in zooms:
        pyramid_tile_test.run_batch({'zoom': zoom}, batch_size=iterations)

    # Store the results once per pyramid, after all of its zoom levels ran.
    results.append(pyramid_tile_test.store_results(credentials))
# Output:
# Wrote instance data to s3://nasa-eodc-data-store/test-results/20230911155458_XarrayTileTest_cmip6-pyramid-reprojected.json
# Wrote instance data to s3://nasa-eodc-data-store/test-results/20230911155506_XarrayTileTest_cmip6-pyramid-coarsened.json
Read and Plot Results
# Load the stored results into one dataframe, then expand its timings; both
# steps are delegated to the helpers.dataframe module.
all_df = dataframe_helpers.load_all_into_dataframe(credentials, results)
expanded_df = dataframe_helpers.expand_timings(all_df)

# Display label written to the 'data_format' column for each dataset id.
# Applied in this order, one boolean-mask assignment per dataset.
data_format_labels = (
    (zarr_dataset_id, 'Raw'),
    ('cmip6-pyramid-reprojected', 'Reprojected Pyramid'),
    ('cmip6-pyramid-coarsened', 'Coarsened Pyramid'),
)
for dataset_id, data_format in data_format_labels:
    expanded_df.loc[expanded_df['dataset_id'] == dataset_id, 'data_format'] = data_format

# Colors used for the box plots.
# NOTE(review): only two colors are listed while three data_format labels are
# assigned above -- confirm how the plotting backend handles the third one.
cmap = ["#994F00", "#006CD1"]
# Size applied to every individual box plot.
plt_opts = {"width": 400, "height": 300}

# Keyword arguments shared by all per-zoom box plots; only the title varies.
box_kwargs = {
    "y": "time",
    "by": ["data_format"],
    "c": "data_format",
    "cmap": cmap,
    "ylabel": "Time to render (ms)",
    "xlabel": "Data Format",
    "legend": False,
}

# One box plot per zoom level, comparing the timings of each data format.
plts = []
for zoom_level in zooms:
    df_level = expanded_df[expanded_df["zoom"] == zoom_level]
    box = df_level.hvplot.box(title=f"Zoom level {zoom_level}", **box_kwargs)
    plts.append(box.opts(**plt_opts))

# Arrange the plots in a two-column grid.
hv.Layout(plts).cols(2)

# Save the expanded timings as a CSV file.
expanded_df.to_csv('results-csvs/05-cmip6-pyramid-results.csv')