import subprocess
from datetime import datetime
import cf_xarray # noqa
import hvplot.pandas # noqa
from cmr_tile_test import (
generate_memray_flamegraphs,
generate_memray_summaries,
process_function_memory_usage,
process_peak_memory_usage,
run_cmr_tile_tests,
)
import helpers.dataframe as dataframe_helpers
import helpers.eodc_hub_role as eodc_hub_role
# # Tile Generation Benchmarks using CMR
# ## Explanation
# In this notebook we explore tile generation using titiler-cmr with the GPM IMERG and MUR-SST datasets.
# ## Set parameters for benchmarks
# Benchmark parameters for the GPM IMERG dataset; passed to
# run_cmr_tile_tests together with `iterations`.
gpm_imerg = {
    "concept_id": "C2723754850-GES_DISC",
    "variable": "precipitation",
    "cmr_query": {"temporal": ("2000-06-01", "2000-06-01")},
    "rescale": ((0, 48),),
    "colormap_name": "coolwarm",
    "output_format": "png",
}

# Benchmark parameters for the MUR-SST dataset (same keys as gpm_imerg).
mur_sst = {
    "concept_id": "C1996881146-POCLOUD",
    "variable": "analysed_sst",
    "cmr_query": {"temporal": ("2002-06-01", "2002-06-01")},
    "rescale": ((271, 305),),
    "colormap_name": "coolwarm",
    "output_format": "png",
}

# Second argument handed to run_cmr_tile_tests for every dataset.
iterations = 10
# ## Run the tests
# Collect whatever run_cmr_tile_tests returns for each dataset, in the
# order GPM IMERG then MUR-SST.
cmr_test_results = []
cmr_test_results.append(
    run_cmr_tile_tests(
        gpm_imerg,
        iterations,
    )
)
cmr_test_results.append(run_cmr_tile_tests(mur_sst, iterations))
# ## Generate memory profiling summaries and graphs
# Both helpers take "results-memray" as their first argument; the second
# argument differs per helper ("memray-stats" vs "memray-flamegraph").
generate_memray_summaries("results-memray", "memray-stats")
generate_memray_flamegraphs("results-memray", "memray-flamegraph")
# ## Upload memory profiles to S3
# Timestamp (local time, second resolution) used to give each upload its
# own S3 prefix.
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_store = (
    f"s3://nasa-eodc-data-store/tile-benchmarking-profiling-results/{current_datetime}/"
)

# Recursively copy results-memray/ to the prefix above via the AWS CLI.
# The exclude-everything / include-pattern pair restricts the copy to
# names matching "CMRTileTest*".
subprocess.run(
    [
        "aws",
        "s3",
        "cp",
        "results-memray/",
        results_store,
        "--recursive",
        "--exclude",
        "*",
        "--include",
        "CMRTileTest*",
    ]
)
# ## Load + plot results
# Define test results from previous run.
# NOTE: this rebinds cmr_test_results to a list of stored result files on
# S3, replacing the list built by the test-run cell.
cmr_test_results = [
    "s3://nasa-eodc-data-store/test-results/20240816190529_CMRTileTest_C2723754850-GES_DISC.json",
    "s3://nasa-eodc-data-store/test-results/20240816191458_CMRTileTest_C1996881146-POCLOUD.json",
]

# Load every result file into a single dataframe using the fetched
# credentials, then expand its timings via the project helper.
credentials = eodc_hub_role.fetch_and_set_credentials()
df = dataframe_helpers.load_all_into_dataframe(
    credentials, cmr_test_results, use_boto3=False
)
df = dataframe_helpers.expand_timings(df)

# Scale "time" by 1e-3 for plotting; presumably "time" is in milliseconds
# -- TODO confirm against the test harness.
df["Time (s)"] = df["time"] * 1e-3

# Memory statistics derived from "memray-stats".
df_peak_memory = process_peak_memory_usage("memray-stats")
df_function_memory = process_function_memory_usage("memray-stats")

# Plot options applied to the box plots through .opts(**plt_opts).
plt_opts = {"width": 1800, "height": 400, "xrotation": 90}
# Box plot of tile time in seconds, grouped by zoom level and dataset.
df.hvplot.box(
    y="Time (s)",
    by=["zoom", "dataset_id"],
    ylabel="Time to tile (s)",
    xlabel="Zoom level",
    legend=False,
    title="Time to tile using titiler-cmr",
).opts(**plt_opts)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
# Box plot of per-function allocation size (GB), grouped by zoom level
# and method.
df_function_memory.hvplot.box(
    y="size (GB)",
    by=["zoom", "method"],
    xlabel="Zoom level",
    ylabel="memory allocated (GB)",
    title="Functions allocating over 1 GB of memory",
).opts(**plt_opts)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('index', 9), ('size (GB)', 9), ('size_left_parenthesis_GB_right_parenthesis', 0)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('index', 9), ('size (GB)', 9), ('size_left_parenthesis_GB_right_parenthesis', 0)
# Scatter plot of peak memory (GB) against zoom level, one series per
# dataset.
df_peak_memory.hvplot.scatter(
    x="zoom",
    y="peak memory (GB)",
    by="dataset",
    title="Peak memory usage during tile generation",
    xlabel="Zoom level",
    group_label="dataset",
)