import subprocess
from datetime import datetime
import cf_xarray # noqa
import hvplot.pandas # noqa
from cmr_tile_test import (
generate_memray_flamegraphs,
generate_memray_summaries,
process_function_memory_usage,
process_peak_memory_usage,
run_cmr_tile_tests,
)
import helpers.dataframe as dataframe_helpers
import helpers.eodc_hub_role as eodc_hub_role

# Tile Generation Benchmarks using CMR
#
# Explanation
#
# In this notebook we explore tile generation using titiler-cmr with the
# GPM IMERG and MUR-SST datasets.
#
# Set parameters for benchmarks
# Tile-request parameters for the GPM IMERG dataset: a CMR concept id, the
# variable to render, a single-day temporal query, and rendering options.
gpm_imerg = {
    "concept_id": "C2723754850-GES_DISC",
    "variable": "precipitation",
    "cmr_query": {"temporal": ("2000-06-01", "2000-06-01")},
    "rescale": ((0, 48),),
    "colormap_name": "coolwarm",
    "output_format": "png",
}

# Same structure for the MUR-SST dataset; only the concept id, variable,
# query date, and rescale range differ.
mur_sst = {
    "concept_id": "C1996881146-POCLOUD",
    "variable": "analysed_sst",
    "cmr_query": {"temporal": ("2002-06-01", "2002-06-01")},
    "rescale": ((271, 305),),
    "colormap_name": "coolwarm",
    "output_format": "png",
}

# Passed to run_cmr_tile_tests for every dataset; presumably the number of
# repetitions per test -- confirm against cmr_tile_test.
iterations = 10

# Run the tests
# Run the tile benchmark once per dataset configuration, collecting whatever
# run_cmr_tile_tests returns for each. Results are appended one at a time
# (rather than built in a comprehension) so the first dataset's result is
# still available if the second run fails.
cmr_test_results = []
for dataset_params in (gpm_imerg, mur_sst):
    cmr_test_results.append(run_cmr_tile_tests(dataset_params, iterations))

# Generate memory profiling summaries and graphs
# Build memray summaries and flamegraphs. Both helpers are given the
# "results-memray" directory plus a second directory name; presumably the
# first is the input and the second the output location -- confirm against
# cmr_tile_test.
generate_memray_summaries("results-memray", "memray-stats")
generate_memray_flamegraphs("results-memray", "memray-flamegraph")

# Upload memory profiles to S3
# Use a timestamped S3 prefix so each upload lands under its own folder.
# The timestamp is local (naive) time, as returned by datetime.now().
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_store = (
    f"s3://nasa-eodc-data-store/tile-benchmarking-profiling-results/{current_datetime}/"
)

# Copy the contents of results-memray/ to the prefix above with the AWS CLI.
# The filters exclude everything and then re-include only "CMRTileTest*", so
# just the files matching that pattern are uploaded.
# NOTE(review): the exit status is not checked, so a failed upload does not
# raise here -- inspect the returned CompletedProcess if that matters.
subprocess.run(
    [
        "aws",
        "s3",
        "cp",
        "results-memray/",
        results_store,
        "--recursive",
        "--exclude",
        "*",
        "--include",
        "CMRTileTest*",
    ]
)

# Load + plot results
# Define test results from previous run.
# This rebinds cmr_test_results, so the plots below use these stored result
# files rather than the results of the benchmark run above.
cmr_test_results = [
    "s3://nasa-eodc-data-store/test-results/20240816190529_CMRTileTest_C2723754850-GES_DISC.json",
    "s3://nasa-eodc-data-store/test-results/20240816191458_CMRTileTest_C1996881146-POCLOUD.json",
]

# Credentials handed to the dataframe loader when reading the result files.
credentials = eodc_hub_role.fetch_and_set_credentials()
# Load the stored test results into one dataframe and expand its timings.
df = dataframe_helpers.load_all_into_dataframe(
    credentials, cmr_test_results, use_boto3=False
)
df = dataframe_helpers.expand_timings(df)
# Scale "time" by 1e-3 for plotting in seconds (presumably "time" is in
# milliseconds -- confirm against the result schema).
df["Time (s)"] = df["time"] * 1e-3

# Memory statistics derived from the "memray-stats" directory.
df_peak_memory = process_peak_memory_usage("memray-stats")
df_function_memory = process_function_memory_usage("memray-stats")

# Display options shared by the box plots.
plt_opts = {"width": 1800, "height": 400, "xrotation": 90}

# Distribution of tile times, grouped by zoom level and dataset.
df.hvplot.box(
    y="Time (s)",
    by=["zoom", "dataset_id"],
    ylabel="Time to tile (s)",
    xlabel="Zoom level",
    legend=False,
    title="Time to tile using titiler-cmr",
).opts(**plt_opts)

# Output recorded from the original notebook run:
# BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
# BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
# Distribution of per-function memory allocation, grouped by zoom level and
# method.
df_function_memory.hvplot.box(
    y="size (GB)",
    by=["zoom", "method"],
    xlabel="Zoom level",
    ylabel="memory allocated (GB)",
    title="Functions allocating over 1 GB of memory",
).opts(**plt_opts)

# Output recorded from the original notebook run:
# BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('index', 9), ('size (GB)', 9), ('size_left_parenthesis_GB_right_parenthesis', 0)
# BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('index', 9), ('size (GB)', 9), ('size_left_parenthesis_GB_right_parenthesis', 0)
# Scatter plot of peak memory against zoom level, one series per dataset.
peak_memory_plot_kwargs = {
    "x": "zoom",
    "y": "peak memory (GB)",
    "by": "dataset",
    "group_label": "dataset",
    "xlabel": "Zoom level",
    "title": "Peak memory usage during tile generation",
}
df_peak_memory.hvplot.scatter(**peak_memory_plot_kwargs)