Tile Generation Benchmarks using CMR

Explanation

In this notebook we benchmark tile generation with titiler-cmr for the GPM IMERG and MUR-SST datasets, measuring both the time taken to generate tiles and the memory used while doing so.

import subprocess
from datetime import datetime

import cf_xarray  # noqa
import hvplot.pandas  # noqa
from cmr_tile_test import (
    generate_memray_flamegraphs,
    generate_memray_summaries,
    process_function_memory_usage,
    process_peak_memory_usage,
    run_cmr_tile_tests,
)

import helpers.dataframe as dataframe_helpers
import helpers.eodc_hub_role as eodc_hub_role

Set parameters for benchmarks

# Rendering options that are identical for both benchmark datasets.
_render_options = {"colormap_name": "coolwarm", "output_format": "png"}

# GPM IMERG: "precipitation" variable, temporal query starting and ending on
# 2000-06-01, rescale range (0, 48).
gpm_imerg = {
    "concept_id": "C2723754850-GES_DISC",
    "variable": "precipitation",
    "cmr_query": {"temporal": ("2000-06-01", "2000-06-01")},
    "rescale": ((0, 48),),
    **_render_options,
}

# MUR-SST: "analysed_sst" variable, temporal query starting and ending on
# 2002-06-01, rescale range (271, 305).
mur_sst = {
    "concept_id": "C1996881146-POCLOUD",
    "variable": "analysed_sst",
    "cmr_query": {"temporal": ("2002-06-01", "2002-06-01")},
    "rescale": ((271, 305),),
    **_render_options,
}

# Passed to run_cmr_tile_tests for every dataset (presumably the number of
# repetitions per test -- confirm in cmr_tile_test).
iterations = 10

Run the tests

# Benchmark each dataset in turn and keep one result entry per dataset,
# in the order GPM IMERG, then MUR-SST.
cmr_test_results = [
    run_cmr_tile_tests(dataset_params, iterations)
    for dataset_params in (gpm_imerg, mur_sst)
]

Generate memory profiling summaries and graphs

# Both memray helpers receive the same first argument (presumably the
# directory holding the raw memray captures -- confirm in cmr_tile_test);
# only the second argument differs per helper.
memray_profile_dir = "results-memray"
generate_memray_summaries(memray_profile_dir, "memray-stats")
generate_memray_flamegraphs(memray_profile_dir, "memray-flamegraph")

Upload memory profiles to S3

# Timestamp the destination prefix so every upload gets its own S3 folder.
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_store = (
    f"s3://nasa-eodc-data-store/tile-benchmarking-profiling-results/{current_datetime}/"
)
# Recursively copy results-memray/ to S3, restricted to files matching
# "CMRTileTest*": the blanket --exclude "*" followed by --include narrows
# the copy to that pattern.
# check=True raises subprocess.CalledProcessError when the AWS CLI exits with
# a non-zero status, so a failed upload stops the notebook instead of being
# silently ignored.
subprocess.run(
    [
        "aws",
        "s3",
        "cp",
        "results-memray/",
        results_store,
        "--recursive",
        "--exclude",
        "*",
        "--include",
        "CMRTileTest*",
    ],
    check=True,
)

Load + plot results

# Result files from a previous benchmark run. This rebinding replaces whatever
# cmr_test_results held before this point.
cmr_test_results = [
    "s3://nasa-eodc-data-store/test-results/20240816190529_CMRTileTest_C2723754850-GES_DISC.json",
    "s3://nasa-eodc-data-store/test-results/20240816191458_CMRTileTest_C1996881146-POCLOUD.json",
]
credentials = eodc_hub_role.fetch_and_set_credentials()

# Load every result file into one dataframe, then expand its timings.
df = dataframe_helpers.expand_timings(
    dataframe_helpers.load_all_into_dataframe(
        credentials, cmr_test_results, use_boto3=False
    )
)
# "time" is scaled by 1e-3 to obtain seconds (presumably it is recorded in
# milliseconds -- confirm against the test harness).
df["Time (s)"] = df["time"] * 1e-3

# Memory statistics used by the plots that follow.
df_peak_memory = process_peak_memory_usage("memray-stats")
df_function_memory = process_function_memory_usage("memray-stats")

# Display options shared by the box plots.
plt_opts = {"width": 1800, "height": 400, "xrotation": 90}

# NOTE(review): the BokehUserWarning in this cell's output mentions a
# sanitized form of "Time (s)"; the parenthesised column name looks like the
# trigger -- confirm before renaming.
timing_boxplot = df.hvplot.box(
    y="Time (s)",
    by=["zoom", "dataset_id"],
    ylabel="Time to tile (s)",
    xlabel="Zoom level",
    legend=False,
    title="Time to tile using titiler-cmr",
)
timing_boxplot.opts(**plt_opts)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
# Box plot of "size (GB)" grouped by zoom level and method, using the
# shared plt_opts display options.
function_memory_boxplot = df_function_memory.hvplot.box(
    y="size (GB)",
    by=["zoom", "method"],
    xlabel="Zoom level",
    ylabel="memory allocated (GB)",
    title="Functions allocating over 1 GB of memory",
)
function_memory_boxplot.opts(**plt_opts)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('index', 9), ('size (GB)', 9), ('size_left_parenthesis_GB_right_parenthesis', 0)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('index', 9), ('size (GB)', 9), ('size_left_parenthesis_GB_right_parenthesis', 0)
# Scatter plot of "peak memory (GB)" against zoom level, one series per
# dataset.
peak_memory_plot_kwargs = {
    "x": "zoom",
    "y": "peak memory (GB)",
    "by": "dataset",
    "title": "Peak memory usage during tile generation",
    "xlabel": "Zoom level",
    "group_label": "dataset",
}
df_peak_memory.hvplot.scatter(**peak_memory_plot_kwargs)