Comparing titiler-cmr usage

This notebook compares tile generation times between titiler-cmr running on AWS Lambda and titiler-cmr running on the VEDA JupyterHub.

import sys

import hvplot.pandas  # noqa
import pandas as pd

sys.path.append("..")
import subprocess
from datetime import datetime

from processing import process_locust_results
from tile import generate_locust_urls

Specify the files containing results from the VEDA JupyterHub tests

# Results recorded while running the tile generation tests on the VEDA Hub.
# Each dataset dict carries the S3 URI of its results file ("uri") and the
# keyword arguments forwarded verbatim to generate_locust_urls ("kwargs").
gpm_imerg = dict(
    uri="s3://nasa-eodc-data-store/test-results/20240816190529_CMRTileTest_C2723754850-GES_DISC.json",
    kwargs=dict(rescale="0,455", colormap_name="coolwarm", output_format="png"),
)
mur_sst = dict(
    uri="s3://nasa-eodc-data-store/test-results/20240816191458_CMRTileTest_C1996881146-POCLOUD.json",
    kwargs=dict(rescale="271,305", colormap_name="coolwarm", output_format="png"),
)

for ds in (gpm_imerg, mur_sst):
    # The test id is the results file name up to its first "."
    results_filename = ds["uri"].rsplit("/", 1)[-1]
    test_id = results_filename.partition(".")[0]
    # Record the id and the two local output locations derived from it
    ds.update(
        test_id=test_id,
        urls_output_file=f"urls/{test_id}.csv",
        results_output=f"results/{test_id}",
    )
    # generate_locust_urls receives the results URI and the URL CSV path;
    # whatever it returns is kept on the dataset under "df"
    ds["df"] = generate_locust_urls(ds["uri"], ds["urls_output_file"], **ds["kwargs"])

Use locust to time tile generation

# Arguments shared by every locust invocation: run headless with one user
# and one iteration, using the locust_titiler_cmr.py locustfile
locust_head = [
    "locust",
    "-f", "locust_titiler_cmr.py",
    "--headless",
    "--users", "1",
    "--iterations", "1",
]
# Trailing arguments: full CSV history and the host the requests are sent to
locust_tail = [
    "--csv-full-history",
    "--host", "https://dev-titiler-cmr.delta-backend.com",
]

for ds in (gpm_imerg, mur_sst):
    # Only the CSV output prefix and the URL list differ between datasets
    command = [
        *locust_head,
        "--csv", ds["results_output"],
        "--urls-file", ds["urls_output_file"],
        *locust_tail,
    ]
    # The exit status is not checked, so a failed run does not stop the loop
    subprocess.run(command)

Process locust results and combine with JupyterHub results

# Columns retained on each dataset's "df" once the results are combined
kept_columns = ["url", "method", "tile", "zoom", "concept_id", "Time (s)"]


def concept_id_from_row(row):
    """Return the value of the first query-string parameter of ``row["url"]``."""
    first_param = row["url"].split("?")[1].split("&")[0]
    return first_param.split("=")[1]


for ds in (gpm_imerg, mur_sst):
    locust_df = process_locust_results(ds["results_output"])
    # Stack the rows already stored on the dataset on top of the locust rows
    stacked = pd.concat([ds["df"], locust_df], axis=0)
    combined_df = stacked.reset_index()
    # presumably "Response Time" is in milliseconds, hence the 1e-3 factor
    # to obtain seconds -- TODO confirm against process_locust_results
    combined_df["Time (s)"] = combined_df["Response Time"].mul(1e-3)
    combined_df["concept_id"] = combined_df.apply(concept_id_from_row, axis=1)
    # Write the full combined table to disk before narrowing the columns
    combined_df.to_csv(f"{ds['results_output']}_combined.csv")
    ds["df"] = combined_df[kept_columns]

Upload results to S3

# Copy the CSV outputs of every dataset from the local "results/" directory
# to a prefix in S3 named after the current local date and time.
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_store = (
    f"s3://nasa-eodc-data-store/tile-benchmarking-results/{current_datetime}/"
)

# One --include filter per dataset. Previously a single filter was built from
# `ds`, the variable left over from the preceding loop, so only the last
# dataset was targeted. The AWS CLI also evaluates filter patterns relative to
# the source directory ("results/"), so the pattern must not repeat the
# "results/" prefix that ds["results_output"] carries; the test id alone is
# the file-name prefix inside that directory.
include_filters = []
for dataset in (gpm_imerg, mur_sst):
    include_filters += ["--include", f"{dataset['test_id']}*.csv"]

subprocess.run(
    [
        "aws",
        "s3",
        "cp",
        "results/",
        results_store,
        "--recursive",
        # Exclude everything first; later filters take precedence, so the
        # --include entries re-admit only the matching CSV files.
        "--exclude",
        "*",
        *include_filters,
    ]
)

Plot results

# Two-colour palette and layout options shared by the box plots
cmap = ["#E1BE6A", "#40B0A6"]
plt_opts = dict(width=1800, height=400, xrotation=90)

# Box plot of response time for the GPM IMERG results: one box per
# (zoom, method) pair, coloured by method
gpm_imerg_box_kwargs = dict(
    y="Time (s)",
    by=["zoom", "method"],
    c="method",
    cmap=cmap,
    ylabel="Response time (s)",
    xlabel="Zoom level",
    legend=False,
)
# Kept as a bare trailing expression so the notebook cell displays the plot
gpm_imerg["df"].hvplot.box(**gpm_imerg_box_kwargs).opts(**plt_opts)

BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 31), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 31)

# Box plot of response time for the MUR SST results: one box per
# (zoom, method) pair, coloured by method
mur_sst_boxplot = mur_sst["df"].hvplot.box(
    y="Time (s)", by=["zoom", "method"],
    c="method", cmap=cmap,
    ylabel="Response time (s)", xlabel="Zoom level",
    legend=False,
)
# Kept as a bare trailing expression so the notebook cell displays the plot
mur_sst_boxplot.opts(**plt_opts)

BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 25), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 25)
BokehUserWarning: ColumnDataSource's columns must be of the same length. Current lengths: ('Time (s)', 25), ('Time_left_parenthesis_s_right_parenthesis', 0), ('index', 25)