Comparing titiler-cmr usage against AWS Lambda concurrency limits

This notebook examines how tile generation with titiler-cmr on AWS Lambda behaves under different Lambda concurrency limits.

import sys

import hvplot.pandas  # noqa
import pandas as pd

sys.path.append("..")
import subprocess
from datetime import datetime

from processing import process_locust_results
from tile import generate_locust_urls

Specify file containing results from VEDA JupyterHub tests

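We’ll run the tests twice: first with the default (unreserved) concurrency limit, and then with a reserved concurrency limit of 50. Set `test_id_suffix` to match the run being performed (`_unreserved_concurrency` or `_reserved_concurrency`), because the processing step below reads result files with both suffixes.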

# S3 locations of the tile-test results produced on the VEDA Hub, together
# with the keyword arguments forwarded to generate_locust_urls per dataset.
gpm_imerg = {
    "uri": "s3://nasa-eodc-data-store/test-results/20240816190529_CMRTileTest_C2723754850-GES_DISC.json",
    "kwargs": {
        "rescale": "0,455",
        "colormap_name": "coolwarm",
        "output_format": "png",
    },
}
mur_sst = {
    "uri": "s3://nasa-eodc-data-store/test-results/20240816191458_CMRTileTest_C1996881146-POCLOUD.json",
    "kwargs": {
        "rescale": "271,305",
        "colormap_name": "coolwarm",
        "output_format": "png",
    },
}

# Appended to every output name so the files of this run can be told apart.
test_id_suffix = "_reserved_concurrency"

for ds in (gpm_imerg, mur_sst):
    # The test id is the results file name up to its first ".".
    file_name = ds["uri"].rsplit("/", 1)[-1]
    test_id = file_name.partition(".")[0]
    ds.update(
        test_id=test_id,
        urls_output_file=f"urls/{test_id}{test_id_suffix}.csv",
        results_output=f"results/{test_id}{test_id_suffix}",
    )
    # NOTE(review): subset=slice(0, 50) presumably restricts the run to the
    # first 50 entries -- confirm against generate_locust_urls.
    ds["df"] = generate_locust_urls(
        ds["uri"],
        ds["urls_output_file"],
        subset=slice(0, 50),
        **ds["kwargs"],
    )

Use locust to time tile generation

# Launch one headless locust session per dataset, reading the URL list written
# for that dataset and writing CSV statistics under its results prefix.
for ds in (gpm_imerg, mur_sst):
    command = ["locust", "-f", "locust_titiler_cmr.py", "--headless"]
    command += ["--users", "100"]
    command += ["--iterations", "100"]
    command += ["--csv", ds["results_output"]]
    command += ["--urls-file", ds["urls_output_file"]]
    command += ["--csv-full-history"]
    command += ["--host", "https://dev-titiler-cmr.delta-backend.com"]
    # The return code is not inspected, so the loop always moves on to the
    # next dataset regardless of how locust exits.
    subprocess.run(command)

Process locust results

def _first_query_value(row):
    """Return the value of the first query-string parameter of ``row["url"]``."""
    query_string = row["url"].split("?")[1]
    leading_pair = query_string.split("&")[0]
    return leading_pair.split("=")[1]


# For each dataset: load the reserved and unreserved runs, stack them, add
# response times in seconds plus a concept_id column, save the combined table
# and keep a trimmed view on the dataset dict for plotting.
for ds in (gpm_imerg, mur_sst):
    df_reserved = process_locust_results(
        f"results/{ds['test_id']}_reserved_concurrency",
        run_id="reserved",
        split_aggregated=False,
    )
    df_unreserved = process_locust_results(
        f"results/{ds['test_id']}_unreserved_concurrency",
        run_id="unreserved",
        split_aggregated=False,
    )
    stacked = pd.concat([df_reserved, df_unreserved], axis=0)
    combined_df = stacked.reset_index()

    # Scale each response-time column by 1e-3 into a new "<name> (s)" column.
    response_time_columns = (
        "Average Response Time",
        "Median Response Time",
        "Min Response Time",
        "Max Response Time",
    )
    columns = []
    for source_name in response_time_columns:
        seconds_name = f"{source_name} (s)"
        combined_df[seconds_name] = combined_df[source_name] * 1e-3
        columns.append(seconds_name)

    # The concept id is taken positionally: the first parameter after "?".
    combined_df["concept_id"] = combined_df.apply(_first_query_value, axis=1)

    combined_df.to_csv(f"{ds['results_output']}_combined.csv")

    report_columns = [
        "url",
        "method",
        "tile",
        "zoom",
        "run_id",
        "concept_id",
        "Request Count",
        "Failure Count",
    ]
    report_columns += columns
    report_columns.append("Average Content Size")
    ds["df"] = combined_df[report_columns]

Upload results to S3

# Copy this run's result CSVs to a timestamped prefix in the results bucket.
current_datetime = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
results_store = (
    f"s3://nasa-eodc-data-store/tile-benchmarking-results/{current_datetime}/"
)

# Build one --include filter per dataset. Relying on the leftover loop
# variable `ds` would only match the last dataset iterated (mur_sst) and
# silently skip the GPM IMERG results.
include_filters = []
for dataset in (gpm_imerg, mur_sst):
    include_filters += [
        "--include",
        f"{dataset['test_id']}{test_id_suffix}*.csv",
    ]

# "--exclude *" first drops everything; the later --include filters then
# re-admit only the CSVs belonging to this run.
# check=True raises CalledProcessError if the upload fails instead of
# letting the failure go unnoticed.
subprocess.run(
    [
        "aws",
        "s3",
        "cp",
        "results/",
        results_store,
        "--recursive",
        "--exclude",
        "*",
        *include_filters,
    ],
    check=True,
)

Plot results

Show failure rates for unreserved concurrency (~1000) versus reserved concurrency (50) for GPM IMERG

These results show that, in a simulation of 100 users each requesting one tile at a time, roughly half of the requests fail when the Lambda function is configured with a reserved concurrency of 50. By default, AWS Lambda allows 1,000 concurrent executions shared across all functions in an account per AWS Region, although a quota increase can be requested. The concurrency limit therefore bounds the scalability of titiler-cmr.

# Total requests and failures per run for GPM IMERG, then the share of
# requests that failed, drawn as one bar per concurrency setting.
runs = gpm_imerg["df"].groupby(["run_id"])
summary_df = runs[["Request Count", "Failure Count"]].sum()
failed_fraction = summary_df["Failure Count"] / summary_df["Request Count"]
summary_df["Failure Rate (%)"] = failed_fraction * 100

bar_options = {
    "x": "run_id",
    "y": "Failure Rate (%)",
    "xlabel": "Lambda concurrency setting",
    "ylim": (0, 100),
    "title": "GPM IMERG",
}
# Kept as the final bare expression so the notebook renders the chart.
summary_df.hvplot.bar(**bar_options)

Show failure rates for unreserved concurrency (~1000) versus reserved concurrency (50) for MUR SST

# Total requests and failures per run for MUR SST, then the share of
# requests that failed, drawn as one bar per concurrency setting.
runs = mur_sst["df"].groupby(["run_id"])
summary_df = runs[["Request Count", "Failure Count"]].sum()
failed_fraction = summary_df["Failure Count"] / summary_df["Request Count"]
summary_df["Failure Rate (%)"] = failed_fraction * 100

bar_options = {
    "x": "run_id",
    "y": "Failure Rate (%)",
    "xlabel": "Lambda concurrency setting",
    "ylim": (0, 100),
    "title": "MUR SST",
}
# Kept as the final bare expression so the notebook renders the chart.
summary_df.hvplot.bar(**bar_options)