# Import libraries
import os
import pandas as pd
import hvplot.pandas
import holoviews as hv
pd.options.plotting.backend = 'holoviews'
import warnings
warnings.filterwarnings('ignore')
import sys
sys.path.append('../helpers/')
import eodc_hub_role
credentials = eodc_hub_role.fetch_and_set_credentials()

## Comparing prod and dev

This notebook compares benchmark results between the prod and dev titiler deployments. Running the end-to-end benchmarks that produce these results is documented in https://github.com/developmentseed/tile-benchmarking/tree/main/03-e2e/README.md.
%%capture
!rm -rf downloaded_*_results/
!aws s3 cp --recursive s3://nasa-eodc-data-store/tile-benchmarking-results/2023-11-22_17-09-28/ downloaded_dev_results/
!aws s3 cp --recursive s3://nasa-eodc-data-store/tile-benchmarking-results/2023-11-22_17-11-37/ downloaded_prod_results/

Parse and merge the results into a single dataframe.
results = { 'prod': {}, 'dev': {} }
for env in results.keys():
    # Specify the directory path and the suffix
    directory_path = f"downloaded_{env}_results/"
    suffix = "_urls_stats.csv"  # One stats CSV is produced per benchmarked dataset
    # List all files in the directory
    all_files = os.listdir(directory_path)
    # Filter the files to only include those that end with the specified suffix
    files_with_suffix = [f"{directory_path}{f}" for f in all_files if f.endswith(suffix)]
    dfs = []
    for file in files_with_suffix:
        df = pd.read_csv(file)
        df['file'] = file
        dfs.append(df)
    merged_df = pd.concat(dfs)
    merged_df['dataset'] = [file.split('/')[1].replace('_urls_stats.csv', '') for file in merged_df['file']]
    results[env]['all'] = merged_df
    # The "Aggregated" results represent aggregations across tile endpoints. 
    results[env][f'Aggregated {env}'] = merged_df[merged_df['Name'] == 'Aggregated']

prod_df = results['prod']['Aggregated prod']
dev_df = results['dev']['Aggregated dev']
merged_df = pd.merge(prod_df, dev_df, on='dataset', suffixes=(' Prod', ' Dev'))
merged_df['Failure Rate Prod'] = merged_df['Failure Count Prod']/merged_df['Request Count Prod'] * 100
merged_df['Failure Rate Dev'] = merged_df['Failure Count Dev']/merged_df['Request Count Dev'] * 100
merged_df[['Median Response Time Prod', 'Failure Rate Prod', 'Median Response Time Dev', 'Failure Rate Dev', 'dataset']].sort_values('Median Response Time Dev')

Median response times are reported in milliseconds and failure rates as percentages.

| | Median Response Time Prod | Failure Rate Prod | Median Response Time Dev | Failure Rate Dev | dataset |
|---|---|---|---|---|---|
| 2 | 460.0 | 0.0 | 200.0 | 0.0 | pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc | 
| 5 | 100.0 | 100.0 | 210.0 | 0.0 | 3B42_Daily.19980101.7.nc4 | 
| 0 | 100.0 | 100.0 | 240.0 | 0.0 | GLDAS_NOAH025_3H.A20230731.2100.021.nc4 | 
| 4 | 500.0 | 0.0 | 290.0 | 0.0 | power_901_monthly_meteorology_utc.zarr | 
| 3 | 470.0 | 0.0 | 420.0 | 0.0 | combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk.... | 
| 6 | 580.0 | 0.0 | 440.0 | 0.0 | cmip6-pds_GISS-E2-1-G_historical_tas | 
| 1 | 110.0 | 100.0 | 640.0 | 0.0 | 3B-DAY.MS.MRG.3IMERG.20000601-S000000-E235959.... | 
| 7 | 830.0 | 0.0 | 690.0 | 0.0 | 20231107090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v... |
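
Note that three datasets show a 100% failure rate in prod, so their prod response times reflect failed requests and are not directly comparable to dev. Since hvplot is loaded as the plotting backend above, the comparison can also be visualized. Below is a minimal sketch, assuming `merged_df` as constructed above; the `compare_df` name and the styling options are illustrative, not part of the original notebook.

# Grouped bar chart of median response times per dataset, prod vs dev
compare_df = merged_df[['dataset', 'Median Response Time Prod', 'Median Response Time Dev']]
compare_df.hvplot.bar(
    x='dataset',
    y=['Median Response Time Prod', 'Median Response Time Dev'],
    rot=90,
    ylabel='Median response time (ms)',
    title='Median tile response time: prod vs dev',
    legend='top_left'
)

Passing a list of columns to `y` produces grouped bars per dataset, which makes the prod/dev gap easy to scan across all eight datasets at once.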