```python
# Import libraries
import os
import pandas as pd
import hvplot.pandas
import holoviews as hv

pd.options.plotting.backend = 'holoviews'

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append('../helpers/')
import eodc_hub_role

credentials = eodc_hub_role.fetch_and_set_credentials()
```
# Comparing prod and dev

This notebook compares results between the prod and dev titiler deployments. Running the end-to-end benchmarks is documented in https://github.com/developmentseed/tile-benchmarking/tree/main/03-e2e/README.md.
```python
%%capture
!rm -rf downloaded_*_results/
!aws s3 cp --recursive s3://nasa-eodc-data-store/tile-benchmarking-results/2023-11-22_17-09-28/ downloaded_dev_results/
!aws s3 cp --recursive s3://nasa-eodc-data-store/tile-benchmarking-results/2023-11-22_17-11-37/ downloaded_prod_results/
```
Parse the downloaded stats CSVs and merge them into a single dataframe for each environment.
```python
results = {'prod': {}, 'dev': {}}

for env in results.keys():
    # Specify the directory path and the stats-file suffix
    directory_path = f"downloaded_{env}_results/"
    suffix = "_urls_stats.csv"

    # List all files in the directory
    all_files = os.listdir(directory_path)

    # Filter the files to only include the per-URL stats CSVs
    files_with_suffix = [f"{directory_path}{f}" for f in all_files if f.endswith(suffix)]

    dfs = []
    for file in files_with_suffix:
        df = pd.read_csv(file)
        df['file'] = file
        dfs.append(df)

    merged_df = pd.concat(dfs)
    merged_df['dataset'] = [file.split('/')[1].replace('_urls_stats.csv', '') for file in merged_df['file']]
    results[env]['all'] = merged_df
    # The "Aggregated" rows represent aggregations across tile endpoints.
    results[env][f'Aggregated {env}'] = merged_df[merged_df['Name'] == 'Aggregated']
```
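Before comparing environments, it can help to sanity-check what the loop produced. The snippet below is a hypothetical check (not part of the original notebook); it assumes only the `results` dict built above.

```python
# Hypothetical sanity check: confirm each environment produced per-dataset rows
# plus the "Aggregated" summary rows before merging prod and dev.
for env, tables in results.items():
    n_datasets = tables['all']['dataset'].nunique()
    n_aggregated = len(tables[f'Aggregated {env}'])
    print(f"{env}: {n_datasets} datasets, {n_aggregated} aggregated rows")
```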
Merge the aggregated prod and dev results on dataset and compute failure rates as a percentage of total requests.

```python
prod_df = results['prod']['Aggregated prod']
dev_df = results['dev']['Aggregated dev']

merged_df = pd.merge(prod_df, dev_df, on='dataset', suffixes=(' Prod', ' Dev'))
merged_df['Failure Rate Prod'] = merged_df['Failure Count Prod'] / merged_df['Request Count Prod'] * 100
merged_df['Failure Rate Dev'] = merged_df['Failure Count Dev'] / merged_df['Request Count Dev'] * 100
merged_df[['Median Response Time Prod', 'Failure Rate Prod', 'Median Response Time Dev', 'Failure Rate Dev', 'dataset']].sort_values('Median Response Time Dev')
```
| | Median Response Time Prod | Failure Rate Prod | Median Response Time Dev | Failure Rate Dev | dataset |
|---|---|---|---|---|---|
| 2 | 460.0 | 0.0 | 200.0 | 0.0 | pr_day_ACCESS-CM2_historical_r1i1p1f1_gn_1950.nc |
| 5 | 100.0 | 100.0 | 210.0 | 0.0 | 3B42_Daily.19980101.7.nc4 |
| 0 | 100.0 | 100.0 | 240.0 | 0.0 | GLDAS_NOAH025_3H.A20230731.2100.021.nc4 |
| 4 | 500.0 | 0.0 | 290.0 | 0.0 | power_901_monthly_meteorology_utc.zarr |
| 3 | 470.0 | 0.0 | 420.0 | 0.0 | combined_CMIP6_daily_GISS-E2-1-G_tas_kerchunk.... |
| 6 | 580.0 | 0.0 | 440.0 | 0.0 | cmip6-pds_GISS-E2-1-G_historical_tas |
| 1 | 110.0 | 100.0 | 640.0 | 0.0 | 3B-DAY.MS.MRG.3IMERG.20000601-S000000-E235959.... |
| 7 | 830.0 | 0.0 | 690.0 | 0.0 | 20231107090000-JPL-L4_GHRSST-SSTfnd-MUR-GLOB-v... |
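The stats CSVs follow Locust's output format, which reports response times in milliseconds. Since hvplot is imported above but not yet used, a grouped bar chart is one way to eyeball the prod/dev gap; the sketch below is illustrative (not from the original notebook) and assumes only the `merged_df` built above.

```python
# Illustrative comparison plot (assumes merged_df from the cell above):
# grouped bars of median response time per dataset for prod vs. dev.
merged_df.hvplot.bar(
    x='dataset',
    y=['Median Response Time Prod', 'Median Response Time Dev'],
    rot=90,  # dataset names are long, so rotate the x-axis labels
    ylabel='Median response time (ms)',
    title='Median tile response time: prod vs. dev',
)
```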