Testing NetCDF-4 collections
In the previous step, we created a CSV file that lists the collection IDs and the variable names of each collection. In this step, we run a compatibility check to see whether those variables are compatible with TiTiler-CMR.
In [ ]:
Copied!
import ast
import random
from datetime import datetime as dt, UTC, timedelta
import pandas as pd
from datacube_benchmark.titiler import DatasetParams, check_titiler_cmr_compatibility
import ast
import random
from datetime import datetime as dt, UTC, timedelta
import pandas as pd
from datacube_benchmark.titiler import DatasetParams, check_titiler_cmr_compatibility
In [ ]:
Copied!
# Load the collection table produced by the previous step and keep only the
# rows whose "variables" cell is populated; the copy detaches the result from
# the intermediate frame so later in-place writes are unambiguous.
df_read = (
    pd.read_csv("output/cmr_collections_netcdf4_updated_saved_all.csv")
    .dropna(subset=["variables"])
    .copy()
)
df_read.head()
df_read = pd.read_csv("output/cmr_collections_netcdf4_updated_saved_all.csv")
df_read = df_read.dropna(subset=["variables"]).copy()
df_read.head()
In [ ]:
Copied!
import re
def extract_status_code(error):
    """Return the first HTTP-like status code found in *error*, if any.

    The value is converted to text and scanned for the first standalone
    three-digit number between 100 and 599, i.e. one that is not directly
    preceded or followed by another digit.

    Args:
        error: An error message or any scalar; ``None`` and missing values
            (as judged by ``pd.isna``) are treated as "no error".

    Returns:
        The matched code as a string (for example ``"404"``), or ``None``
        when the input is missing or contains no such number.
    """
    # pd.isna runs first, exactly as before; it already reports None as missing.
    if pd.isna(error) or error is None:
        return None

    found = re.search(r"(?<!\d)([1-5]\d{2})(?!\d)", str(error))
    return found.group(1) if found else None
endpoint = "https://staging.openveda.cloud/api/titiler-cmr"
for col in ["compatible", "compat_error", "status_code"]:
if col not in df_read.columns:
df_read[col] = None
for actual_idx, row in df_read.iterrows():
concept_id = row["concept_id"]
begin = row["begin_time"]
end = (
row["end_time"]
if pd.notna(row["end_time"])
else (dt.now(UTC) - timedelta(days=1)).strftime("%Y-%m-%dT%H:%M:%SZ")
)
variable_list = row["variables"]
variable = None
if isinstance(variable_list, str):
try:
variable_list = ast.literal_eval(variable_list)
except Exception as e:
print(f"⚠️ [{actual_idx}] Failed to parse variable list: {e}")
df_read.at[actual_idx, "compatible"] = False
df_read.at[actual_idx, "compat_error"] = (
f"Failed to parse variable list: {e}"
)
df_read.at[actual_idx, "status_code"] = extract_status_code(str(e))
continue
elif isinstance(variable_list, float):
variable_list = []
# -- pick random variable
if isinstance(variable_list, list) and variable_list:
variable = random.choice(variable_list)
print(f"\n🔍 [{actual_idx}] Checking: {concept_id}")
print(f"🕒 [{actual_idx}] Time: {begin} → {end}")
print(
f"📦 [{actual_idx}] Variable list: {variable_list}, Selected Variable: {variable}"
)
if variable is None:
print(f"⏭️ [{actual_idx}] Skipping {concept_id} - no variable found")
df_read.at[actual_idx, "compatible"] = False
df_read.at[actual_idx, "compat_error"] = "No variable found"
df_read.at[actual_idx, "status_code"] = None
continue
# Pick a random week
try:
dt_begin = dt.fromisoformat(begin.replace("Z", "+00:00"))
dt_end = dt.fromisoformat(end.replace("Z", "+00:00"))
delta_days = (dt_end - dt_begin).days
if delta_days < 7:
datetime_range = f"{dt_begin.strftime('%Y-%m-%dT%H:%M:%SZ')}/{dt_end.strftime('%Y-%m-%dT%H:%M:%SZ')}"
print(
f"📆 [{actual_idx}] Time range < 7 days, using full range: {datetime_range}"
)
continue
offset_days = random.randint(0, delta_days - 7)
week_start = dt_begin + timedelta(days=offset_days)
week_end = week_start + timedelta(days=6)
datetime_range = f"{week_start.strftime('%Y-%m-%dT%H:%M:%SZ')}/{week_end.strftime('%Y-%m-%dT%H:%M:%SZ')}"
print(f"📆 [{actual_idx}] Using week range: {datetime_range}")
except Exception as time_err:
print(f"❌ [{actual_idx}] Failed to select week: {time_err}")
df_read.at[actual_idx, "compatible"] = False
df_read.at[actual_idx, "compat_error"] = str(time_err)
df_read.at[actual_idx, "status_code"] = extract_status_code(str(time_err))
continue
ds_xarray = DatasetParams(
concept_id=concept_id,
backend="xarray",
datetime_range=datetime_range,
variable=variable,
step="P1D",
temporal_mode="point",
)
try:
compat = await check_titiler_cmr_compatibility(
endpoint=endpoint, dataset=ds_xarray, timeout_s=500, bounds_fraction=0.0001
)
compatibility_value = compat.get("compatibility")
is_compatible = compatibility_value == "compatible"
error_msg = compat.get("error")
df_read.at[actual_idx, "compatible"] = is_compatible
df_read.at[actual_idx, "compat_error"] = error_msg
df_read.at[actual_idx, "status_code"] = (
extract_status_code(str(error_msg)) if error_msg else None
)
print(f"✅ [{actual_idx}] Result: {compatibility_value}")
if error_msg:
print(f"⚠️ [{actual_idx}] Error from response: {error_msg}")
except Exception as compat_err:
print(f"❌ [{actual_idx}] Compatibility check failed: {compat_err}")
df_read.at[actual_idx, "compatible"] = False
df_read.at[actual_idx, "compat_error"] = str(compat_err)
df_read.at[actual_idx, "status_code"] = extract_status_code(str(compat_err))
print(f"\n✅ Completed checking {len(df_read)} collections")
print(f"Compatible: {df_read['compatible'].sum()}")
import re
def extract_status_code(error):
if pd.isna(error) or error is None:
return None
match = re.search(r"(?
In [ ]:
Copied!
from datetime import datetime
# Stamp the report name with the current local time, down to the second, so
# each run writes its own file.
generated_at = datetime.now()
current_date = generated_at.strftime("%Y-%m-%d_%H-%M-%S")
filename = f"compatibility_report_netcdf4_{current_date}.csv"

# Write every row, including the result columns, without the index.
df_read.to_csv(path_or_buf=filename, index=False)
print(f"\nCompatibility report created: {filename}")
from datetime import datetime
current_date = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename = f"compatibility_report_netcdf4_{current_date}.csv"
df_read.to_csv(filename, index=False)
print(f"\nCompatibility report created: {filename}")