"""Create a STAC-aware GeoZarr RGB tile of a Sentinel-2 scene over Vienna."""
import warnings
from datetime import date
from pathlib import Path
import jsonschema
import matplotlib.pyplot as plt
import pystac_client
import rioxarray as rxr
import xarray as xr
Create a STAC-aware GeoZarr RGB tile of a Sentinel-2 scene over Vienna.
This example demonstrates a best-practice workflow for creating a STAC-aware, interoperable GeoZarr store from Sentinel-2 Cloud-Optimized GeoTIFFs (COGs).
The key steps are:
- Query Earth-Search for a recent, low-cloud Sentinel-2 L2A scene.
- Stream and stack the source RGB bands into a single, lazy xarray.DataArray.
- Prepare a GeoZarr-compliant xarray.Dataset in memory, which includes embedding STAC metadata and refining the CRS attributes.
- Write the final, consolidated GeoZarr store.
- Verify the result by reopening the store and inspecting the STAC metadata.
- Display a coarsened preview of the RGB image.
Query Earth-Search for a recent, low-cloud Sentinel-2 L2A scene.
print("Step 1: Querying for a Sentinel-2 scene over Vienna...")
= "https://earth-search.aws.element84.com/v1"
API = "sentinel-2-l2a"
coll = [16.20, 48.10, 16.45, 48.30] # Vienna
bbox = date.today()
today = today.replace(year=today.year - 1)
last_year = f"{last_year:%Y-%m-%d}/{today:%Y-%m-%d}"
daterange
= next(
item open(API)
pystac_client.Client.
.search(=[coll],
collections=bbox,
bbox=daterange,
datetime={"eo:cloud_cover": {"lt": 5}},
query=1,
limit
)
.items(),None,
)if not item:
raise RuntimeError("No Sentinel-2 scene found for the specified criteria.")
print(f"Found scene: {item.id} (Cloud cover: {item.properties['eo:cloud_cover']:.2f}%)")
Step 1: Querying for a Sentinel-2 scene over Vienna...
Found scene: S2A_33UWP_20250620_0_L2A (Cloud cover: 4.52%)
Stream and stack the source RGB bands into a lazy DataArray.
print("\nStep 2: Streaming and stacking source COG bands...")
= ["red", "green", "blue"]
bands = xr.concat(
rgb
[
rxr.open_rasterio(={"band": 1, "x": 2048, "y": 2048}, masked=True
item.assets[b].href, chunks=[b])
).assign_coords(bandfor b in bands
],="band",
dim
)= "radiance"
rgb.name # Assign the Coordinate Reference System from the STAC item's properties.
= rgb.rio.write_crs(item.properties["proj:code"]) rgb
Step 2: Streaming and stacking source COG bands...
Prepare a complete, GeoZarr-compliant xarray.Dataset in memory.
print("\nStep 3: Preparing the final xarray.Dataset with all metadata...")
# Convert the stacked RGB DataArray into a Dataset and name the data variable
= rgb.to_dataset(name="radiance")
radiance_ds
# Grab the original GeoTransform from the DataArray
= rgb.rio.transform()
transform
# Ensure x/y are recognized as spatial dims, then write transform + CRS at the dataset level
= (
radiance_ds ="x", y_dim="y")
radiance_ds.rio.set_spatial_dims(x_dim
.rio.write_transform(transform)"proj:code"])
.rio.write_crs(item.properties[
)
# Remove the "spatial_ref" attribute because it is redundant with "crs_wkt" and not included in CF conventions.
= radiance_ds["spatial_ref"].attrs.pop("spatial_ref", None) _
Step 3: Preparing the final xarray.Dataset with all metadata...
Build the STAC Item metadata dictionary
= min(item.assets[b].to_dict().get("gsd", 10) for b in bands)
gsd = Path(f"../output/{coll}_{'_'.join(bands)}_{item.id}.zarr")
store_path = {
stac_metadata "type": "Item",
"stac_version": "1.0.0",
"id": item.id,
"bbox": item.bbox,
"geometry": item.geometry,
"properties": {
"datetime": item.properties["datetime"],
"proj:code": item.properties["proj:code"],
"platform": item.properties["platform"],
"instruments": item.properties["instruments"],
"eo:cloud_cover": item.properties["eo:cloud_cover"],
"gsd": gsd,
},"assets": {
"data": {
"href": store_path.name,
"type": "application/x-zarr",
"roles": ["data"],
}
},"license": item.properties.get("license", "proprietary"),
}# Basic STAC schema check
jsonschema.validate(=stac_metadata,
instance={"type": "object", "required": ["type", "id", "stac_version", "assets"]},
schema
)# Embed it in the dataset attrs
"stac"] = stac_metadata radiance_ds.attrs[
Write the final, consolidated GeoZarr store to disk.
print(f"\nStep 4: Writing the final GeoZarr store to '{store_path}'...")
# Ignore warnings about codecs, datetimes, and consolidated metadata not currently being part of the
# Zarr format 3 specification, since it is not relevant for this example.
"ignore", message=".*Zarr format 3 specification.*")
warnings.filterwarnings(
# Mode 'w' will remove everything in the store_path before writing new data.
= radiance_ds.chunk({"y": 512, "x": 512}).to_zarr(
store ="w", consolidated=True
store_path, mode )
Step 4: Writing the final GeoZarr store to '../output/sentinel-2-l2a_red_green_blue_S2A_33UWP_20250620_0_L2A.zarr'...
Verify the result by reopening the store and checking for STAC metadata.
print("\nStep 5: Verifying the created GeoZarr store...")
with xr.open_zarr(store_path, consolidated=True) as verified_ds:
if "stac" not in verified_ds.attrs:
raise RuntimeError("FAIL: STAC metadata was not found.")
print("✅ SUCCESS: Embedded STAC metadata was found.")
Step 5: Verifying the created GeoZarr store...
✅ SUCCESS: Embedded STAC metadata was found.
Display a coarsened preview of the RGB image.
print("\nStep 6: Generating and displaying a coarsened preview...")
with xr.open_zarr(store_path, consolidated=True) as ds_to_plot:
= ds_to_plot.radiance.coarsen(y=8, x=8, boundary="trim").mean()
preview
# Apply a simple contrast stretch for better visualization.
= preview.quantile(0.02), preview.quantile(0.98)
p2, p98
= preview.clip(min=p2, max=p98)
preview_stretched = (preview_stretched - p2) / (p98 - p2)
preview_stretched
= preview_stretched.transpose("y", "x", "band").values
rgb_preview
= plt.subplots(figsize=(8, 8))
fig, ax
ax.imshow(rgb_preview)f"Coarsened Preview of {item.id}")
ax.set_title(
ax.set_axis_off() plt.show()
Step 6: Generating and displaying a coarsened preview...