Stac virtualizarr
In [32]:
Copied!
import warnings
import xarray as xr
warnings.filterwarnings("ignore")
xr.set_options(display_expand_indexes=True)
import warnings
import xarray as xr
warnings.filterwarnings("ignore")
xr.set_options(display_expand_indexes=True)
Out[32]:
<xarray.core.options.set_options at 0x14bcd9090>
Set protocol, bucket and AWS profile values for use in obstore, virtualizarr registry and Icechunk virtual chunk container.¶
In [33]:
Copied!
scheme = "s3://"
bucket = "usgs-landsat"
region = "us-west-2"
scheme = "s3://"
bucket = "usgs-landsat"
region = "us-west-2"
You'll need to configure your profile or AWS credential information here for the requester pays USGS bucket.¶
In [34]:
Copied!
import os
profile = "impactnew"
os.environ["AWS_PROFILE"] = profile
import os
profile = "impactnew"
os.environ["AWS_PROFILE"] = profile
Configure obstore for requester pays access to the usgs-landsat bucket using the profile's credentials in order to parse COGs.¶
In [35]:
Copied!
import boto3
from obstore.store import S3Store
from obspec_utils.registry import ObjectStoreRegistry
from obstore.auth.boto3 import Boto3CredentialProvider
session = boto3.Session()
object_store = S3Store(
bucket=bucket,
region=region,
request_payer=True,
credential_provider=Boto3CredentialProvider(session=session),
)
registry = ObjectStoreRegistry({f"{scheme}{bucket}": object_store})
import boto3
from obstore.store import S3Store
from obspec_utils.registry import ObjectStoreRegistry
from obstore.auth.boto3 import Boto3CredentialProvider
session = boto3.Session()
object_store = S3Store(
bucket=bucket,
region=region,
request_payer=True,
credential_provider=Boto3CredentialProvider(session=session),
)
registry = ObjectStoreRegistry({f"{scheme}{bucket}": object_store})
Configure a local icechunk store with a virtual chunk container which can access the usgs-landsat bucket with the profile's credentials.¶
In [36]:
Copied!
import icechunk
location = "data/icechunk"
storage = icechunk.local_filesystem_storage(location)
config = icechunk.RepositoryConfig.default()
s3_chunk_store = icechunk.s3_store(
region=region,
requester_pays=True,
)
config.set_virtual_chunk_container(
icechunk.VirtualChunkContainer(f"{scheme}{bucket}/", s3_chunk_store)
)
credentials = icechunk.containers_credentials(
{f"{scheme}{bucket}/": icechunk.s3_credentials(from_env=True)}
)
repo = icechunk.Repository.open_or_create(
storage=storage, config=config, authorize_virtual_chunk_access=credentials
)
session = repo.writable_session("main")
import icechunk
location = "data/icechunk"
storage = icechunk.local_filesystem_storage(location)
config = icechunk.RepositoryConfig.default()
s3_chunk_store = icechunk.s3_store(
region=region,
requester_pays=True,
)
config.set_virtual_chunk_container(
icechunk.VirtualChunkContainer(f"{scheme}{bucket}/", s3_chunk_store)
)
credentials = icechunk.containers_credentials(
{f"{scheme}{bucket}/": icechunk.s3_credentials(from_env=True)}
)
repo = icechunk.Repository.open_or_create(
storage=storage, config=config, authorize_virtual_chunk_access=credentials
)
session = repo.writable_session("main")
2026-05-13T01:06:28.425091Z WARN icechunk::storage::object_store: The LocalFileSystem storage is not safe for concurrent commits. If more than one thread/process will attempt to commit at the same time, prefer using object stores. at icechunk/src/storage/object_store.rs:92
Search a STAC API and ingest the results into the zarr-datafusion-search metadata schema in the Icechunk store¶
In [37]:
Copied!
from zarr_datafusion_search import ingest_stac_search
rows = await ingest_stac_search(
url="https://earth-search.aws.element84.com/v1",
session=session,
collections=["landsat-c2-l2"],
bbox=(-124.4096, 32.5343, -114.1312, 42.0095),
max_items=10,
datetime="2025-10-01/2025-10-11",
asset_hrefs=["red", "green", "blue"]
)
rows
from zarr_datafusion_search import ingest_stac_search
rows = await ingest_stac_search(
url="https://earth-search.aws.element84.com/v1",
session=session,
collections=["landsat-c2-l2"],
bbox=(-124.4096, 32.5343, -114.1312, 42.0095),
max_items=10,
datetime="2025-10-01/2025-10-11",
asset_hrefs=["red", "green", "blue"]
)
rows
Out[37]:
10
The properties from the STAC items are written to the "column" arrays in the meta group.¶
Note that non-scalar STAC item properties are ignored except for bbox, shape, transform and assets which are flattened into "column" arrays in the meta group.
In [38]:
Copied!
import zarr
session = repo.writable_session("main")
root = zarr.open(session.store)
root.tree()
import zarr
session = repo.writable_session("main")
root = zarr.open(session.store)
root.tree()
Out[38]:
/
└── meta
├── asset_blue (10,) StringDType()
├── asset_green (10,) StringDType()
├── asset_red (10,) StringDType()
├── bbox (10,) object
├── created (10,) datetime64[ms]
├── datetime (10,) datetime64[ms]
├── description (10,) StringDType()
├── earthsearch:payload_id (10,) StringDType()
├── eo:cloud_cover (10,) float64
├── grid:code (10,) StringDType()
├── gsd (10,) int64
├── id (10,) StringDType()
├── landsat:cloud_cover_land (10,) float64
├── landsat:collection_category (10,) StringDType()
├── landsat:collection_number (10,) StringDType()
├── landsat:correction (10,) StringDType()
├── landsat:product_generated (10,) StringDType()
├── landsat:scene_id (10,) StringDType()
├── landsat:wrs_path (10,) StringDType()
├── landsat:wrs_row (10,) StringDType()
├── landsat:wrs_type (10,) StringDType()
├── platform (10,) StringDType()
├── proj:epsg (10,) int64
├── sci:doi (10,) StringDType()
├── shape_x (10,) int64
├── shape_y (10,) int64
├── transform_0 (10,) float64
├── transform_1 (10,) float64
├── transform_2 (10,) float64
├── transform_3 (10,) float64
├── transform_4 (10,) float64
├── transform_5 (10,) float64
├── updated (10,) datetime64[ms]
├── view:off_nadir (10,) int64
├── view:sun_azimuth (10,) float64
└── view:sun_elevation (10,) float64
Now we can query this metadata with normal SQL.¶
In [39]:
Copied!
from zarr_datafusion_search import ZarrTable
from datafusion import SessionContext
zarr_table = await ZarrTable.from_icechunk(session=session, group_path="/meta")
ctx = SessionContext()
ctx.register_table_provider("icechunk_data", zarr_table)
# Note that special characters in columns like ":" require quotes and escapes.
sql = "SELECT id, datetime, \"eo:cloud_cover\" FROM icechunk_data WHERE datetime < CAST('2025-10-11' AS DATE) and \"eo:cloud_cover\" < 12;"
df = ctx.sql(sql)
df
from zarr_datafusion_search import ZarrTable
from datafusion import SessionContext
zarr_table = await ZarrTable.from_icechunk(session=session, group_path="/meta")
ctx = SessionContext()
ctx.register_table_provider("icechunk_data", zarr_table)
# Note that special characters in columns like ":" require quotes and escapes.
sql = "SELECT id, datetime, \"eo:cloud_cover\" FROM icechunk_data WHERE datetime < CAST('2025-10-11' AS DATE) and \"eo:cloud_cover\" < 12;"
df = ctx.sql(sql)
df
[src/table.rs:36:9] "Created icechunk session from msgpack serialization" = "Created icechunk session from msgpack serialization"
[src/table.rs:37:9] icechunk_session.config() = RepositoryConfig {
inline_chunk_threshold_bytes: None,
get_partial_values_concurrency: None,
compression: None,
max_concurrent_requests: None,
caching: None,
storage: None,
virtual_chunk_containers: Some(
{
"s3://usgs-landsat/": VirtualChunkContainer {
name: None,
url_prefix: "s3://usgs-landsat/",
store: S3(
S3Options {
region: Some(
"us-west-2",
),
endpoint_url: None,
anonymous: false,
allow_http: false,
force_path_style: false,
network_stream_timeout_seconds: Some(
60,
),
requester_pays: true,
},
),
},
},
),
manifest: None,
}
Out[39]:
| id | datetime | eo:cloud_cover |
|---|---|---|
| 2.26 | ||
| 2.09 |
We can also make spatial SQL queries using geodatafusion.¶
In [40]:
Copied!
from shapely.geometry import box
from geodatafusion import register_all
register_all(ctx)
item_bbox = box(-122.599701, 39.251552, -119.847948, 41.390367)
sql = f"""SELECT id FROM icechunk_data WHERE ST_Intersects(bbox, ST_GeomFromText('{item_bbox.wkt}')) and datetime < CAST('2025-10-11' AS DATE);"""
df_spatial = ctx.sql(sql)
df_spatial
from shapely.geometry import box
from geodatafusion import register_all
register_all(ctx)
item_bbox = box(-122.599701, 39.251552, -119.847948, 41.390367)
sql = f"""SELECT id FROM icechunk_data WHERE ST_Intersects(bbox, ST_GeomFromText('{item_bbox.wkt}')) and datetime < CAST('2025-10-11' AS DATE);"""
df_spatial = ctx.sql(sql)
df_spatial
Out[40]:
| id |
|---|
Let's select all the columns so we can use the required ones to create Virtualizarr arrays with Zarr geo-proj and multiscales convetions.¶
In [41]:
Copied!
sql = "SELECT * FROM icechunk_data WHERE datetime < CAST('2025-10-11' AS DATE) and \"eo:cloud_cover\" < 12 limit 2;"
df = ctx.sql(sql)
df
sql = "SELECT * FROM icechunk_data WHERE datetime < CAST('2025-10-11' AS DATE) and \"eo:cloud_cover\" < 12 limit 2;"
df = ctx.sql(sql)
df
Out[41]:
| asset_blue | asset_green | asset_red | bbox | created | datetime | description | earthsearch:payload_id | eo:cloud_cover | grid:code | gsd | id | landsat:cloud_cover_land | landsat:collection_category | landsat:collection_number | landsat:correction | landsat:product_generated | landsat:scene_id | landsat:wrs_path | landsat:wrs_row | landsat:wrs_type | platform | proj:epsg | sci:doi | shape_x | shape_y | transform_0 | transform_1 | transform_2 | transform_3 | transform_4 | transform_5 | updated | view:off_nadir | view:sun_azimuth | view:sun_elevation |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2.26 | WRS2-044035 | 30 | 1.12 | T1 | 02 | L2SP | 2025-11-17T21:53:25Z | LC80440352025283LGN00 | 044 | 035 | 2 | landsat-8 | 32610 | 10.5066/P9OGBGM6 | 7671 | 7811 | 30.0 | 0.0 | 425685.0 | 0.0 | -30.0 | 4105515.0 | 0 | 155.40358771 | 43.94135381 | ||||||||||
| 2.09 | WRS2-044034 | 30 | 2.89 | T1 | 02 | L2SP | 2025-11-17T21:51:08Z | LC80440342025283LGN00 | 044 | 034 | 2 | landsat-8 | 32610 | 10.5066/P9OGBGM6 | 7671 | 7811 | 30.0 | 0.0 | 462285.0 | 0.0 | -30.0 | 4264515.0 | 0 | 156.37026407 | 42.74059459 |
Generate Zarr multiscales convetion metadata for the selected overviews/IFDs in the source COG.¶
In [42]:
Copied!
from typing import MutableMapping, Any
def write_multiscales(overviews: int, attrs: MutableMapping[str, Any]):
layout = []
factor = 2
for overview in range(overviews):
if overview == 0:
config = {
"asset": str(overview)
}
else:
scale = float(factor ** overview)
config = {
"asset": str(overview),
"derived_from": str(overview - 1),
"factors": [factor, factor],
"transform": {
"scale": [scale, scale],
"translation": [0.0, 0.0],
}
}
layout.append(config)
attrs["multiscales"] = {
"layout": layout
}
from typing import MutableMapping, Any
def write_multiscales(overviews: int, attrs: MutableMapping[str, Any]):
layout = []
factor = 2
for overview in range(overviews):
if overview == 0:
config = {
"asset": str(overview)
}
else:
scale = float(factor ** overview)
config = {
"asset": str(overview),
"derived_from": str(overview - 1),
"factors": [factor, factor],
"transform": {
"scale": [scale, scale],
"translation": [0.0, 0.0],
}
}
layout.append(config)
attrs["multiscales"] = {
"layout": layout
}
Use the proj metadata to create Zarr geo-proj convention metadata.¶
In [43]:
Copied!
from rasterio.warp import transform_bounds
def write_geoproj_spatial(
proj_code: int,
dimensions: tuple[int, int],
bbox: tuple[float, float, float, float],
transform: tuple[float, float, float, float, float, float],
attrs: MutableMapping[str, Any],
):
projected_bbox = transform_bounds(
"EPSG:4326",
proj_code,
*bbox
)
attrs["proj:code"] = proj_code
attrs["spatial:dimensions"] = dimensions
attrs["spatial:transform"] = transform
attrs["spatial:bbox"] = projected_bbox
from rasterio.warp import transform_bounds
def write_geoproj_spatial(
proj_code: int,
dimensions: tuple[int, int],
bbox: tuple[float, float, float, float],
transform: tuple[float, float, float, float, float, float],
attrs: MutableMapping[str, Any],
):
projected_bbox = transform_bounds(
"EPSG:4326",
proj_code,
*bbox
)
attrs["proj:code"] = proj_code
attrs["spatial:dimensions"] = dimensions
attrs["spatial:transform"] = transform
attrs["spatial:bbox"] = projected_bbox
Parse the selected asset using Virtualizarr and write the resulting ManifestArray to Icechunk.¶
In [44]:
Copied!
from virtualizarr import open_virtual_dataset
from virtual_tiff import VirtualTIFF
def write_virtualizarr_data(id: str, assets: dict[str, str], overviews: int, session):
for key, asset in assets.items():
for overview in range(overviews):
group = f"{id}/{key}/multiscales/{overview}"
ds = open_virtual_dataset(
url=asset,
registry=registry,
parser=VirtualTIFF(ifd=overview)
)
ds.vz.to_icechunk(
store=session.store,
group=group,
)
from virtualizarr import open_virtual_dataset
from virtual_tiff import VirtualTIFF
def write_virtualizarr_data(id: str, assets: dict[str, str], overviews: int, session):
for key, asset in assets.items():
for overview in range(overviews):
group = f"{id}/{key}/multiscales/{overview}"
ds = open_virtual_dataset(
url=asset,
registry=registry,
parser=VirtualTIFF(ifd=overview)
)
ds.vz.to_icechunk(
store=session.store,
group=group,
)
Write the Zarr conventions.¶
In [45]:
Copied!
from shapely import wkb
def write_conventions(row: dict[str, Any], overviews: int, session):
item_group = zarr.open_group(session.store, path=f"/{row['id']}", zarr_format=3)
write_multiscales(overviews=overviews, attrs=item_group.attrs)
geom = wkb.loads(row["bbox"])
bbox = [*geom.bounds]
write_geoproj_spatial(
proj_code=row["proj:epsg"],
dimensions=[row["shape_y"], row["shape_x"]],
bbox=bbox,
transform=[row["transform_0"], row["transform_1"], row["transform_2"], row["transform_3"], row["transform_4"], row["transform_5"]],
attrs=item_group.attrs,
)
item_group.attrs["zarr_conventions"] = [
{
"schema_url": "https://raw.githubusercontent.com/zarr-conventions/multiscales/refs/tags/v1/schema.json",
"spec_url": "https://github.com/zarr-conventions/multiscales/blob/v1/README.md",
"uuid": "d35379db-88df-4056-af3a-620245f8e347",
"name": "multiscales",
"description": "Multiscale layout of zarr datasets"
},
{
"schema_url": "https://raw.githubusercontent.com/zarr-experimental/geo-proj/refs/tags/v1/schema.json",
"spec_url": "https://github.com/zarr-experimental/geo-proj/blob/v1/README.md",
"uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
"name": "proj:",
"description": "Coordinate reference system information for geospatial data"
},
{
"schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
"spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
"uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
"name": "spatial:",
"description": "Spatial coordinate information"
}
]
from shapely import wkb
def write_conventions(row: dict[str, Any], overviews: int, session):
item_group = zarr.open_group(session.store, path=f"/{row['id']}", zarr_format=3)
write_multiscales(overviews=overviews, attrs=item_group.attrs)
geom = wkb.loads(row["bbox"])
bbox = [*geom.bounds]
write_geoproj_spatial(
proj_code=row["proj:epsg"],
dimensions=[row["shape_y"], row["shape_x"]],
bbox=bbox,
transform=[row["transform_0"], row["transform_1"], row["transform_2"], row["transform_3"], row["transform_4"], row["transform_5"]],
attrs=item_group.attrs,
)
item_group.attrs["zarr_conventions"] = [
{
"schema_url": "https://raw.githubusercontent.com/zarr-conventions/multiscales/refs/tags/v1/schema.json",
"spec_url": "https://github.com/zarr-conventions/multiscales/blob/v1/README.md",
"uuid": "d35379db-88df-4056-af3a-620245f8e347",
"name": "multiscales",
"description": "Multiscale layout of zarr datasets"
},
{
"schema_url": "https://raw.githubusercontent.com/zarr-experimental/geo-proj/refs/tags/v1/schema.json",
"spec_url": "https://github.com/zarr-experimental/geo-proj/blob/v1/README.md",
"uuid": "f17cb550-5864-4468-aeb7-f3180cfb622f",
"name": "proj:",
"description": "Coordinate reference system information for geospatial data"
},
{
"schema_url": "https://raw.githubusercontent.com/zarr-conventions/spatial/refs/tags/v1/schema.json",
"spec_url": "https://github.com/zarr-conventions/spatial/blob/v1/README.md",
"uuid": "689b58e2-cf7b-45e0-9fff-9cfc0883d6b4",
"name": "spatial:",
"description": "Spatial coordinate information"
}
]
Create Virtualizarr arrays for each of the rows in the query results.¶
In [46]:
Copied!
# Hardcode the number of multiscale IFDs in the Landsat COGs.
overviews = 7
for row in df.to_arrow_table().to_pylist():
assets = {
"red": row["asset_red"],
"blue": row["asset_blue"]
}
session = repo.writable_session("main")
write_virtualizarr_data(
id=row["id"],
assets=assets,
overviews=overviews,
session=session
)
write_conventions(row=row, overviews=overviews, session=session)
message = session.commit(row["id"])
print(message)
# Hardcode the number of multiscale IFDs in the Landsat COGs.
overviews = 7
for row in df.to_arrow_table().to_pylist():
assets = {
"red": row["asset_red"],
"blue": row["asset_blue"]
}
session = repo.writable_session("main")
write_virtualizarr_data(
id=row["id"],
assets=assets,
overviews=overviews,
session=session
)
write_conventions(row=row, overviews=overviews, session=session)
message = session.commit(row["id"])
print(message)
QDRDWKSZW0V2Z2NPNZ50 Q0M73STXYYC7XSDAPB40
In [47]:
Copied!
store = zarr.open(session.store)
store.tree()
store = zarr.open(session.store)
store.tree()
Out[47]:
/
├── LC08_L2SP_044034_20251010_02_T1
│ ├── blue
│ │ └── multiscales
│ │ ├── 0
│ │ │ └── 0 (7811, 7671) uint16
│ │ ├── 1
│ │ │ └── 1 (3906, 3836) uint16
│ │ ├── 2
│ │ │ └── 2 (1953, 1918) uint16
│ │ ├── 3
│ │ │ └── 3 (977, 959) uint16
│ │ ├── 4
│ │ │ └── 4 (489, 480) uint16
│ │ ├── 5
│ │ │ └── 5 (245, 240) uint16
│ │ └── 6
│ │ └── 6 (123, 120) uint16
│ └── red
│ └── multiscales
│ ├── 0
│ │ └── 0 (7811, 7671) uint16
│ ├── 1
│ │ └── 1 (3906, 3836) uint16
│ ├── 2
│ │ └── 2 (1953, 1918) uint16
│ ├── 3
│ │ └── 3 (977, 959) uint16
│ ├── 4
│ │ └── 4 (489, 480) uint16
│ ├── 5
│ │ └── 5 (245, 240) uint16
│ └── 6
│ └── 6 (123, 120) uint16
├── LC08_L2SP_044035_20251010_02_T1
│ ├── blue
│ │ └── multiscales
│ │ ├── 0
│ │ │ └── 0 (7811, 7671) uint16
│ │ ├── 1
│ │ │ └── 1 (3906, 3836) uint16
│ │ ├── 2
│ │ │ └── 2 (1953, 1918) uint16
│ │ ├── 3
│ │ │ └── 3 (977, 959) uint16
│ │ ├── 4
│ │ │ └── 4 (489, 480) uint16
│ │ ├── 5
│ │ │ └── 5 (245, 240) uint16
│ │ └── 6
│ │ └── 6 (123, 120) uint16
│ └── red
│ └── multiscales
│ ├── 0
│ │ └── 0 (7811, 7671) uint16
│ ├── 1
│ │ └── 1 (3906, 3836) uint16
│ ├── 2
│ │ └── 2 (1953, 1918) uint16
│ ├── 3
│ │ └── 3 (977, 959) uint16
│ ├── 4
│ │ └── 4 (489, 480) uint16
│ ├── 5
│ │ └── 5 (245, 240) uint16
│ └── 6
│ └── 6 (123, 120) uint16
└── meta
├── asset_blue (10,) StringDType()
├── asset_green (10,) StringDType()
├── asset_red (10,) StringDType()
├── bbox (10,) object
├── created (10,) datetime64[ms]
├── datetime (10,) datetime64[ms]
├── description (10,) StringDType()
├── earthsearch:payload_id (10,) StringDType()
├── eo:cloud_cover (10,) float64
├── grid:code (10,) StringDType()
├── gsd (10,) int64
├── id (10,) StringDType()
├── landsat:cloud_cover_land (10,) float64
├── landsat:collection_category (10,) StringDType()
├── landsat:collection_number (10,) StringDType()
├── landsat:correction (10,) StringDType()
├── landsat:product_generated (10,) StringDType()
├── landsat:scene_id (10,) StringDType()
├── landsat:wrs_path (10,) StringDType()
├── landsat:wrs_row (10,) StringDType()
├── landsat:wrs_type (10,) StringDType()
├── platform (10,) StringDType()
├── proj:epsg (10,) int64
├── sci:doi (10,) StringDType()
├── shape_x (10,) int64
├── shape_y (10,) int64
├── transform_0 (10,) float64
├── transform_1 (10,) float64
├── transform_2 (10,) float64
├── transform_3 (10,) float64
├── transform_4 (10,) float64
├── transform_5 (10,) float64
├── updated (10,) datetime64[ms]
├── view:off_nadir (10,) int64
├── view:sun_azimuth (10,) float64
└── view:sun_elevation (10,) float64
In [48]:
Copied!
row = df.to_arrow_table().to_pylist()[1]
item_group = zarr.open_group(session.store, path=row["id"], mode="r")
multiscales = item_group.attrs["multiscales"]
row = df.to_arrow_table().to_pylist()[1]
item_group = zarr.open_group(session.store, path=row["id"], mode="r")
multiscales = item_group.attrs["multiscales"]
Determine the correct overview to use for the tile zoom.¶
In [49]:
Copied!
import morecantile
def pick_best_overview(
tile_zoom: int,
multiscales,
native_resolution=30.0,
tms: morecantile.TileMatrixSet = morecantile.tms.get("WebMercatorQuad")
):
target_resolution = tms.matrix(tile_zoom).cellSize
overview_resolution = []
for entry in multiscales["layout"]:
scale = entry.get("transform", {}).get("scale", [1.0])[0]
resolution = native_resolution * scale
overview_resolution.append((entry["asset"], resolution))
candidates = [ov for ov in overview_resolution if ov[1] <= target_resolution]
if candidates:
return max(candidates, key=lambda ov: ov[1])[0]
else:
return min(overview_resolution, key=lambda ov: ov[1])[0]
import morecantile
def pick_best_overview(
tile_zoom: int,
multiscales,
native_resolution=30.0,
tms: morecantile.TileMatrixSet = morecantile.tms.get("WebMercatorQuad")
):
target_resolution = tms.matrix(tile_zoom).cellSize
overview_resolution = []
for entry in multiscales["layout"]:
scale = entry.get("transform", {}).get("scale", [1.0])[0]
resolution = native_resolution * scale
overview_resolution.append((entry["asset"], resolution))
candidates = [ov for ov in overview_resolution if ov[1] <= target_resolution]
if candidates:
return max(candidates, key=lambda ov: ov[1])[0]
else:
return min(overview_resolution, key=lambda ov: ov[1])[0]
In [50]:
Copied!
tile_zoom = 10
overview = pick_best_overview(
tile_zoom=tile_zoom,
multiscales=multiscales,
)
overview
tile_zoom = 10
overview = pick_best_overview(
tile_zoom=tile_zoom,
multiscales=multiscales,
)
overview
Out[50]:
'2'
Open the selected overview as an xarray dataset.¶
In [51]:
Copied!
ds = xr.open_zarr(store=session.store, group=f"{row['id']}/red/multiscales/{overview}")
ds
ds = xr.open_zarr(store=session.store, group=f"{row['id']}/red/multiscales/{overview}")
ds
Out[51]:
<xarray.Dataset> Size: 15MB
Dimensions: (y: 1953, x: 1918)
Dimensions without coordinates: y, x
Data variables:
2 (y, x) float32 15MB dask.array<chunksize=(128, 128), meta=np.ndarray>Use the geo-proj convention metadata to create an affine transformation and set a rasterix RasterIndex to use for selection operations.¶
In [52]:
Copied!
from affine import Affine
from rasterix import RasterIndex
affine = Affine(*item_group.attrs["spatial:transform"])
index = RasterIndex.from_transform(
affine=affine,
width=ds.sizes["x"],
height=ds.sizes["y"],
x_dim="x",
y_dim="y",
crs=item_group.attrs["proj:code"],
)
coords = xr.Coordinates.from_xindex(index)
ds = ds.assign_coords(coords)
ds
from affine import Affine
from rasterix import RasterIndex
affine = Affine(*item_group.attrs["spatial:transform"])
index = RasterIndex.from_transform(
affine=affine,
width=ds.sizes["x"],
height=ds.sizes["y"],
x_dim="x",
y_dim="y",
crs=item_group.attrs["proj:code"],
)
coords = xr.Coordinates.from_xindex(index)
ds = ds.assign_coords(coords)
ds
Out[52]:
<xarray.Dataset> Size: 15MB
Dimensions: (y: 1953, x: 1918)
Coordinates:
* y (y) float64 16kB 4.264e+06 4.264e+06 ... 4.206e+06 4.206e+06
* x (x) float64 15kB 4.623e+05 4.623e+05 ... 5.198e+05 5.198e+05
Data variables:
2 (y, x) float32 15MB dask.array<chunksize=(128, 128), meta=np.ndarray>
Indexes:
┌ x RasterIndex (crs=EPSG:32610)
└ yIn [53]:
Copied!
ds = ds.proj.assign_crs(spatial_ref=item_group.attrs["proj:code"], allow_override=True)
ds
ds = ds.proj.assign_crs(spatial_ref=item_group.attrs["proj:code"], allow_override=True)
ds
Out[53]:
<xarray.Dataset> Size: 15MB
Dimensions: (y: 1953, x: 1918)
Coordinates:
* y (y) float64 16kB 4.264e+06 4.264e+06 ... 4.206e+06 4.206e+06
* x (x) float64 15kB 4.623e+05 4.623e+05 ... 5.198e+05 5.198e+05
* spatial_ref int64 8B 0
Data variables:
2 (y, x) float32 15MB dask.array<chunksize=(128, 128), meta=np.ndarray>
Indexes:
┌ x RasterIndex (crs=EPSG:32610)
└ y
spatial_ref CRSIndex (crs=EPSG:32610)Get the list of web merctaor tiles intersecting the granule.¶
In [54]:
Copied!
from shapely import wkb
row_bbox = wkb.loads(row["bbox"])
granule_bbox = morecantile.commons.BoundingBox(*row_bbox.bounds)
tms = morecantile.tms.get("WebMercatorQuad")
tiles = list(tms.tiles(*granule_bbox, zooms=tile_zoom))
row_bbox.bounds
from shapely import wkb
row_bbox = wkb.loads(row["bbox"])
granule_bbox = morecantile.commons.BoundingBox(*row_bbox.bounds)
tms = morecantile.tms.get("WebMercatorQuad")
tiles = list(tms.tiles(*granule_bbox, zooms=tile_zoom))
row_bbox.bounds
Out[54]:
(-123.431946, 36.400143, -120.796313, 38.525512)
For context, display a map to visualize the web mercator tiles that intersect the granule.¶
In [55]:
Copied!
import folium
def create_map(row_bounds, tiles):
zoom_bbox = box(*row_bounds)
zoom_bbox.centroid
m = folium.Map(
location=[zoom_bbox.centroid.y, zoom_bbox.centroid.x],
zoom_start=8,
tiles="CartoDB positron",
width="50%",
)
folium.Rectangle(
bounds=[
[row_bounds[1], row_bounds[0]], # [south, west]
[row_bounds[3], row_bounds[2]] # [north, east]
],
color="red",
fill=True,
fillOpacity=0.1,
weight=3,
).add_to(m)
for tile in tiles:
tile_bounds = tms.bounds(tile)
folium.Rectangle(
bounds=[
[tile_bounds.bottom, tile_bounds.left], # southwest
[tile_bounds.top, tile_bounds.right] # northeast
],
color="blue",
fill=True,
fillOpacity=0.1,
weight=0.5,
popup=f"Tile: {tile.z}/{tile.x}/{tile.y}"
).add_to(m)
return m
m = create_map(row_bbox.bounds, tiles=tiles)
m
import folium
def create_map(row_bounds, tiles):
zoom_bbox = box(*row_bounds)
zoom_bbox.centroid
m = folium.Map(
location=[zoom_bbox.centroid.y, zoom_bbox.centroid.x],
zoom_start=8,
tiles="CartoDB positron",
width="50%",
)
folium.Rectangle(
bounds=[
[row_bounds[1], row_bounds[0]], # [south, west]
[row_bounds[3], row_bounds[2]] # [north, east]
],
color="red",
fill=True,
fillOpacity=0.1,
weight=3,
).add_to(m)
for tile in tiles:
tile_bounds = tms.bounds(tile)
folium.Rectangle(
bounds=[
[tile_bounds.bottom, tile_bounds.left], # southwest
[tile_bounds.top, tile_bounds.right] # northeast
],
color="blue",
fill=True,
fillOpacity=0.1,
weight=0.5,
popup=f"Tile: {tile.z}/{tile.x}/{tile.y}"
).add_to(m)
return m
m = create_map(row_bbox.bounds, tiles=tiles)
m
Out[55]:
Make this Notebook Trusted to load map: File -> Trust Notebook
Select a tile¶
In [56]:
Copied!
tile = [t for t in tiles if t.x == 163 and t.y == 394 and t.z == 10][0]
tile
tile = [t for t in tiles if t.x == 163 and t.y == 394 and t.z == 10][0]
tile
Out[56]:
Tile(x=163, y=394, z=10)
Reproject the selected tile bounds to the granule's CRS.¶
In [57]:
Copied!
import shapely
from pyproj import Transformer
from shapely.ops import transform
tile_bbox = tms.xy_bounds(tile)
tile_bbox_geom = shapely.geometry.box(*tile_bbox)
utm_transformer = Transformer.from_crs("EPSG:3857", item_group.attrs["proj:code"], always_xy=True).transform
tile_bbox_utm = transform(utm_transformer, tile_bbox_geom)
tile_bbox_utm.bounds
import shapely
from pyproj import Transformer
from shapely.ops import transform
tile_bbox = tms.xy_bounds(tile)
tile_bbox_geom = shapely.geometry.box(*tile_bbox)
utm_transformer = Transformer.from_crs("EPSG:3857", item_group.attrs["proj:code"], always_xy=True).transform
tile_bbox_utm = transform(utm_transformer, tile_bbox_geom)
tile_bbox_utm.bounds
Out[57]:
(526651.4193177071, 4205433.050741122, 557620.3068929347, 4236274.712115319)
We can use this reprojected tile bbox for CRS aware selection operations in xarray with rasterix.¶¶
In [58]:
Copied!
left, bottom, right, top = tile_bbox_utm.bounds
ds_tile = ds.sel(x=slice(left, right), y=slice(top, bottom))
ds_tile
left, bottom, right, top = tile_bbox_utm.bounds
ds_tile = ds.sel(x=slice(left, right), y=slice(top, bottom))
ds_tile
Out[58]:
<xarray.Dataset> Size: 8kB
Dimensions: (y: 1012, x: 0)
Coordinates:
* y (y) float64 8kB 4.236e+06 4.236e+06 ... 4.206e+06 4.206e+06
* x (x) float64 0B
* spatial_ref int64 8B 0
Data variables:
2 (y, x) float32 0B dask.array<chunksize=(83, 0), meta=np.ndarray>
Indexes:
┌ x RasterIndex (crs=EPSG:32610)
└ y
spatial_ref CRSIndex (crs=EPSG:32610)Using the geo-proj metadata configure a pyresample source area definition for granule and a target area definition for the selected tile.¶
In [59]:
Copied!
from rasterio.transform import array_bounds
from pyresample.area_config import create_area_def
tile_shape = 256
left, bottom, right, top = array_bounds(ds.sizes["y"], ds.sizes["x"], affine)
target_area_def = create_area_def(
area_id=1,
projection="EPSG:3857",
shape=(tile_shape, tile_shape),
area_extent=tile_bbox_geom.bounds
)
source_area_def = create_area_def(
area_id=2,
projection=item_group.attrs["proj:code"],
shape=(ds.sizes["y"], ds.sizes["x"]),
area_extent=item_group.attrs["spatial:bbox"],
)
from rasterio.transform import array_bounds
from pyresample.area_config import create_area_def
tile_shape = 256
left, bottom, right, top = array_bounds(ds.sizes["y"], ds.sizes["x"], affine)
target_area_def = create_area_def(
area_id=1,
projection="EPSG:3857",
shape=(tile_shape, tile_shape),
area_extent=tile_bbox_geom.bounds
)
source_area_def = create_area_def(
area_id=2,
projection=item_group.attrs["proj:code"],
shape=(ds.sizes["y"], ds.sizes["x"]),
area_extent=item_group.attrs["spatial:bbox"],
)
In [60]:
Copied!
from pyresample.resampler import resample_blocks
from pyresample.gradient import block_nn_interpolator, gradient_resampler_indices_block
da = ds[overview]
indices_xy = resample_blocks(
gradient_resampler_indices_block,
source_area_def,
[],
target_area_def,
chunk_size=(tile_shape),
dtype=float,
)
resampled = resample_blocks(
block_nn_interpolator,
source_area_def,
[da.data],
target_area_def,
dst_arrays=[indices_xy],
chunk_size=(tile_shape),
dtype=da.dtype,
)
projected_tile = resampled.compute()
from pyresample.resampler import resample_blocks
from pyresample.gradient import block_nn_interpolator, gradient_resampler_indices_block
da = ds[overview]
indices_xy = resample_blocks(
gradient_resampler_indices_block,
source_area_def,
[],
target_area_def,
chunk_size=(tile_shape),
dtype=float,
)
resampled = resample_blocks(
block_nn_interpolator,
source_area_def,
[da.data],
target_area_def,
dst_arrays=[indices_xy],
chunk_size=(tile_shape),
dtype=da.dtype,
)
projected_tile = resampled.compute()
In [61]:
Copied!
wmts_tile = xr.DataArray(projected_tile, dims=("x", "y"))
wmts_tile.plot()
wmts_tile = xr.DataArray(projected_tile, dims=("x", "y"))
wmts_tile.plot()
Out[61]:
<matplotlib.collections.QuadMesh at 0x14cca0190>
Use the tile's WGS84 bounds to add the Numpy data to Folium.¶
In [62]:
Copied!
wgs84_transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True).transform
tile_bbox_wgs84 = transform(wgs84_transformer, tile_bbox_geom)
west, south, east, north = tile_bbox_wgs84.bounds
bounds = [[south, west], [north, east]]
folium.raster_layers.ImageOverlay(
image=wmts_tile.data,
bounds=bounds,
colormap=lambda x: (x, x, x, 1),
).add_to(m)
m.fit_bounds(bounds)
m
wgs84_transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True).transform
tile_bbox_wgs84 = transform(wgs84_transformer, tile_bbox_geom)
west, south, east, north = tile_bbox_wgs84.bounds
bounds = [[south, west], [north, east]]
folium.raster_layers.ImageOverlay(
image=wmts_tile.data,
bounds=bounds,
colormap=lambda x: (x, x, x, 1),
).add_to(m)
m.fit_bounds(bounds)
m
Out[62]:
Make this Notebook Trusted to load map: File -> Trust Notebook