Experimental: async array fetching workflow¶
There are potential use-cases where you might want to use lazycogs to fetch arrays from COGs in a fully async way.
In [1]:
Copied!
import time
from contextlib import contextmanager
from urllib.parse import urlparse
import matplotlib.pyplot as plt
import numpy as np
import rustac
from affine import Affine
from obstore.store import from_url
from pyproj import CRS, Transformer
import lazycogs
@contextmanager
def timer(label="Task"):
t0 = time.perf_counter()
try:
yield
finally:
t = time.perf_counter() - t0
print(f"{label} took {t:.2f} seconds")
import time
from contextlib import contextmanager
from urllib.parse import urlparse
import matplotlib.pyplot as plt
import numpy as np
import rustac
from affine import Affine
from obstore.store import from_url
from pyproj import CRS, Transformer
import lazycogs
@contextmanager
def timer(label="Task"):
t0 = time.perf_counter()
try:
yield
finally:
t = time.perf_counter() - t0
print(f"{label} took {t:.2f} seconds")
Define an async function that fetches the STAC items using rustac.search (async) then calls lazycogs.read_chunk_async to asynchronously create an array in the native CRS of the assets.
In [2]:
Copied!
async def get_array(
dst_crs: CRS,
dst_bbox: tuple[float, float, float, float],
resolution: float,
bands: list[str],
*args,
**kwargs,
) -> dict[str, np.ndarray]:
# perform the STAC search
transformer = Transformer.from_crs(dst_crs, "epsg:4326", always_xy=True)
bbox_4326 = transformer.transform_bounds(*dst_bbox)
items = await rustac.search(
*args,
**kwargs,
bbox=bbox_4326,
)
# build the object store with skip_signature=True
# (needed for sentinel 2 COGs)
sample_item = items[0]
parsed = urlparse(sample_item["assets"][bands[0]]["href"])
store = from_url(f"{parsed.scheme}://{parsed.netloc}", skip_signature=True)
# maximize efficiency by aligning the input grid to the grid of the
# underlying COG assets
xmin, ymin, xmax, ymax = lazycogs.align_bbox(
sample_item["assets"][bands[0]][
"proj:transform"
], # not guaranteed to exist on any item!
dst_bbox,
)
# define the affine transform and array shape
affine = Affine.from_gdal(xmin, resolution, 0.0, ymax, 0.0, -resolution)
width = int((xmax - xmin) / resolution)
height = int((ymax - ymin) / resolution)
# fetch the data
return await lazycogs.read_chunk_async(
items=items,
bands=bands,
chunk_affine=affine,
dst_crs=dst_crs,
chunk_width=width,
chunk_height=height,
store=store,
)
async def get_array(
dst_crs: CRS,
dst_bbox: tuple[float, float, float, float],
resolution: float,
bands: list[str],
*args,
**kwargs,
) -> dict[str, np.ndarray]:
# perform the STAC search
transformer = Transformer.from_crs(dst_crs, "epsg:4326", always_xy=True)
bbox_4326 = transformer.transform_bounds(*dst_bbox)
items = await rustac.search(
*args,
**kwargs,
bbox=bbox_4326,
)
# build the object store with skip_signature=True
# (needed for sentinel 2 COGs)
sample_item = items[0]
parsed = urlparse(sample_item["assets"][bands[0]]["href"])
store = from_url(f"{parsed.scheme}://{parsed.netloc}", skip_signature=True)
# maximize efficiency by aligning the input grid to the grid of the
# underlying COG assets
xmin, ymin, xmax, ymax = lazycogs.align_bbox(
sample_item["assets"][bands[0]][
"proj:transform"
], # not guaranteed to exist on any item!
dst_bbox,
)
# define the affine transform and array shape
affine = Affine.from_gdal(xmin, resolution, 0.0, ymax, 0.0, -resolution)
width = int((xmax - xmin) / resolution)
height = int((ymax - ymin) / resolution)
# fetch the data
return await lazycogs.read_chunk_async(
items=items,
bands=bands,
chunk_affine=affine,
dst_crs=dst_crs,
chunk_width=width,
chunk_height=height,
store=store,
)
Fetch arrays for a range of resolutions. Full resolution of these images is 10, the lower resolutions will use overviews in the COGs.
In [3]:
Copied!
dst_crs = CRS.from_user_input("epsg:32615")
dst_bbox = (564150, 5172421, 591430, 5187766)
data = {}
for resolution in [300, 120, 60, 10]:
with timer(f"loading the {resolution} m resolution array"):
data[resolution] = await get_array(
dst_crs=dst_crs,
dst_bbox=dst_bbox,
resolution=resolution,
bands=["red", "green", "blue"],
href="data/midwest_summer_2025.parquet",
collections=["sentinel-2-c1-l2a"],
datetime="2025-06-02",
limit=100,
filter={
"op": "=",
"args": [{"property": "proj:epsg"}, dst_crs.to_epsg()],
},
)
dst_crs = CRS.from_user_input("epsg:32615")
dst_bbox = (564150, 5172421, 591430, 5187766)
data = {}
for resolution in [300, 120, 60, 10]:
with timer(f"loading the {resolution} m resolution array"):
data[resolution] = await get_array(
dst_crs=dst_crs,
dst_bbox=dst_bbox,
resolution=resolution,
bands=["red", "green", "blue"],
href="data/midwest_summer_2025.parquet",
collections=["sentinel-2-c1-l2a"],
datetime="2025-06-02",
limit=100,
filter={
"op": "=",
"args": [{"property": "proj:epsg"}, dst_crs.to_epsg()],
},
)
loading the 300 m resolution array took 1.31 seconds loading the 120 m resolution array took 1.17 seconds loading the 60 m resolution array took 1.83 seconds loading the 10 m resolution array took 3.14 seconds
Plot them!
In [4]:
Copied!
for arr in data.values():
rgb = np.concatenate(list(arr.values()), axis=0).transpose(1, 2, 0)
rgb_display = (255 * np.clip(rgb.astype(float) / 4000.0, 0, 1)).astype(np.uint16)
plt.figure(figsize=(10, 8))
plt.imshow(rgb_display)
plt.axis("off")
plt.show()
for arr in data.values():
rgb = np.concatenate(list(arr.values()), axis=0).transpose(1, 2, 0)
rgb_display = (255 * np.clip(rgb.astype(float) / 4000.0, 0, 1)).astype(np.uint16)
plt.figure(figsize=(10, 8))
plt.imshow(rgb_display)
plt.axis("off")
plt.show()