Create a Dynamic RTree backend¶
By default cogeo-mosaic backends were meant to handle writing and reading mosaicjson either from a file or from a database.
While this is fine for most use cases, some users could want something more dynamic
. In this Notebook we will show how to create a Dynamic mosaic backend based on RTree (https://rtree.readthedocs.io/en/latest/tutorial.html#using-rtree-as-a-cheapo-spatial-database).
Requirements¶
To be able to run this notebook you'll need the following requirements:
- cogeo-mosaic
- rtree
- shapely
- tqdm
# Uncomment this line if you need to install the dependencies
# !pip install cogeo-mosaic rtree shapely tqdm
import httpx
import pickle
from tqdm.notebook import tqdm
from rtree import index
1. Create the rtree Index¶
Azure is hosting a RTree index (tile_index) and a binary file with all the Naip geometry (tiles.p)
binary: https://naipeuwest.blob.core.windows.net/naip-index/rtree/tiles.p Rtree: https://naipeuwest.blob.core.windows.net/naip-index/rtree/tile_index.dat and https://naipeuwest.blob.core.windows.net/naip-index/rtree/tile_index.idx
Sadly the Rtree contains only the Indexes, which then has to be used to retrieve the path and geometry in tiles.p
For this Demo we need to store the information directly in the Rtree object
# Download geometry file
url = "https://naipeuwest.blob.core.windows.net/naip-index/rtree/tiles.p"
with httpx.stream("GET", url) as r:
r.raise_for_status()
with open("tiles.p", "wb") as f:
for chunk in r.iter_bytes(chunk_size=8192):
f.write(chunk)
# Load tile index and create rtree index
with open("tiles.p", "rb") as f:
tile_index = pickle.load(f)
/var/folders/5m/9w3sxz2n1d12vyk8k_cz76w40000gn/T/ipykernel_37008/165553250.py:11: UserWarning: Unpickling a shapely <2.0 geometry object. Please save the pickle again; shapely 2.1 will not have this compatibility. tile_index = pickle.load(f)
# Create the Cheapo Rtree database
# Make sure naip.dat and naip.idx do not exists
naip_index = index.Rtree('naip')
for idx, (f, geom) in tqdm(tile_index.items(), total=len(tile_index)):
naip_index.insert(idx, geom.bounds, obj=f"https://naipeuwest.blob.core.windows.net/naip/{f}")
naip_index.close()
0%| | 0/1018253 [00:00<?, ?it/s]
2. Create backend¶
from typing import Dict, List, Callable
import attr
import morecantile
from rio_tiler.io import BaseReader
from rio_tiler.constants import WEB_MERCATOR_TMS
from cogeo_mosaic.backends.base import BaseBackend
from cogeo_mosaic.mosaic import MosaicJSON
@attr.s
class DynamicRtreeBackend(BaseBackend):
asset_filter: Callable = attr.ib(default=lambda x: x)
# The reader is read-only, we can't pass mosaic_def to the init method
mosaic_def: MosaicJSON = attr.ib(init=False)
index = attr.ib(init=False)
tms: morecantile.TileMatrixSet = attr.ib(default=WEB_MERCATOR_TMS)
minzoom: int = attr.ib(default=12) # we know this by analysing the NAIP data
maxzoom: int = attr.ib(default=17) # we know this by analysing the NAIP data
_backend_name = "DynamicSTAC"
def __attrs_post_init__(self):
"""Post Init."""
# Construct a FAKE mosaicJSON
# mosaic_def has to be defined. As we do for the DynamoDB and SQLite backend
# we set `tiles` to an empty list.
self.mosaic_def = MosaicJSON(
mosaicjson="0.0.3",
name="it's fake but it's ok",
minzoom=self.minzoom,
maxzoom=self.maxzoom,
tiles=[],
tilematrixset=self.tms
)
self.index = index.Index(self.input)
self.bounds = tuple(self.index.bounds)
self.crs = self.tms.rasterio_geographic_crs
def close(self):
"""Close SQLite connection."""
self.index.close()
def __exit__(self, exc_type, exc_value, traceback):
"""Support using with Context Managers."""
self.close()
def write(self, overwrite: bool = True):
"""This method is not used but is required by the abstract class."""
pass
def update(self):
"""We overwrite the default method."""
pass
def _read(self) -> MosaicJSON:
"""This method is not used but is required by the abstract class."""
pass
def assets_for_tile(self, x: int, y: int, z: int) -> List[str]:
"""Retrieve assets for tile."""
bbox = self.tms.bounds(x, y, z)
return self.get_assets(bbox)
def assets_for_point(self, lng: float, lat: float) -> List[str]:
"""Retrieve assets for point."""
EPSILON = 1e-14
bbox = (lng - EPSILON, lat - EPSILON, lng + EPSILON, lat + EPSILON)
return self.get_assets(bbox)
def get_assets(self, bbox) -> List[str]:
"""Find assets."""
assets = [n.object for n in self.index.intersection(bbox, objects=True)]
return self.asset_filter(assets)
@property
def _quadkeys(self) -> List[str]:
return []
# Get assets for a Tile requests
with DynamicRtreeBackend("naip") as mosaic:
print(mosaic.assets_for_tile(4684, 6278, 14))
['https://naipeuwest.blob.core.windows.net/naip/v002/md/2013/md_100cm_2013/38077/m_3807724_nw_18_1_20130924.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2011/md_100cm_2011/38077/m_3807724_nw_18_1_20110629.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2015/md_100cm_2015/38077/m_3807724_nw_18_1_20150814.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2011/va_100cm_2011/38077/m_3807724_nw_18_1_20110530.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2017/md_100cm_2017/38077/m_3807724_nw_18_1_20170716.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2018/md_060cm_2018/38077/m_3807724_nw_18_060_20181025.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2018/md_060cm_2018/38077/m_3807724_ne_18_060_20181019.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2015/md_100cm_2015/38077/m_3807724_ne_18_1_20150814.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2011/md_100cm_2011/38077/m_3807724_ne_18_1_20110629.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2013/md_100cm_2013/38077/m_3807724_ne_18_1_20130924.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2011/va_100cm_2011/38077/m_3807724_ne_18_1_20110530.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2017/md_100cm_2017/38077/m_3807724_ne_18_1_20170716.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2012/va_100cm_2012/38077/m_3807724_ne_18_1_20120511.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2012/va_100cm_2012/38077/m_3807724_nw_18_1_20120511.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2014/va_100cm_2014/38077/m_3807724_nw_18_1_20140927.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2014/va_100cm_2014/38077/m_3807724_ne_18_1_20140927.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2016/va_100cm_2016/38077/m_3807724_ne_18_1_20160718.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2016/va_100cm_2016/38077/m_3807724_nw_18_1_20160718.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2018/va_060cm_2018/38077/m_3807724_ne_18_060_20181019.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/va/2018/va_060cm_2018/38077/m_3807724_nw_18_060_20181025.tif']
The naip dataset has couple overlapping years, to create an optimized mosaic we need to filter the assets.
Here is an example of filter function which takes the latest data and highest resolution first.
import pathlib
def latest_naip_asset(assets: List[str]) -> List[str]:
def get_info(asset) -> Dict:
parts = pathlib.Path(asset).parts
capture_date = parts[-1].split("_")[-1].rstrip(".tif")
resolution = int(parts[-3].split("_")[1].rstrip("cm"))
fname_parts = parts[-1].split("_")
quadrangle = f"{fname_parts[1]}_{fname_parts[2]}_{fname_parts[3]}"
return {
"path": asset,
"capture_date": capture_date,
"quadrangle": quadrangle,
"resolution": resolution
}
asset_info = [get_info(f) for f in assets]
# Sort by resolution and by dates
asset_info = sorted(
asset_info, key=lambda item: (item["capture_date"], -item["resolution"]),
reverse=True
)
quad = []
out_dataset = []
for d in asset_info:
q = d["quadrangle"]
if q not in quad:
out_dataset.append(d["path"])
quad.append(q)
return out_dataset
with DynamicRtreeBackend("naip", asset_filter=latest_naip_asset) as mosaic:
print(mosaic.assets_for_tile(4684, 6278, 14))
['https://naipeuwest.blob.core.windows.net/naip/v002/md/2018/md_060cm_2018/38077/m_3807724_nw_18_060_20181025.tif', 'https://naipeuwest.blob.core.windows.net/naip/v002/md/2018/md_060cm_2018/38077/m_3807724_ne_18_060_20181019.tif']