Skip to content

ProcessLinksMiddleware

Middleware to remove the application root path from incoming requests and update links in responses.

ProcessLinksMiddleware dataclass

Bases: JsonResponseMiddleware

Middleware that rewrites link hrefs in JSON responses: for links that point at the upstream API or at the proxy itself, it strips the upstream_url path prefix and prepends the root_path when one is configured.

Parameters:

Name Type Description Default
app Callable[list, Awaitable[None]]
required
upstream_url str
required
root_path str | None
None
json_content_type_expr str
'application/(geo\\+)?json'
Source code in src/stac_auth_proxy/middleware/ProcessLinksMiddleware.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
@dataclass
class ProcessLinksMiddleware(JsonResponseMiddleware):
    """
    Middleware to update links in responses, removing the upstream_url path and adding
    the root_path if it exists.

    Only links whose host is either the upstream host or the host of the incoming
    request are rewritten; every other link is left untouched.
    """

    # Wrapped ASGI application.
    app: ASGIApp
    # URL of the upstream API; its host and path prefix are removed from links.
    upstream_url: str
    # Optional path prefix prepended to every rewritten link path.
    root_path: Optional[str] = None

    # Regular expression matched (from the start) against the response content type.
    json_content_type_expr: str = r"application/(geo\+)?json"

    def should_transform_response(self, request: Request, scope: Scope) -> bool:
        """Only transform responses with JSON content type."""
        return bool(
            re.match(
                self.json_content_type_expr,
                Headers(scope=scope).get("content-type", ""),
            )
        )

    def transform_json(self, data: dict[str, Any], request: Request) -> dict[str, Any]:
        """
        Update links in the response to include root_path.

        Links are modified in place and the same ``data`` object is returned. A link
        that fails to be rewritten is logged and skipped so that one bad href does not
        prevent the remaining links from being processed.
        """
        # Get the client's actual base URL (accounting for load balancers/proxies)
        req_base_url = get_base_url(request)
        parsed_req_url = urlparse(req_base_url)
        parsed_upstream_url = urlparse(self.upstream_url)

        for link in get_links(data):
            try:
                self._update_link(link, parsed_req_url, parsed_upstream_url)
            except Exception as e:
                # Deliberately best-effort: log and continue with the next link.
                logger.error(
                    "Failed to parse link href %r, (ignoring): %s",
                    link.get("href"),
                    str(e),
                )
        return data

    def _update_link(
        self, link: dict[str, Any], request_url: ParseResult, upstream_url: ParseResult
    ) -> None:
        """
        Ensure that link hrefs that are local to upstream url are rewritten as local to
        the proxy.

        The link is updated in place. Links without an ``href``, links to other hosts,
        and links outside of the upstream path are left unchanged.
        """
        if "href" not in link:
            logger.warning("Link %r has no href", link)
            return

        parsed_link = urlparse(link["href"])

        if parsed_link.netloc not in [
            request_url.netloc,
            upstream_url.netloc,
        ]:
            logger.debug(
                "Ignoring link %s because it is not for an endpoint behind this proxy (%s or %s)",
                link["href"],
                request_url.netloc,
                upstream_url.netloc,
            )
            return

        # Normalise the upstream path so that "/api" and "/api/" behave identically and
        # an empty or "/" path means "there is no prefix to strip".
        upstream_prefix = upstream_url.path.rstrip("/")
        link_path = parsed_link.path

        # Match on a path-segment boundary: "/api-v2/items" is NOT a descendant of
        # "/api", whereas "/api" itself and "/api/items" are.
        is_descendant = link_path == upstream_prefix or link_path.startswith(
            f"{upstream_prefix}/"
        )

        # If the link path is not a descendant of the upstream path, don't transform it
        if upstream_prefix and not is_descendant:
            logger.debug(
                "Ignoring link %s because it is not descendant of upstream path (%s)",
                link["href"],
                upstream_url.path,
            )
            return

        # Replace the upstream host with the client's host
        if parsed_link.netloc == upstream_url.netloc:
            parsed_link = parsed_link._replace(
                netloc=request_url.netloc, scheme=request_url.scheme
            )

        # Rewrite the link path; stripping the normalised prefix keeps the leading "/"
        # of the remainder so that root_path can be prepended safely.
        if upstream_prefix:
            parsed_link = parsed_link._replace(path=link_path[len(upstream_prefix) :])

        # Add the root_path to the link if it exists
        if self.root_path:
            parsed_link = parsed_link._replace(
                path=f"{self.root_path}{parsed_link.path}"
            )

        new_href = urlunparse(parsed_link)
        logger.debug(
            "Rewriting %r link %r to %r",
            link.get("rel"),
            link["href"],
            new_href,
        )

        link["href"] = new_href

should_transform_response(request: Request, scope: Scope) -> bool

Only transform responses with JSON content type.

Source code in src/stac_auth_proxy/middleware/ProcessLinksMiddleware.py
33
34
35
36
37
38
39
40
def should_transform_response(self, request: Request, scope: Scope) -> bool:
    """Report whether the response content type matches the configured JSON pattern."""
    # A missing content-type header is treated as the empty string.
    content_type = Headers(scope=scope).get("content-type", "")
    matched = re.match(self.json_content_type_expr, content_type)
    return matched is not None

transform_json(data: dict[str, Any], request: Request) -> dict[str, Any]

Update links in the response to include root_path.

Source code in src/stac_auth_proxy/middleware/ProcessLinksMiddleware.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def transform_json(self, data: dict[str, Any], request: Request) -> dict[str, Any]:
    """
    Rewrite every link found in ``data`` and return the same object.

    Each link is passed to ``_update_link`` along with the parsed client base URL and
    the parsed upstream URL. A link that raises is logged and skipped.
    """
    # Both URLs are parsed once, ahead of the loop, and shared by every link.
    client_base = urlparse(get_base_url(request))
    upstream = urlparse(self.upstream_url)

    for entry in get_links(data):
        try:
            self._update_link(entry, client_base, upstream)
        except Exception as exc:
            # Best-effort: report the failure and move on to the next link.
            logger.error(
                "Failed to parse link href %r, (ignoring): %s",
                entry.get("href"),
                str(exc),
            )

    return data