Skip to content

Sign vrt-strings #33

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Nov 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 0.4.4

## New Features

* `sign` will now sign VRT-like strings, like those returned by GDAL's [STACIT](https://gdal.org/drivers/raster/stacit.html) driver.

# 0.4.3

## Bug Fixes
Expand Down
2 changes: 2 additions & 0 deletions planetary_computer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
)
from planetary_computer.settings import set_subscription_key

from planetary_computer.version import __version__

__all__ = [
"set_subscription_key",
Expand All @@ -20,4 +21,5 @@
"sign_item",
"sign_url",
"sign",
"__version__",
]
85 changes: 80 additions & 5 deletions planetary_computer/sas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from datetime import datetime, timezone
from typing import Any, Dict, Optional
import warnings
Expand All @@ -11,7 +12,13 @@
from pystac_client import ItemSearch

from planetary_computer.settings import Settings
from planetary_computer.utils import parse_blob_url, parse_adlfs_url, is_fsspec_asset
from planetary_computer.utils import (
parse_blob_url,
parse_adlfs_url,
is_fsspec_asset,
is_vrt_string,
asset_xpr,
)


BLOB_STORAGE_DOMAIN = ".blob.core.windows.net"
Expand Down Expand Up @@ -73,13 +80,41 @@ def sign(obj: Any) -> Any:


@sign.register(str)
def sign_string(url: str) -> str:
"""Sign a URL or VRT-like string containing URLs with a Shared Access (SAS) Token

Signing with a SAS token allows read access to files in blob storage.

Args:
url (str): The HREF of the asset as a URL or a GDAL VRT

Single URLs can be found on a STAC Item's Asset ``href`` value. Only URLs to
assets in Azure Blob Storage are signed, other URLs are returned unmodified.

GDAL VRTs can combine many data sources into a single mosaic. A VRT can be
built quickly from the GDAL STACIT driver
https://gdal.org/drivers/raster/stacit.html. Each URL to Azure Blob Storage
within the VRT is signed.

Returns:
str: The signed HREF or VRT
"""
if is_vrt_string(url):
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, this doesn't seem to have a measurable effect on the timing of planetary_computer.sign(url):

# main, after signing a URL to get a cached token
In [5]: %timeit planetary_computer.sign(url)
12.2 µs ± 1.97 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)

# HEAD
In [4]: %timeit planetary_computer.sign(url)
11.3 µs ± 1.7 µs per loop (mean ± std. dev. of 7 runs, 100000 loops each)

return sign_vrt_string(url)
else:
return sign_url(url)


def sign_url(url: str) -> str:
"""Sign a URL with a Shared Access (SAS) Token, which allows for read access.
"""Sign a URL or with a Shared Access (SAS) Token

Signing with a SAS token allows read access to files in blob storage.

Args:
url (str): The HREF of the asset in the format of a URL.
This can be found on STAC Item's Asset 'href' value. Only URLs to assets
in Azure Blob Storage are signed, other URLs are returned unmodified.
url (str): The HREF of the asset as a URL

Single URLs can be found on a STAC Item's Asset ``href`` value. Only URLs to
assets in Azure Blob Storage are signed, other URLs are returned unmodified.

Returns:
str: The signed HREF
Expand All @@ -93,6 +128,46 @@ def sign_url(url: str) -> str:
return token.sign(url).href


def _repl_vrt(m: re.Match) -> str:
    """Return the signed form of the URL captured by a regex match.

    Intended as the replacement callback for ``re.sub``-style substitution:
    the full text of the match is handed to ``sign_url`` and whatever it
    returns is used as the replacement.

    Args:
        m (re.Match): A match whose entire matched text is the URL to sign.

    Returns:
        str: The result of ``sign_url`` applied to the matched text.
    """
    # ``m.group(0)`` is the whole matched substring, i.e. the same text as
    # slicing ``m.string`` with ``m.span()``.
    return sign_url(m.group(0))


def sign_vrt_string(vrt: str) -> str:
    """Sign a VRT-like string containing URLs with a Shared Access (SAS) Token

    Signing with a SAS token allows read access to files in blob storage.

    Args:
        vrt (str): The GDAL VRT

            GDAL VRTs can combine many data sources into a single mosaic.
            A VRT can be built quickly from the GDAL STACIT driver
            https://gdal.org/drivers/raster/stacit.html. Each URL to Azure
            Blob Storage within the VRT is signed.

    Returns:
        str: The signed VRT

    Examples
    --------
    >>> import planetary_computer
    >>> from osgeo import gdal
    >>> from pathlib import Path
    >>> search = (
    ...     'STACIT:"https://planetarycomputer.microsoft.com/api/stac/v1/search?'
    ...     "collections=naip&bbox=-100,40,-99,41"
    ...     '&datetime=2019-01-01T00:00:00Z%2F..":asset=image'
    ... )
    >>> gdal.Translate("out.vrt", search)
    >>> signed_vrt = planetary_computer.sign(Path("out.vrt").read_text())
    >>> print(signed_vrt)
    <VRTDataset rasterXSize="161196" rasterYSize="25023">
    ...
    </VRTDataset>
    """
    # Each substring matched by ``asset_xpr`` is replaced with the value
    # returned by ``_repl_vrt``; text outside the matches is left untouched.
    return asset_xpr.sub(_repl_vrt, vrt)


@sign.register(Item)
def sign_item(item: Item) -> Item:
"""Sign all assets within a PySTAC item
Expand Down
17 changes: 17 additions & 0 deletions planetary_computer/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

from typing import Tuple, Optional
from urllib.parse import ParseResult, urlunparse, urlparse

Expand Down Expand Up @@ -57,3 +59,18 @@ def is_fsspec_asset(asset: pystac.Asset) -> bool:
return "account_name" in asset.extra_fields.get(
"table:storage_options", {}
) or "account_name" in asset.extra_fields.get("xarray:storage_options", {})


def is_vrt_string(s: str) -> bool:
    """Return whether ``s`` looks like a VRT document.

    Leading and trailing whitespace is ignored. The remaining text must
    begin with ``<VRTDataset`` and finish with ``</VRTDataset>``.
    """
    trimmed = s.strip()
    if not trimmed.startswith("<VRTDataset"):
        return False
    return trimmed.endswith("</VRTDataset>")


# Matches an HTTPS URL pointing at a blob under the
# ``<account>.blob.core.windows.net`` domain and captures three named groups:
# ``account``, ``container`` and ``blob``.
#
# * ``account`` uses ``[A-Za-z0-9]``; the range ``A-z`` would also accept the
#   ASCII punctuation that sits between ``Z`` and ``a`` (``[ \ ] ^ _ `` and the
#   backtick).
# * ``container`` stops at the first ``/`` and never crosses ``<`` or a
#   newline, so a URL without a blob path cannot swallow following XML markup.
# * ``blob`` runs up to the next ``<``, i.e. the start of the closing XML tag
#   when the URL is embedded in an element's text.
asset_xpr = re.compile(
    r"https://(?P<account>[A-Za-z0-9]+)"
    r"\.blob\.core\.windows\.net/"
    r"(?P<container>[^/<\n]+)"
    r"/(?P<blob>[^<]+)"
)
3 changes: 2 additions & 1 deletion planetary_computer/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
__version__ = "0.4.3"
"""Library version"""

__version__ = "0.4.4"
9 changes: 5 additions & 4 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
black==20.8b1
flake8==3.8.4
black==21.10b0
flake8==4.0.1
ipdb==0.13.7
mypy==0.790
setuptools==56.0.0
mypy==0.910
types-requests==2.26.0
setuptools==58.5.3
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = planetary-computer
version = 0.4.3
version = 0.4.4
license_file = LICENSE
author = microsoft
author_email = planetarycomputer@microsoft.com
Expand Down
Loading