Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert Thumbor URLs to S3 paths consistently. #83

Merged
merged 2 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions tests/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

from tests import BaseS3TestCase
from thumbor_aws.storage import Storage
from thumbor_aws.utils import normalize_path


@pytest.mark.usefixtures("test_images")
Expand All @@ -43,7 +44,7 @@ async def test_can_put_file_in_s3(self):
f"{self._prefix}{filepath}",
)
status, data, _ = await storage.get_data(
self.bucket_name, storage.normalize_path(filepath)
self.bucket_name, normalize_path(storage.root_path, filepath)
)
expect(status).to_equal(200)
expect(data).to_equal(expected)
Expand All @@ -65,7 +66,8 @@ async def test_can_put_crypto_in_s3(self):
f"{self._prefix}{filepath}.txt",
)
status, data, _ = await storage.get_data(
self.bucket_name, storage.normalize_path(filepath + ".txt")
self.bucket_name,
normalize_path(storage.root_path, filepath + ".txt"),
)
expect(status).to_equal(200)
expect(data).to_equal("ACME-SEC")
Expand All @@ -90,7 +92,7 @@ async def test_can_put_detector_data_in_s3(self):
)
status, data, _ = await storage.get_data(
self.bucket_name,
storage.normalize_path(filepath + ".detectors.txt"),
normalize_path(storage.root_path, filepath + ".detectors.txt"),
)
expect(status).to_equal(200)
expect(data).to_equal(b'{"some": "data"}')
Expand Down Expand Up @@ -121,7 +123,9 @@ async def test_can_handle_expired_data(self):
await storage.put(filepath, expected)

status, data, _ = await storage.get_data(
self.bucket_name, storage.normalize_path(filepath), expiration=0
self.bucket_name,
normalize_path(storage.root_path, filepath),
expiration=0,
)

expect(status).to_equal(410)
Expand All @@ -138,7 +142,7 @@ async def test_can_get_crypto_from_s3(self):
filepath = f"/test/can_put_file_{uuid4()}"

await storage.upload(
storage.normalize_path(filepath + ".txt"),
normalize_path(storage.root_path, filepath + ".txt"),
b"ACME-SEC2",
"application/text",
"http://my-site.com",
Expand All @@ -155,7 +159,7 @@ async def test_can_get_detector_data_from_s3(self):
storage = Storage(self.context)
filepath = f"/test/can_put_file_{uuid4()}"
await storage.upload(
storage.normalize_path(filepath + ".detectors.txt"),
normalize_path(storage.root_path, filepath + ".detectors.txt"),
b'{"some": "data"}',
"application/text",
"",
Expand Down
10 changes: 2 additions & 8 deletions thumbor_aws/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from thumbor_aws.config import Config
from thumbor_aws.s3_client import S3Client
from thumbor_aws.utils import normalize_path

Config.define(
"AWS_LOADER_REGION_NAME",
Expand Down Expand Up @@ -82,7 +83,7 @@ async def load(context, path):
client.configuration["bucket_name"], path
)

norm_path = normalize_url(client.configuration["root_path"], real_path)
norm_path = normalize_path(client.configuration["root_path"], real_path)
result = LoaderResult()

status_code, body, last_modified = await client.get_data(
Expand Down Expand Up @@ -116,10 +117,3 @@ def get_bucket_and_path(configured_bucket: str, path: str) -> (str, str):
real_path = "/".join(split_path[1:])

return bucket, real_path


def normalize_url(prefix: str, path: str) -> str:
"""Function to normalize URLs before reaching into S3"""
if prefix:
return f"{prefix.rstrip('/')}/{path.lstrip('/')}"
return path
20 changes: 7 additions & 13 deletions thumbor_aws/result_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@


from datetime import datetime, timezone
from urllib.parse import unquote

from deprecated import deprecated
from thumbor.engines import BaseEngine
Expand All @@ -19,6 +18,7 @@

from thumbor_aws.config import Config
from thumbor_aws.s3_client import S3Client
from thumbor_aws.utils import normalize_path

Config.define(
"AWS_RESULT_STORAGE_REGION_NAME",
Expand Down Expand Up @@ -135,7 +135,7 @@ def root_path(self) -> str:
)

async def put(self, image_bytes: bytes) -> str:
file_abspath = self.normalize_path(self.context.request.url)
file_abspath = normalize_path(self.prefix, self.context.request.url)
logger.debug("[RESULT_STORAGE] putting at %s", file_abspath)
content_type = BaseEngine.get_mimetype(image_bytes)
response = await self.upload(
Expand All @@ -159,19 +159,13 @@ def is_auto_webp(self) -> bool:
self.context.config.AUTO_WEBP and self.context.request.accepts_webp
)

def normalize_path(self, path: str) -> str:
"""Returns the path used for result storage"""
prefix = "auto_webp/" if self.is_auto_webp else ""
fs_path = unquote(path).lstrip("/")
return (
f"{self.root_path.rstrip('/')}/"
f"{prefix.lstrip('/')}"
f"{fs_path.lstrip('/')}"
)
@property
def prefix(self) -> str:
return ("auto_webp/" if self.is_auto_webp else "") + self.root_path

async def get(self) -> ResultStorageResult:
path = self.context.request.url
file_abspath = self.normalize_path(path)
file_abspath = normalize_path(self.prefix, path)

logger.debug("[RESULT_STORAGE] getting from %s", file_abspath)

Expand Down Expand Up @@ -211,7 +205,7 @@ async def last_updated( # pylint: disable=invalid-overridden-method
self,
) -> datetime:
path = self.context.request.url
file_abspath = self.normalize_path(path)
file_abspath = normalize_path(self.prefix, path)
logger.debug("[RESULT_STORAGE] getting from %s", file_abspath)

response = await self.get_object_metadata(file_abspath)
Expand Down
23 changes: 9 additions & 14 deletions thumbor_aws/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@

from json import dumps, loads
from typing import Any
from urllib.parse import unquote

from thumbor import storages
from thumbor.engines import BaseEngine
from thumbor.utils import logger

from thumbor_aws.config import Config
from thumbor_aws.s3_client import S3Client
from thumbor_aws.utils import normalize_path

Config.define(
"AWS_STORAGE_REGION_NAME",
Expand Down Expand Up @@ -93,7 +93,7 @@ def root_path(self) -> str:

async def put(self, path: str, file_bytes: bytes) -> str:
content_type = BaseEngine.get_mimetype(file_bytes)
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
logger.debug("[STORAGE] putting at %s", normalized_path)
path = await self.upload(
normalized_path,
Expand All @@ -113,7 +113,7 @@ async def put_crypto(self, path: str) -> str:
"True if no SECURITY_KEY specified"
)

normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
crypto_path = f"{normalized_path}.txt"
key = self.context.server.security_key.encode()
s3_path = await self.upload(
Expand All @@ -128,7 +128,7 @@ async def put_crypto(self, path: str) -> str:
return s3_path

async def put_detector_data(self, path: str, data: Any) -> str:
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
filepath = f"{normalized_path}.detectors.txt"
details = dumps(data)
return await self.upload(
Expand All @@ -139,7 +139,7 @@ async def put_detector_data(self, path: str, data: Any) -> str:
)

async def get(self, path: str) -> bytes:
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
status, body, _ = await self.get_data(
self.bucket_name, normalized_path
)
Expand All @@ -149,7 +149,7 @@ async def get(self, path: str) -> bytes:
return body

async def get_crypto(self, path: str) -> str:
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
crypto_path = f"{normalized_path}.txt"
status, body, _ = await self.get_data(self.bucket_name, crypto_path)
if status != 200:
Expand All @@ -158,7 +158,7 @@ async def get_crypto(self, path: str) -> str:
return body.decode("utf-8")

async def get_detector_data(self, path: str) -> Any:
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
detector_path = f"{normalized_path}.detectors.txt"
status, body, _ = await self.get_data(self.bucket_name, detector_path)
if status != 200:
Expand All @@ -167,7 +167,7 @@ async def get_detector_data(self, path: str) -> Any:
return loads(body)

async def exists(self, path: str) -> bool:
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
return await self.object_exists(normalized_path)

async def remove(self, path: str):
Expand All @@ -176,7 +176,7 @@ async def remove(self, path: str):
return

async with self.get_client() as client:
normalized_path = self.normalize_path(path)
normalized_path = normalize_path(self.root_path, path)
response = await client.delete_object(
Bucket=self.bucket_name,
Key=normalized_path,
Expand All @@ -186,8 +186,3 @@ async def remove(self, path: str):
raise RuntimeError(
f"Failed to remove {normalized_path}: Status {status}"
)

def normalize_path(self, path: str) -> str:
"""Returns the path used for storage"""
path = unquote(path).lstrip("/")
return f"{self.root_path.rstrip('/')}/{path.lstrip('/')}"
14 changes: 14 additions & 0 deletions thumbor_aws/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from urllib.parse import unquote


def normalize_path(prefix: str, path: str) -> str:
"""Convert a URL received from Thumbor to a key in a S3 bucket."""
# Thumbor calls load/store functions with a URL that is URL-encoded and
# that always starts with a slash. S3Client doesn't expect URL-encoding
# and S3 keys don't usually start with a slash. Decode and remove slash.
path = unquote(path).lstrip("/")
if prefix:
# Avoid double slash if prefix ends with a slash.
prefix = prefix.rstrip("/")
return f"{prefix}/{path}"
return path