Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions qfieldcloud_sdk/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def logout(ctx):
help="Includes the public project in the list. Default: False",
)
@click.pass_context
def list_projects(ctx, include_public, **opts):
def list_projects(ctx, include_public: bool, **opts) -> None:
"""List QFieldCloud projects."""

log("Listing projects…")
Expand Down Expand Up @@ -396,7 +396,7 @@ def list_jobs(ctx, project_id, job_type: Optional[sdk.JobTypes], **opts):

log(f'Listing project "{project_id}" jobs…')

jobs: List[Dict[Any]] = ctx.obj["client"].list_jobs(
jobs: List[Dict] = ctx.obj["client"].list_jobs(
project_id,
job_type,
sdk.Pagination(**opts),
Expand Down
6 changes: 3 additions & 3 deletions qfieldcloud_sdk/interfaces.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import json
from typing import Any, List, Union
from typing import Any

import requests

Expand Down Expand Up @@ -37,7 +37,7 @@ def __init__(self, **kwargs):
self.headers["X-Previous-Page"] = prev_url
self.headers["X-Next-Page"] = next_url

def json(self) -> Union[QfcMockItem, List[QfcMockItem]]:
def json(self):
if self.request_kwargs["method"] == "GET":
return [QfcMockItem(id=n) for n in range(self.total)]
else:
Expand Down Expand Up @@ -69,7 +69,7 @@ def __init__(self, response: requests.Response, *args):
except Exception:
json_content = ""

self.reason = f'Requested "{response.url}" and got "{response.status_code} {response.reason}":\n{json_content or response.content}'
self.reason = f'Requested "{response.url}" and got "{response.status_code} {response.reason}":\n{json_content or response.content.decode()}'

def __str__(self) -> str:
return self.reason
Expand Down
55 changes: 32 additions & 23 deletions qfieldcloud_sdk/sdk.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
import sys
from enum import Enum
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from typing import Any, Callable, Dict, List, Optional, Union, cast
from urllib import parse as urlparse

import requests
import urllib3
from requests.adapters import HTTPAdapter, Retry

from .interfaces import QfcException, QfcRequest, QfcRequestException
from .utils import get_md5sum, log
from .utils import calc_etag, log

logger = logging.getLogger(__file__)

Expand Down Expand Up @@ -136,13 +136,13 @@ def list_projects(
their own and optionally the public ones.
"""
params = {
"include-public": int(include_public),
"include-public": str(int(include_public)), # type: ignore
}

payload = self._request_json(
"GET", "projects", params=params, pagination=pagination
)
return payload
return cast(List, payload)

def list_remote_files(
self, project_id: str, skip_metadata: bool = True
Expand All @@ -153,7 +153,11 @@ def list_remote_files(
params["skip_metadata"] = "1"

resp = self._request("GET", f"files/{project_id}", params=params)
return resp.json()
remote_files = resp.json()
# TODO remove this temporary decoration with `etag` key

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TODO should just stay as a reminder for the next one who works on this?

Copy link
Collaborator Author

@suricactus suricactus May 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. This decoration will be obsolete once the "file metadata in database" is developed.

remote_files = list(map(lambda f: {"etag": f["md5sum"], **f}, remote_files))

return remote_files

def create_project(
self,
Expand Down Expand Up @@ -217,8 +221,8 @@ def upload_files(
remote_file = f
break

md5sum = get_md5sum(local_file["absolute_filename"])
if remote_file and remote_file.get("md5sum", None) == md5sum:
etag = calc_etag(local_file["absolute_filename"])
if remote_file and remote_file.get("etag", None) == etag:
continue

files_to_upload.append(local_file)
Expand Down Expand Up @@ -333,7 +337,7 @@ def list_jobs(
"""
Returns a paginated list of jobs accessible to the user.
"""
return self._request_json(
payload = self._request_json(
"GET",
"jobs/",
{
Expand All @@ -342,6 +346,7 @@ def list_jobs(
},
pagination=pagination,
)
return cast(List, payload)

def job_trigger(
self, project_id: str, job_type: JobTypes, force: bool = False
Expand Down Expand Up @@ -537,9 +542,9 @@ def download_files(

for file in files_to_download:
local_filename = Path(f'{local_dir}/{file["name"]}')
md5sum = None
etag = None
if not force_download:
md5sum = file.get("md5sum", None)
etag = file.get("etag", None)

try:
self.download_file(
Expand All @@ -548,7 +553,7 @@ def download_files(
local_filename,
file["name"],
show_progress,
md5sum,
etag,
)
file["status"] = FileTransferStatus.SUCCESS
except QfcRequestException as err:
Expand All @@ -575,8 +580,8 @@ def download_file(
local_filename: Path,
remote_filename: Path,
show_progress: bool,
remote_md5sum: str = None,
) -> requests.Response:
remote_etag: str = None,
) -> Optional[requests.Response]:
"""Download a single project file.

Args:
Expand All @@ -585,17 +590,17 @@ def download_file(
local_filename (Path): Local filename
remote_filename (Path): Remote filename
show_progress (bool): Show progressbar in the console
remote_md5sum (str, optional): The md5sum of the remote file. If is None, the download of the file happens even if it already exists locally. Defaults to None.
remote_etag (str, optional): The ETag of the remote file. If it is None, the download of the file happens even if it already exists locally. Defaults to `None`.

Raises:
NotImplementedError: Raised if unknown `download_type` is passed

Returns:
requests.Response: the response object
requests.Response | None: the response object
"""

if remote_md5sum and local_filename.exists():
if get_md5sum(str(local_filename)) == remote_md5sum:
if remote_etag and local_filename.exists():
if calc_etag(str(local_filename)) == remote_etag:
if show_progress:
print(
f"{remote_filename}: Already present locally. Download skipped."
Expand All @@ -604,7 +609,7 @@ def download_file(
logger.info(
f'Skipping download of "{remote_filename}" because it is already present locally'
)
return
return None

if download_type == FileTransferType.PROJECT:
url = f"files/{project_id}/{remote_filename}"
Expand Down Expand Up @@ -688,7 +693,7 @@ def _request_json(
allow_redirects=None,
pagination: Pagination = Pagination(),
) -> Union[List, Dict]:
result = None
result: Optional[Union[List, Dict]] = None
is_empty_pagination = pagination.is_empty

while True:
Expand All @@ -708,11 +713,15 @@ def _request_json(
payload = resp.json()

if isinstance(payload, list):
result = cast(List, result)

if result:
result += payload
else:
result = payload
elif isinstance(payload, dict):
result = cast(Dict, result)

if result:
result = {**result, **payload}
else:
Expand All @@ -731,8 +740,8 @@ def _request_json(

query_params = urlparse.parse_qs(urlparse.urlparse(next_url).query)
pagination = Pagination(
limit=query_params["limit"],
offset=query_params["offset"],
limit=cast(int, query_params["limit"]),
offset=cast(int, query_params["offset"]),
)

return result
Expand Down Expand Up @@ -778,8 +787,8 @@ def _request(
offset = pagination.offset or 0
params = {
**params,
"limit": limit,
"offset": offset,
"limit": str(limit),
"offset": str(offset),
}

request_params = {
Expand Down
48 changes: 47 additions & 1 deletion qfieldcloud_sdk/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
import json
import os
import sys


Expand All @@ -14,7 +15,10 @@ def log(*msgs):


def get_md5sum(filename: str) -> str:
"""Calculate sha256sum of a file"""
"""Calculate md5sum of a file.

Currently unused but will be revived in the upcoming versions.
"""
BLOCKSIZE = 65536
hasher = hashlib.md5()
with open(filename, "rb") as f:
Expand All @@ -23,3 +27,45 @@ def get_md5sum(filename: str) -> str:
hasher.update(buf)
buf = f.read(BLOCKSIZE)
return hasher.hexdigest()


def calc_etag(filename: str, part_size: int = 8 * 1024 * 1024) -> str:
    """Compute the Object Storage (S3) style ETag of a local file.

    For a file that fits into a single part the ETag is the plain MD5 hex
    digest of its content. For a larger file the content is split into
    consecutive parts of ``part_size`` bytes, each part is hashed with MD5,
    the raw (binary) digests are concatenated and hashed again, and the
    number of parts is appended after a hyphen.

    See the inspiration of this implementation here: https://stackoverflow.com/a/58239738/1226137

    Args:
        filename (str): the local filename
        part_size (int): the size of the Object Storage part. Most Object Storages use 8MB. Defaults to 8*1024*1024.

    Returns:
        str: the calculated ETag value
    """
    with open(filename, "rb") as stream:
        total_bytes = os.fstat(stream.fileno()).st_size

        if total_bytes > part_size:
            # Multipart case, e.g. a 14MB file with 5MB parts yields three
            # part digests (5MB, 5MB, 4MB). The final hash is taken over the
            # concatenated *binary* digests, not over their hex text.
            part_digests = []
            while True:
                part = stream.read(part_size)
                if part == b"":
                    break
                part_digests.append(hashlib.md5(part).digest())

            combined = hashlib.md5(b"".join(part_digests))

            return f"{combined.hexdigest()}-{len(part_digests)}"

        # Single-part case: a regular MD5 of the whole content, fed to the
        # hasher in small chunks.
        chunk_size = 65536
        digest = hashlib.md5()
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)

        return digest.hexdigest()