From 51224672ffb891e2c0b93627a2e49792636df24e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Ziemianek?= <49795849+Rafalz13@users.noreply.github.com> Date: Fri, 11 Oct 2024 14:55:29 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20`get=5Fobject=5Fsizes`=20meth?= =?UTF-8?q?od=20to=20S3=20source=20(#1092)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✨ Added new method to S3 `get_object_sizes` * ✨ Added new custom Exception - `NoFilesToProcessError` --- src/viadot/exceptions.py | 4 ++++ src/viadot/sources/s3.py | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/src/viadot/exceptions.py b/src/viadot/exceptions.py index 5a69383dd..d717de7c9 100644 --- a/src/viadot/exceptions.py +++ b/src/viadot/exceptions.py @@ -71,3 +71,7 @@ def __init__( class DataBufferExceededError(Exception): pass + + +class NoFilesToProcessError(Exception): + pass diff --git a/src/viadot/sources/s3.py b/src/viadot/sources/s3.py index efb950e29..ef2c4c733 100644 --- a/src/viadot/sources/s3.py +++ b/src/viadot/sources/s3.py @@ -339,3 +339,16 @@ def get_page_iterator( paginator = client.get_paginator(operation_name=operation_name) return paginator.paginate(Bucket=bucket_name, Prefix=directory_path, **kwargs) + + def get_object_sizes(self, file_paths: str | list[str]) -> dict[str, int | None]: + """Retrieve the sizes of specified S3 objects. + + Args: + file_paths (str | list[str]): A single file path or a list of file paths + in S3 bucket. + + Returns: + dict[str, int | None]: A dictionary where the keys are file paths and the values + are their corresponding sizes in bytes. + """ + return wr.s3.size_objects(boto3_session=self.session, path=file_paths)