From 51224672ffb891e2c0b93627a2e49792636df24e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Ziemianek?=
 <49795849+Rafalz13@users.noreply.github.com>
Date: Fri, 11 Oct 2024 14:55:29 +0200
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20`get=5Fobject=5Fsizes`=20meth?=
 =?UTF-8?q?od=20to=20S3=20source=20(#1092)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* ✨ Added new method to S3 `get_object_sizes`

* ✨ Added new custom Exception - `NoFilesToProcessError`
---
 src/viadot/exceptions.py |  4 ++++
 src/viadot/sources/s3.py | 13 +++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/src/viadot/exceptions.py b/src/viadot/exceptions.py
index 5a69383dd..d717de7c9 100644
--- a/src/viadot/exceptions.py
+++ b/src/viadot/exceptions.py
@@ -71,3 +71,7 @@ def __init__(
 
 class DataBufferExceededError(Exception):
     pass
+
+
+class NoFilesToProcessError(Exception):
+    pass
diff --git a/src/viadot/sources/s3.py b/src/viadot/sources/s3.py
index efb950e29..ef2c4c733 100644
--- a/src/viadot/sources/s3.py
+++ b/src/viadot/sources/s3.py
@@ -339,3 +339,16 @@ def get_page_iterator(
         paginator = client.get_paginator(operation_name=operation_name)
 
         return paginator.paginate(Bucket=bucket_name, Prefix=directory_path, **kwargs)
+
+    def get_object_sizes(self, file_paths: str | list[str]) -> dict[str, int | None]:
+        """Retrieve the sizes of specified S3 objects.
+
+        Args:
+            file_paths (str | list[str]): A single file path or a list of file paths
+                in an S3 bucket.
+
+        Returns:
+            dict[str, int | None]: A dictionary where the keys are file paths and the
+                values are their sizes in bytes, or `None` if no size was returned.
+        """
+        return wr.s3.size_objects(boto3_session=self.session, path=file_paths)