Skip to content

Commit

Permalink
✨ Add get_object_sizes method to S3 source (#1092)
Browse files Browse the repository at this point in the history
* ✨ Added new `get_object_sizes` method to the S3 source

* ✨ Added new custom Exception - `NoFilesToProcessError`
  • Loading branch information
Rafalz13 authored Oct 11, 2024
1 parent 38ef42a commit 5122467
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/viadot/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,7 @@ def __init__(

class DataBufferExceededError(Exception):
    """Exception type named for a data buffer being exceeded.

    NOTE(review): the raise sites are outside this view; the meaning above is
    taken from the class name only - confirm against callers.
    """


class NoFilesToProcessError(Exception):
    """Exception type named for the case where there are no files to process.

    NOTE(review): the raise sites are outside this view; the meaning above is
    taken from the class name only - confirm against callers.
    """
13 changes: 13 additions & 0 deletions src/viadot/sources/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,3 +339,16 @@ def get_page_iterator(
paginator = client.get_paginator(operation_name=operation_name)

return paginator.paginate(Bucket=bucket_name, Prefix=directory_path, **kwargs)

def get_object_sizes(self, file_paths: str | list[str]) -> dict[str, int | None]:
    """Retrieve the sizes of specified S3 objects.

    Delegates to `wr.s3.size_objects`, passing this source's session.

    Args:
        file_paths (str | list[str]): A single file path or a list of file
            paths in S3 bucket.

    Returns:
        dict[str, int | None]: A dictionary where the keys are file paths and
            the values are their corresponding sizes in bytes. Per the return
            annotation a value may be `None` (presumably when the size is not
            available - confirm against the `size_objects` documentation).
    """
    return wr.s3.size_objects(boto3_session=self.session, path=file_paths)

0 comments on commit 5122467

Please sign in to comment.