Skip to content

Commit

Permalink
Merge pull request dyvenia#758 from adrian-wojcik/sharepoint_and_add_…
Browse files Browse the repository at this point in the history
…viadot_metadata_columns_decorator

♻️ Changed _and_add_viadot_metadata_columns_decorator and applied it t…
  • Loading branch information
Rafalz13 authored Oct 10, 2023
2 parents 37909e0 + c5a1ed6 commit 35ecc6a
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

### Changed
- Modified `add_viadot_metadata_columns` to accept a `source_name` parameter, so the decorator can be applied to the `to_df` function or to any function where the DataFrame is generated.
- Changed the `SharepointToDF` task to use the updated `add_viadot_metadata_columns` decorator with `source_name="Sharepoint"`.

- Changed `Mindful` credentials to be passed via the `auth` parameter instead of the `header`.

Expand Down
2 changes: 2 additions & 0 deletions viadot/tasks/sharepoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from ..exceptions import ValidationError
from ..sources import Sharepoint
from .azure_key_vault import AzureKeyVaultSecret
from ..utils import add_viadot_metadata_columns

logger = logging.get_logger()

Expand Down Expand Up @@ -147,6 +148,7 @@ def split_sheet(
"sheet_number",
"validate_excel_file",
)
@add_viadot_metadata_columns(source_name="Sharepoint")
def run(
self,
path_to_file: str = None,
Expand Down
45 changes: 34 additions & 11 deletions viadot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,21 +408,44 @@ def check_if_empty_file(
handle_if_empty_file(if_empty, message=f"Input file - '{path}' is empty.")


def add_viadot_metadata_columns(func: Callable) -> Callable:
def add_viadot_metadata_columns(source_name: str = None) -> Callable:
"""
Decorator that adds metadata columns to df in 'to_df' method.
For now only _viadot_source is available because _viadot_downloaded_at_utc is added on the Flow level.
Args:
source_name (str, optional): The name of the source to be included in the DataFrame.
This should be provided when creating a DataFrame in a Task, rather than in a Source.
Defaults to None.
Warning: Please remember to include brackets when applying a decorator, even if you are not passing the 'source_name' parameter.
Example:
In task:
@add_viadot_metadata_columns(source_name="Sharepoint")
def to_df(self):
...
In source:
@add_viadot_metadata_columns()
def to_df(self):
...
"""

@functools.wraps(func)
def wrapper(*args, **kwargs) -> pd.DataFrame:
df = func(*args, **kwargs)
def decorator(func) -> Callable:
@functools.wraps(func)
def wrapper(*args, **kwargs) -> pd.DataFrame:
df = func(*args, **kwargs)

df["_viadot_source"] = (
source_name if source_name is not None else args[0].__class__.__name__
)

return df

# Accessing instance
instance = args[0]
_viadot_source = kwargs.get("source_name") or instance.__class__.__name__
df["_viadot_source"] = _viadot_source
# df["_viadot_downloaded_at_utc"] = datetime.now(timezone.utc).replace(microsecond=0)
return df
return wrapper

return wrapper
return decorator

0 comments on commit 35ecc6a

Please sign in to comment.