Merge pull request dyvenia#731 from dyvenia/dev

Release 0.4.18 PR
Diego-H-S · Jul 27, 2023 · a2d32f2 · a2d32f2
2 parents 434bb89 + 2cdcea0
commit a2d32f2
Show file tree

Hide file tree

Showing 32 changed files with 2,221 additions and 548 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,33 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
+
+## [0.4.18] - 2023-07-27
+### Added
+- Added `SQLServerToParquet` flow.
+- Added `SAPBW` source class.
+- Added `SAPBWToDF` task class.
+- Added `SAPBWToADLS` flow class.
+- Added a new `end_point` parameter in `genesys_api_connection` to make it more generic.
+- Added `VidClubToADLS` flow class.
+
+### Fixed
+- Fixed bugs in the `subject` (extra separator) and `receivers` (long strings) parameters of the `Outlook` connector.
+- Fixed issue with credentials handling in `VidClub` source class.
+- Fixed issue with missing arguments in `VidClubToDF` task class.
+
+### Changed
+- Changed the Genesys API call method and renamed it from `genesys_generate_exports` to `genesys_api_connection`.
+- Added a `GET` connection inside the `genesys_api_connection` method.
+- Added new parameters in the `GenesysToCSV` task to be able to extract `web message` files.
+- Changed looping structure for API calls in `VidClub` source class to use time intervals.
+- Changed the `VidClubToDF` task class to use the `total_load` function from the source class.
+
+### Removed
+- Removed methods never used in production: `get_analitics_url_report`, `get_all_schedules_job`, `schedule_report`,
+`to_df`, `delete_scheduled_report_job` and `generate_reporting_export`.
+
+
 ## [0.4.17] - 2023-06-15
 ### Fixed
 - Fixed issue with `tzlocal` for O365 package

diff --git a/tests/integration/flows/test_sap_bw_to_adls.py b/tests/integration/flows/test_sap_bw_to_adls.py
@@ -0,0 +1,53 @@
+import os
+from unittest import mock
+
+import pandas as pd
+import pytest
+
+from viadot.flows import SAPBWToADLS
+
+DATA = {
+    "[0CALMONTH].[LEVEL01].[DESCRIPTION]": ["January 2023"],
+    "date": ["2023-06-19 11:12:43+00:00"],
+}
+
+ADLS_FILE_NAME = "test_sap_bw_to_adls.parquet"
+ADLS_DIR_PATH = "raw/tests/"
+
+
+@mock.patch(
+    "viadot.tasks.SAPBWToDF.run",
+    return_value=pd.DataFrame(data=DATA),
+)
+@pytest.mark.run
+def test_sap_bw_to_adls_flow_run(mocked_class):
+    flow = SAPBWToADLS(
+        "test_sap_bw_to_adls_flow_run",
+        sapbw_credentials_key="SAP",
+        env="BW",
+        mdx_query="""
+            SELECT
+                    {
+                }
+                    ON COLUMNS,
+            NON EMPTY
+                    { 
+                        { [0CALMONTH].[202301] } 
+            } 
+            DIMENSION PROPERTIES
+            DESCRIPTION,
+            MEMBER_NAME
+            ON ROWS
+
+            FROM ZCSALORD1/ZBW4_ZCSALORD1_006_BOA
+        
+                    """,
+        mapping_dict={"[0CALMONTH].[LEVEL01].[DESCRIPTION]": "Calendar Year/Month"},
+        overwrite_adls=True,
+        adls_dir_path=ADLS_DIR_PATH,
+        adls_file_name=ADLS_FILE_NAME,
+    )
+    result = flow.run()
+    assert result.is_successful()
+    os.remove("test_sap_bw_to_adls_flow_run.parquet")
+    os.remove("test_sap_bw_to_adls_flow_run.json")
diff --git a/tests/integration/flows/test_sql_server_to_parquet.py b/tests/integration/flows/test_sql_server_to_parquet.py
@@ -0,0 +1,40 @@
+import os
+
+import pytest
+from prefect import Flow
+
+from viadot.flows import SQLServerToParquet
+from viadot.tasks import SQLServerToDF
+from viadot.tasks.sql_server import SQLServerQuery
+
+SCHEMA = "sandbox"
+TABLE = "test"
+PATH = "test.parquet"
+
+
+@pytest.fixture(scope="session")
+def create_table():
+    query_task = SQLServerQuery("AZURE_SQL")
+    query_task.run(f"DROP TABLE IF EXISTS {SCHEMA}.{TABLE}")
+    query_task.run(f"CREATE TABLE {SCHEMA}.{TABLE} (Id INT, Name VARCHAR (10))")
+    yield True
+
+
+def test_sql_server_to_parquet_flow(create_table):
+    flow = SQLServerToParquet(
+        name="test_flow",
+        sql_query=f"SELECT * FROM {SCHEMA}.{TABLE}",
+        local_file_path=PATH,
+        if_exists="fail",
+        sqlserver_config_key="AZURE_SQL",
+        timeout=3600,
+    )
+    flow.gen_flow()
+    assert isinstance(flow, Flow)
+    assert len(flow.tasks) == 3  # Number of tasks in the flow
+    tasks = list(flow.tasks)
+
+    assert isinstance(tasks[0], SQLServerToDF)
+    flow.run()
+    assert os.path.isfile(PATH) == True
+    os.remove(PATH)
diff --git a/tests/integration/flows/test_vidclub_to_adls.py b/tests/integration/flows/test_vidclub_to_adls.py
@@ -0,0 +1,31 @@
+import os
+from unittest import mock
+
+import pandas as pd
+import pytest
+
+from viadot.flows import VidClubToADLS
+
+DATA = {"col1": ["aaa", "bbb", "ccc"], "col2": [11, 22, 33]}
+ADLS_FILE_NAME = "test_vid_club.parquet"
+ADLS_DIR_PATH = "raw/test/"
+
+
+@mock.patch(
+    "viadot.tasks.VidClubToDF.run",
+    return_value=pd.DataFrame(data=DATA),
+)
+@pytest.mark.run
+def test_vidclub_to_adls_run_flow(mocked_class):
+    flow = VidClubToADLS(
+        "test_vidclub_to_adls_flow_run",
+        source=["test"],
+        from_date="2023-06-05",
+        overwrite_adls=True,
+        adls_dir_path=ADLS_DIR_PATH,
+        adls_file_name=ADLS_FILE_NAME,
+    )
+    result = flow.run()
+    assert result.is_successful()
+    os.remove("test_vidclub_to_adls_flow_run.parquet")
+    os.remove("test_vidclub_to_adls_flow_run.json")
diff --git a/tests/integration/tasks/test_bigquery.py b/tests/integration/tasks/test_bigquery.py
@@ -17,10 +17,10 @@ def test_bigquery_to_df_success():
         credentials_key=CREDENTIALS_KEY,
     )
     df = bigquery_to_df_task.run()
-    expectation_columns = ["date", "name", "count", "refresh"]
+    expected_column = ["my_value"]
 
     assert isinstance(df, pd.DataFrame)
-    assert expectation_columns == list(df.columns)
+    assert expected_column == list(df.columns)
 
 
 def test_bigquery_to_df_wrong_table_name(caplog):
@@ -46,7 +46,7 @@ def test_bigquery_to_df_wrong_column_name(caplog):
     with caplog.at_level(logging.WARNING):
         df = bigquery_to_df_task.run()
     assert f"'wrong_column_name' column is not recognized." in caplog.text
-    assert df.empty
+    assert isinstance(df, pd.DataFrame)
 
 
 def test_bigquery_to_df_wrong_query(caplog):