Improve the checking logic with mixed normal and enhanced refreshes
RadekBuczkowski committed May 13, 2024
1 parent f5dd3b1 commit 474345c
Showing 2 changed files with 159 additions and 25 deletions.
48 changes: 28 additions & 20 deletions src/spetlr/power_bi/PowerBi.py
@@ -504,7 +504,7 @@ def _verify_workspace(self, *, force_verify: bool = False) -> bool:
                 return False
             df = workspaces.get_pandas_df()
             if self.workspace_id is not None:
-                if self.workspace_id not in df["WorkspaceId"].values:
+                if self.workspace_id not in df.WorkspaceId.values:
                     self._raise_error(
                         f"Workspace id '{self.workspace_id}' cannot be found!"
                     )
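Note that alongside the logic change, this commit switches pandas column access from bracket lookups to attribute style throughout both files. For column names that are valid Python identifiers the two forms return the same Series; a quick check (toy data, not from the repo):

```python
import pandas as pd

# Hypothetical workspace frame; only the column name matches the real code.
df = pd.DataFrame({"WorkspaceId": ["w1", "w2"]})

# Attribute access is equivalent to the bracket lookup for identifier-like names.
assert df.WorkspaceId.equals(df["WorkspaceId"])
```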
@@ -520,7 +520,7 @@ def _verify_workspace(self, *, force_verify: bool = False) -> bool:
                 ],
             )
             return False
-        rows = df.loc[df["WorkspaceName"] == self.workspace_name, "WorkspaceId"]
+        rows = df.loc[df.WorkspaceName == self.workspace_name, "WorkspaceId"]
         if rows.empty:
             self._raise_error(
                 f"Workspace name '{self.workspace_name}' cannot be found!"
@@ -549,7 +549,7 @@ def _verify_dataset(self, *, force_verify: bool = False) -> bool:
         if df.empty and not self._verify_workspace(force_verify=True):
             return False
         if self.dataset_id is not None:
-            if self.dataset_id not in df["DatasetId"].values:
+            if self.dataset_id not in df.DatasetId.values:
                 self._raise_error(
                     f"Dataset id '{self.dataset_id}' cannot be found!"
                 )
@@ -565,7 +565,7 @@ def _verify_dataset(self, *, force_verify: bool = False) -> bool:
                 ],
             )
             return False
-        rows = df.loc[df["DatasetName"] == self.dataset_name, "DatasetId"]
+        rows = df.loc[df.DatasetName == self.dataset_name, "DatasetId"]
         if rows.empty:
             self._raise_error(
                 f"Dataset name '{self.dataset_name}' cannot be found, "
@@ -638,9 +638,9 @@ def _connect_and_get_workspaces(
         if workspaces is None:
             return None
         workspace_list = [
-            (df["WorkspaceId"], df["WorkspaceName"])
+            (df.WorkspaceId, df.WorkspaceName)
             for _, df in workspaces.get_pandas_df().iterrows()
-            if not (skip_read_only and df["IsReadOnly"])
+            if not (skip_read_only and df.IsReadOnly)
         ]
         return workspace_list
 
@@ -707,17 +707,17 @@ def _combine_dataframes(
         if datasets is None:
             return None
         for dataset_id, dataset_name in (
-            (df["DatasetId"], df["DatasetName"])
+            (df.DatasetId, df.DatasetName)
             for _, df in datasets.get_pandas_df().iterrows()
-            if not (skip_not_refreshable and not df["IsRefreshable"])
+            if not (skip_not_refreshable and not df.IsRefreshable)
             and not (
                 skip_effective_identity
                 and (
-                    df["IsEffectiveIdentityRequired"]
-                    or df["IsEffectiveIdentityRolesRequired"]
+                    df.IsEffectiveIdentityRequired
+                    or df.IsEffectiveIdentityRolesRequired
                 )
             )
-            and (df["ConfiguredBy"].lower() if df["ConfiguredBy"] else None)
+            and (df.ConfiguredBy.lower() if df.ConfiguredBy else None)
             not in self.exclude_creators
         ):
             data = function(workspace_id, dataset_id, True)
@@ -760,14 +760,14 @@ def _combine_refresh_history_details(self):
         result = None
         for workspace_id, workspace_name, dataset_id, dataset_name, request_id in (
             (
-                self.workspace_id if self.workspace_id else df["WorkspaceId"],
-                self.workspace_name if self.workspace_id else df["WorkspaceName"],
-                self.dataset_id if self.dataset_id else df["DatasetId"],
-                self.dataset_name if self.dataset_id else df["DatasetName"],
-                df["RequestId"],
+                self.workspace_id if self.workspace_id else df.WorkspaceId,
+                self.workspace_name if self.workspace_id else df.WorkspaceName,
+                self.dataset_id if self.dataset_id else df.DatasetId,
+                self.dataset_name if self.dataset_id else df.DatasetName,
+                df.RequestId,
             )
             for _, df in history.get_pandas_df().iterrows()
-            if df["RefreshType"] == "ViaEnhancedApi" and df["Status"] == "Completed"
+            if df.RefreshType == "ViaEnhancedApi" and df.Status == "Completed"
         ):
             data = self._get_refresh_history_details(
                 workspace_id, dataset_id, request_id, True
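The generator above now yields only completed enhanced-API refreshes, since those are the entries with per-table details to fetch. A stand-alone sketch of just that filter over a toy history frame (hypothetical data; the column names mirror the diff):

```python
import pandas as pd

history = pd.DataFrame(
    {
        "RequestId": ["r1", "r2", "r3"],
        "RefreshType": ["ViaApi", "ViaEnhancedApi", "ViaEnhancedApi"],
        "Status": ["Completed", "Completed", "Unknown"],
    }
)

# Keep only completed enhanced refreshes, as the generator in the diff does.
selected = [
    row.RequestId
    for _, row in history.iterrows()
    if row.RefreshType == "ViaEnhancedApi" and row.Status == "Completed"
]
print(selected)  # -> ['r2']
```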
@@ -814,14 +814,22 @@ def _get_last_refresh(self, *, finished_only: bool = False) -> bool:
             return False
         df = history.get_pandas_df()
         if not df.empty:
-            skip = finished_only and df.Status.iloc[0] == "Unknown" and df.shape[0] > 1
-            first = 1 if skip else 0
+            first = 0
+            while (
+                df.RefreshType.iloc[first] == "ViaEnhancedApi"
+                and df.shape[0] > first + 1
+                and (
+                    self.table_names is None
+                    or (finished_only and first == 0 and df.Status.iloc[0] == "Unknown")
+                )
+            ):
+                first += 1
             self.last_status = df.Status.iloc[first]
             self.last_exception = df.Error.iloc[first]
             # calculate the average duration of all previous API refresh calls
             # when there were no table names specified
             mean = df.loc[
-                (df["RefreshType"] == "ViaApi") & (df["Status"] == "Completed"),
+                (df.RefreshType == "ViaApi") & (df.Status == "Completed"),
                 df.Seconds.name,
             ].mean()
             if pd.isna(mean) or self.table_names:
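The heart of the commit is the `while` loop above: where the old code skipped at most one in-progress row, `_get_last_refresh` now walks past consecutive `ViaEnhancedApi` entries until it reaches a normal refresh or the last row. A minimal sketch of that loop in isolation, run against a toy history frame (hypothetical data; column names as in the diff):

```python
import pandas as pd

history = pd.DataFrame(
    {
        "RefreshType": ["ViaEnhancedApi", "ViaEnhancedApi", "ViaApi"],
        "Status": ["Unknown", "Completed", "Completed"],
    }
)
table_names = None  # with no table names, every enhanced entry is skipped
finished_only = False

first = 0
while (
    history.RefreshType.iloc[first] == "ViaEnhancedApi"
    and history.shape[0] > first + 1
    and (
        table_names is None
        or (finished_only and first == 0 and history.Status.iloc[0] == "Unknown")
    )
):
    first += 1

print(first, history.Status.iloc[first])  # -> 2 Completed
```

When `table_names` is set, the loop only steps past a leading in-progress row, and only when `finished_only` is requested; that is the behavior the updated tests below exercise.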
136 changes: 131 additions & 5 deletions tests/local/power_bi/test_power_bi.py
@@ -704,6 +704,50 @@ def requests_get(url, headers):
 
     @patch("requests.get")
     def test_get_last_refresh_success(self, mock_get):
+        # Arrange
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "value": [
+                {
+                    "requestId": "2",
+                    "id": "2",
+                    "refreshType": "ViaEnhancedApi",  # skip an enhanced refresh
+                    "startTime": "2024-02-27T17:00:00Z",
+                    "endTime": None,
+                    "status": "Unknown",
+                    "serviceExceptionJson": None,
+                },
+                {
+                    "requestId": "1",
+                    "id": "1",
+                    "refreshType": "ViaApi",
+                    "startTime": "2024-02-26T10:00:00Z",
+                    "endTime": "2024-02-26T10:05:00Z",
+                    "status": "Completed",
+                    "serviceExceptionJson": None,
+                },
+            ]
+        }
+        mock_get.return_value = mock_response
+
+        sut = PowerBi(PowerBiClient(), workspace_id="test", dataset_id="test")
+        sut.powerbi_url = "test/"
+        sut._connect = lambda: True
+
+        # Act
+        result = sut._get_last_refresh(finished_only=False)
+
+        # Assert
+        self.assertTrue(result)
+        self.assertEqual("Completed", sut.last_status)
+        self.assertIsNone(sut.last_exception)
+        self.assertEqual("2024-02-26 10:05:00+00:00", str(sut.last_refresh_utc))
+        # average of ViaApi only
+        self.assertEqual(5 * 60, sut.last_duration_in_seconds)
+
+    @patch("requests.get")
+    def test_get_last_refresh_finished_only_with_success(self, mock_get):
         # Arrange
         mock_response = Mock()
         mock_response.status_code = 200
@@ -712,10 +756,10 @@ def test_get_last_refresh_success(self, mock_get):
                 {
                     "requestId": "5",
                     "id": "4",
-                    "refreshType": "ViaApi",
+                    "refreshType": "ViaEnhancedApi",  # skip an enhanced refresh
                     "startTime": "2024-02-27T17:00:00Z",
                     "endTime": "2024-02-27T17:05:00Z",
-                    "status": "Unknown",  # skip because refresh is in progress
+                    "status": "Unknown",
                     "serviceExceptionJson": None,
                 },
                 {
@@ -761,6 +805,7 @@ def test_get_last_refresh_success(self, mock_get):
         sut = PowerBi(PowerBiClient(), workspace_id="test", dataset_id="test")
         sut.powerbi_url = "test/"
         sut._connect = lambda: True
+        expected_duration = int(7.5 * 60)  # average duration from all ViaApi
 
         # Act
         result = sut._get_last_refresh(finished_only=True)
@@ -771,7 +816,79 @@ def test_get_last_refresh_success(self, mock_get):
         self.assertIsNone(sut.last_exception)
         self.assertEqual("2024-02-26 10:05:00+00:00", str(sut.last_refresh_utc))
         # average of ViaApi only
-        self.assertEqual(int(7.5 * 60), sut.last_duration_in_seconds)
+        self.assertEqual(expected_duration, sut.last_duration_in_seconds)
 
+    @patch("requests.get")
+    def test_get_last_refresh_finished_only_normal_with_success(self, mock_get):
+        # Arrange
+        mock_response = Mock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "value": [
+                {
+                    "requestId": "5",
+                    "id": "4",
+                    "refreshType": "ViaEnhancedApi",  # skip an enhanced refresh
+                    "startTime": "2024-02-27T17:00:00Z",
+                    "endTime": "2024-02-27T17:05:00Z",
+                    "status": "Unknown",
+                    "serviceExceptionJson": None,
+                },
+                {
+                    "requestId": "4",
+                    "id": "4",
+                    "refreshType": "ViaEnhancedApi",  # skip an enhanced refresh
+                    "startTime": "2024-02-27T16:00:00Z",
+                    "endTime": "2024-02-27T16:05:00Z",
+                    "status": "Completed",
+                    "serviceExceptionJson": None,
+                },
+                {
+                    "requestId": "3",
+                    "id": "3",
+                    "refreshType": "ViaEnhancedApi",  # skip an enhanced refresh
+                    "startTime": "2024-02-27T15:00:00Z",
+                    "endTime": "2024-02-27T15:05:00Z",
+                    "status": "Completed",
+                    "serviceExceptionJson": None,
+                },
+                {
+                    "requestId": "2",
+                    "id": "2",
+                    "refreshType": "ViaEnhancedApi",  # skip an enhanced refresh
+                    "startTime": "2024-02-27T14:00:00Z",
+                    "endTime": "2024-02-27T14:09:00Z",
+                    "status": "Completed",
+                    "serviceExceptionJson": None,
+                },
+                {
+                    "requestId": "1",
+                    "id": "1",
+                    "refreshType": "ViaEnhancedApi",  # the last is ok
+                    "startTime": "2024-02-27T10:00:00Z",
+                    "endTime": "2024-02-27T10:11:00Z",
+                    "status": "Completed",
+                    "serviceExceptionJson": None,
+                },
+            ]
+        }
+        mock_get.return_value = mock_response
+
+        sut = PowerBi(PowerBiClient(), workspace_id="test", dataset_id="test")
+        sut.powerbi_url = "test/"
+        sut._connect = lambda: True
+
+        # Act
+        result = sut._get_last_refresh(finished_only=True)
+        expected_duration = 0  # average duration from all ViaApi
+
+        # Assert
+        self.assertTrue(result)
+        self.assertEqual("Completed", sut.last_status)
+        self.assertIsNone(sut.last_exception)
+        self.assertEqual("2024-02-27 10:11:00+00:00", str(sut.last_refresh_utc))
+        # average of ViaApi only
+        self.assertEqual(expected_duration, sut.last_duration_in_seconds)
+
     @patch("requests.get")
     @patch("requests.post")
@@ -781,10 +898,19 @@ def test_get_last_refresh_with_tables_success(self, mock_post, mock_get):
         mock_get_response.status_code = 200
         mock_get_response.json.return_value = {
             "value": [
+                {
+                    "requestId": "3",
+                    "id": "3",
+                    "refreshType": "ViaEnhancedApi",
+                    "startTime": "2024-02-27T17:00:00Z",
+                    "endTime": "2024-02-27T17:05:00Z",
+                    "status": "Unknown",  # skip as an enhanced refresh is in progress
+                    "serviceExceptionJson": None,
+                },
                 {
                     "requestId": "2",
                     "id": "2",
-                    "refreshType": "ViaApi",
+                    "refreshType": "ViaEnhancedApi",
                     "startTime": "2024-02-26T10:00:00Z",
                     "endTime": "2024-02-26T10:05:00Z",
                     "status": "Completed",
@@ -846,7 +972,7 @@ def test_get_last_refresh_with_tables_success(self, mock_post, mock_get):
         sut._connect = lambda: True
 
         # Act
-        result = sut._get_last_refresh()
+        result = sut._get_last_refresh(finished_only=True)
 
         # Assert
         self.assertTrue(result)
