Skip to content

Commit

Permalink
refactor: postprocessing move to unit test (#18779)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhaoyongjie authored Feb 17, 2022
1 parent cd38187 commit 30a9d14
Show file tree
Hide file tree
Showing 17 changed files with 1,324 additions and 1,098 deletions.
1,098 changes: 0 additions & 1,098 deletions tests/integration_tests/pandas_postprocessing_tests.py

This file was deleted.

File renamed without changes.
16 changes: 16 additions & 0 deletions tests/unit_tests/pandas_postprocessing/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
40 changes: 40 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_aggregate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from superset.utils.pandas_postprocessing import aggregate
from tests.unit_tests.fixtures.dataframes import categories_df
from tests.unit_tests.pandas_postprocessing.utils import series_to_list


def test_aggregate():
    """Check ``aggregate`` over ``categories_df`` grouped by ``constant``.

    Three aggregates are requested (a sum and two percentiles); the test
    verifies the resulting column order and the first-row value of each
    aggregate column.
    """

    def percentile_spec(column, q):
        # Build one percentile aggregate definition for the given column.
        return {
            "column": column,
            "operator": "percentile",
            "options": {"q": q},
        }

    # Key order matters: it is asserted against the output column order below.
    aggregate_spec = {
        "asc sum": {"column": "asc_idx", "operator": "sum"},
        "asc q2": percentile_spec("asc_idx", 75),
        "desc q1": percentile_spec("desc_idx", 25),
    }

    result = aggregate(
        df=categories_df, groupby=["constant"], aggregates=aggregate_spec
    )

    assert result.columns.tolist() == ["constant", "asc sum", "asc q2", "desc q1"]

    # Expected value in the first row of each aggregate column.
    expected_first_values = (
        ("asc sum", 5050),
        ("asc q2", 75),
        ("desc q1", 25),
    )
    for column_name, expected in expected_first_values:
        assert series_to_list(result[column_name])[0] == expected
126 changes: 126 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_boxplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest

from superset.exceptions import QueryObjectValidationError
from superset.utils.core import PostProcessingBoxplotWhiskerType
from superset.utils.pandas_postprocessing import boxplot
from tests.unit_tests.fixtures.dataframes import names_df


def test_boxplot_tukey():
    """Boxplot of ``cars`` by ``region`` using the TUKEY whisker type.

    Verifies the set of output columns and that four rows are returned.
    """
    expected_columns = {
        "region",
        "cars__mean",
        "cars__median",
        "cars__q1",
        "cars__q3",
        "cars__max",
        "cars__min",
        "cars__count",
        "cars__outliers",
    }

    result = boxplot(
        df=names_df,
        groupby=["region"],
        whisker_type=PostProcessingBoxplotWhiskerType.TUKEY,
        metrics=["cars"],
    )

    # Column order is not part of the contract checked here, hence the set.
    assert set(result.columns) == expected_columns
    assert len(result) == 4


def test_boxplot_min_max():
    """Boxplot of ``cars`` by ``region`` using the MINMAX whisker type.

    Verifies the set of output columns and that four rows are returned.
    """
    expected_columns = {
        "region",
        "cars__mean",
        "cars__median",
        "cars__q1",
        "cars__q3",
        "cars__max",
        "cars__min",
        "cars__count",
        "cars__outliers",
    }

    result = boxplot(
        df=names_df,
        groupby=["region"],
        whisker_type=PostProcessingBoxplotWhiskerType.MINMAX,
        metrics=["cars"],
    )

    # Column order is not part of the contract checked here, hence the set.
    assert set(result.columns) == expected_columns
    assert len(result) == 4


def test_boxplot_percentile():
    """Boxplot of ``cars`` by ``region`` using the PERCENTILE whisker type.

    The whiskers are configured with ``percentiles=[1, 99]``. Verifies the
    set of output columns and that four rows are returned.
    """
    expected_columns = {
        "region",
        "cars__mean",
        "cars__median",
        "cars__q1",
        "cars__q3",
        "cars__max",
        "cars__min",
        "cars__count",
        "cars__outliers",
    }

    result = boxplot(
        df=names_df,
        groupby=["region"],
        whisker_type=PostProcessingBoxplotWhiskerType.PERCENTILE,
        metrics=["cars"],
        percentiles=[1, 99],
    )

    # Column order is not part of the contract checked here, hence the set.
    assert set(result.columns) == expected_columns
    assert len(result) == 4


def test_boxplot_percentile_incorrect_params():
    """PERCENTILE boxplots must reject each invalid ``percentiles`` input.

    Every case below is expected to raise ``QueryObjectValidationError``.
    """
    # Extra keyword arguments for each failing call, in the order exercised.
    invalid_percentile_kwargs = (
        {},  # ``percentiles`` not supplied at all
        {"percentiles": [10]},  # a single value
        {"percentiles": [90, 10]},  # first value greater than the second
        {"percentiles": [10, 90, 10]},  # three values
    )

    for extra_kwargs in invalid_percentile_kwargs:
        with pytest.raises(QueryObjectValidationError):
            boxplot(
                df=names_df,
                groupby=["region"],
                whisker_type=PostProcessingBoxplotWhiskerType.PERCENTILE,
                metrics=["cars"],
                **extra_kwargs,
            )
62 changes: 62 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from superset.utils.pandas_postprocessing import compare
from tests.unit_tests.fixtures.dataframes import timeseries_df2
from tests.unit_tests.pandas_postprocessing.utils import series_to_list


def test_compare():
    """Exercise ``compare`` on columns ``y`` and ``z`` of ``timeseries_df2``.

    Covers the ``difference``, ``percentage`` and ``ratio`` compare types,
    plus ``difference`` with ``drop_original_columns=True``.
    """

    def run_compare(compare_type, **extra_kwargs):
        # Fresh argument lists are built on every call, as in separate calls.
        return compare(
            df=timeseries_df2,
            source_columns=["y"],
            compare_columns=["z"],
            compare_type=compare_type,
            **extra_kwargs,
        )

    # `difference` comparison
    difference_df = run_compare("difference")
    assert difference_df.columns.tolist() == ["label", "y", "z", "difference__y__z"]
    assert series_to_list(difference_df["difference__y__z"]) == [
        0.0,
        -2.0,
        -8.0,
        -6.0,
    ]

    # `difference` again, this time dropping the original columns
    dropped_df = run_compare("difference", drop_original_columns=True)
    assert dropped_df.columns.tolist() == ["label", "difference__y__z"]

    # `percentage` comparison
    percentage_df = run_compare("percentage")
    assert percentage_df.columns.tolist() == ["label", "y", "z", "percentage__y__z"]
    assert series_to_list(percentage_df["percentage__y__z"]) == [
        0.0,
        -0.5,
        -0.8,
        -0.75,
    ]

    # `ratio` comparison
    ratio_df = run_compare("ratio")
    assert ratio_df.columns.tolist() == ["label", "y", "z", "ratio__y__z"]
    assert series_to_list(ratio_df["ratio__y__z"]) == [1.0, 0.5, 0.2, 0.25]
69 changes: 69 additions & 0 deletions tests/unit_tests/pandas_postprocessing/test_contribution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import datetime

import pytest
from pandas import DataFrame

from superset.exceptions import QueryObjectValidationError
from superset.utils.core import DTTM_ALIAS, PostProcessingContributionOrientation
from superset.utils.pandas_postprocessing import contribution


def test_contribution():
    """Exercise ``contribution`` on a two-row frame with columns ``a``, ``b``.

    Covers the two validation errors (temporal column selected; mismatched
    ``columns``/``rename_columns`` lengths), row-wise contribution,
    column-wise contribution after removing the temporal column, and
    contribution restricted to one column with a rename.
    """
    timestamps = [datetime(2020, 7, 16, 14, minute) for minute in (49, 50)]
    frame = DataFrame({DTTM_ALIAS: timestamps, "a": [1, 3], "b": [1, 9]})

    # Selecting the temporal column must be rejected.
    with pytest.raises(QueryObjectValidationError, match="not numeric"):
        contribution(frame, columns=[DTTM_ALIAS])

    # `columns` and `rename_columns` of different lengths must be rejected.
    with pytest.raises(QueryObjectValidationError, match="same length"):
        contribution(frame, columns=["a"], rename_columns=["aa", "bb"])

    # cell contribution across row
    by_row = contribution(
        frame,
        orientation=PostProcessingContributionOrientation.ROW,
    )
    assert by_row.columns.tolist() == [DTTM_ALIAS, "a", "b"]
    assert by_row["a"].tolist() == [0.5, 0.25]
    assert by_row["b"].tolist() == [0.5, 0.75]

    # cell contribution across column without temporal column
    frame.pop(DTTM_ALIAS)
    by_column = contribution(
        frame,
        orientation=PostProcessingContributionOrientation.COLUMN,
    )
    assert by_column.columns.tolist() == ["a", "b"]
    assert by_column["a"].tolist() == [0.25, 0.75]
    assert by_column["b"].tolist() == [0.1, 0.9]

    # contribution only on selected columns; originals are kept alongside
    selected = contribution(
        frame,
        orientation=PostProcessingContributionOrientation.COLUMN,
        columns=["a"],
        rename_columns=["pct_a"],
    )
    assert selected.columns.tolist() == ["a", "b", "pct_a"]
    assert selected["a"].tolist() == [1, 3]
    assert selected["b"].tolist() == [1, 9]
    assert selected["pct_a"].tolist() == [0.25, 0.75]
Loading

0 comments on commit 30a9d14

Please sign in to comment.