apache · villebro · Apr 10, 2020 · Mar 31, 2020 · Apr 3, 2020 · Apr 8, 2020
diff --git a/requirements.txt b/requirements.txt
@@ -52,7 +52,7 @@ marshmallow==2.19.5       # via flask-appbuilder, marshmallow-enum, marshmallow-
 more-itertools==8.1.0     # via zipp
 msgpack==0.6.2            # via apache-superset (setup.py)
 numpy==1.18.1             # via pandas, pyarrow
-pandas==0.25.3            # via apache-superset (setup.py)
+pandas==1.0.3             # via apache-superset (setup.py)
 parsedatetime==2.5        # via apache-superset (setup.py)
 pathlib2==2.3.5           # via apache-superset (setup.py)
 polyline==1.4.0           # via apache-superset (setup.py)

diff --git a/setup.py b/setup.py
@@ -88,7 +88,7 @@ def get_git_sha():
         "isodate",
         "markdown>=3.0",
         "msgpack>=0.6.1, <0.7.0",
-        "pandas>=0.25.3, <1.0",
+        "pandas>=1.0.3, <1.1",
         "parsedatetime",
         "pathlib2",
         "polyline",

diff --git a/superset/common/query_context.py b/superset/common/query_context.py
@@ -51,7 +51,7 @@ class QueryContext:
     custom_cache_timeout: Optional[int]
 
     # TODO: Type datasource and query_object dictionary with TypedDict when it becomes
-    # a vanilla python type https://github.com/python/mypy/issues/5288
+    #  a vanilla python type https://github.com/python/mypy/issues/5288
     def __init__(
         self,
         datasource: Dict[str, Any],
@@ -70,8 +70,8 @@ def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
         """Returns a pandas dataframe based on the query object"""
 
         # Here, we assume that all the queries will use the same datasource, which is
-        # is a valid assumption for current setting. In a long term, we may or maynot
-        # support multiple queries from different data source.
+        # a valid assumption for current setting. In the long term, we may
+        # support multiple queries from different data sources.
 
         timestamp_format = None
         if self.datasource.type == "table":
@@ -105,6 +105,10 @@ def get_query_result(self, query_object: QueryObject) -> Dict[str, Any]:
                 self.df_metrics_to_num(df, query_object)
 
-            df.replace([np.inf, -np.inf], np.nan)
+            # DataFrame.replace returns a new frame; rebind so the result is kept.
+            df = df.replace([np.inf, -np.inf], np.nan)
+
+        df = query_object.exec_post_processing(df)
+
         return {
             "query": result.query,
             "status": result.status,

diff --git a/superset/common/query_object.py b/superset/common/query_object.py
@@ -20,13 +20,16 @@
 from typing import Any, Dict, List, Optional, Union
 
 import simplejson as json
+from flask_babel import gettext as _
+from pandas import DataFrame
 
 from superset import app
-from superset.utils import core as utils
+from superset.exceptions import QueryObjectValidationError
+from superset.utils import core as utils, pandas_postprocessing
 from superset.views.utils import get_time_range_endpoints
 
 # TODO: Type Metrics dictionary with TypedDict when it becomes a vanilla python type
-# https://github.com/python/mypy/issues/5288
+#  https://github.com/python/mypy/issues/5288
 
 
 class QueryObject:
@@ -50,6 +53,7 @@ class QueryObject:
     extras: Dict
     columns: List[str]
     orderby: List[List]
+    post_processing: List[Dict[str, Any]]
 
     def __init__(
         self,
@@ -67,6 +71,7 @@ def __init__(
         extras: Optional[Dict] = None,
         columns: Optional[List[str]] = None,
         orderby: Optional[List[List]] = None,
+        post_processing: Optional[List[Dict[str, Any]]] = None,
         relative_start: str = app.config["DEFAULT_RELATIVE_START_TIME"],
         relative_end: str = app.config["DEFAULT_RELATIVE_END_TIME"],
     ):
@@ -81,8 +86,9 @@ def __init__(
         self.time_range = time_range
         self.time_shift = utils.parse_human_timedelta(time_shift)
         self.groupby = groupby or []
+        self.post_processing = post_processing or []
 
-        # Temporal solution for backward compatability issue due the new format of
+        # Temporary solution for backward compatibility issue due to the new format of
         # non-ad-hoc metric which needs to adhere to superset-ui per
         # https://git.io/Jvm7P.
         self.metrics = [
@@ -138,9 +144,38 @@ def cache_key(self, **extra: Any) -> str:
         if self.time_range:
             cache_dict["time_range"] = self.time_range
+        # Must be added before serialization, otherwise it never reaches the hash.
+        if self.post_processing:
+            cache_dict["post_processing"] = self.post_processing
         json_data = self.json_dumps(cache_dict, sort_keys=True)
         return hashlib.md5(json_data.encode("utf-8")).hexdigest()
 
     def json_dumps(self, obj: Any, sort_keys: bool = False) -> str:
         return json.dumps(
             obj, default=utils.json_int_dttm_ser, ignore_nan=True, sort_keys=sort_keys
         )
+
+    def exec_post_processing(self, df: DataFrame) -> DataFrame:
+        """
+        Apply each configured post processing operation to the DataFrame, in order.
+
+        :param df: DataFrame returned from database model.
+        :return: new DataFrame to which all post processing operations have been
+                 applied
+        :raises QueryObjectValidationError: If an operation is missing or unsupported
+        """
+        for post_process in self.post_processing:
+            operation = post_process.get("operation")
+            if not operation:
+                raise QueryObjectValidationError(
+                    _("`operation` property of post processing object undefined")
+                )
+            if not hasattr(pandas_postprocessing, operation):
+                raise QueryObjectValidationError(
+                    _(
+                        "Unsupported post processing operation: %(operation)s",
+                        operation=operation,
+                    )
+                )
+            options = post_process.get("options") or {}
+            df = getattr(pandas_postprocessing, operation)(df, **options)
+        return df
diff --git a/superset/exceptions.py b/superset/exceptions.py
@@ -68,3 +68,7 @@ class CertificateException(SupersetException):
 
 class DatabaseNotFound(SupersetException):
     status = 400
+
+
+class QueryObjectValidationError(SupersetException):
+    status = 400