Merge pull request #5197 from StackStorm/api_endpoint_exclude_result_if_size_greater

Kami · web-flow · commit 9909f2bbb7b8 · 2021-03-31T22:22:38.000+02:00
Implement coditional execution "result" field retrieval for the "/v1/executions/&lt;id&gt;" (get one execution) API endpoint
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -173,6 +173,18 @@ Improvements
 
   Contributed by @Kami.
 
+* Add new ``?max_result_size`` query parameter filter to the ``GET /v1/executiond/<id>`` API
+  endpoint.
+
+  This query parameter allows clients to implement conditional execution result retrieval and
+  only retrieve the result field if it's smaller than the provided value.
+
+  This comes handy in the various client scenarios (such as st2web) where we don't display and
+  render very large results directly since it allows to speed things up and decrease amount of
+  data retrieved and parsed. (improvement) #5197
+
+  Contributed by @Kami.
+
 Fixed
 ~~~~~
 
diff --git a/st2api/st2api/controllers/resource.py b/st2api/st2api/controllers/resource.py
@@ -299,16 +299,23 @@ def _get_one_by_id(
         exclude_fields=None,
         include_fields=None,
         from_model_kwargs=None,
+        get_by_id_kwargs=None,
     ):
         """
         :param exclude_fields: A list of object fields to exclude.
         :type exclude_fields: ``list``
         :param include_fields: A list of object fields to include.
         :type include_fields: ``list``
+        :param get_by_id_kwargs: Additional keyword arguments which are passed to the
+                                 "_get_by_id()" method.
+        :type get_by_id_kwargs: ``dict`` or ``None``
         """
 
         instance = self._get_by_id(
-            resource_id=id, exclude_fields=exclude_fields, include_fields=include_fields
+            resource_id=id,
+            exclude_fields=exclude_fields,
+            include_fields=include_fields,
+            **get_by_id_kwargs or {},
         )
 
         if permission_type:
diff --git a/st2api/st2api/controllers/v1/actionexecutions.py b/st2api/st2api/controllers/v1/actionexecutions.py
@@ -13,6 +13,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Optional
+
 import copy
 import re
 import sys
@@ -24,6 +26,7 @@
 import jsonschema
 from oslo_config import cfg
 from six.moves import http_client
+from mongoengine.queryset.visitor import Q
 
 from st2api.controllers.base import BaseRestControllerMixin
 from st2api.controllers.resource import ResourceController
@@ -728,6 +731,7 @@ def get_one(
         exclude_attributes=None,
         include_attributes=None,
         show_secrets=False,
+        max_result_size=None,
     ):
         """
         Retrieve a single execution.
@@ -751,6 +755,10 @@ def get_one(
             )
         }
 
+        max_result_size = self._validate_max_result_size(
+            max_result_size=max_result_size
+        )
+
         # Special case for id == "last"
         if id == "last":
             execution_db = (
@@ -769,6 +777,7 @@ def get_one(
             requester_user=requester_user,
             from_model_kwargs=from_model_kwargs,
             permission_type=PermissionType.EXECUTION_VIEW,
+            get_by_id_kwargs={"max_result_size": max_result_size},
         )
 
     def post(
@@ -980,6 +989,94 @@ def delete(self, id, requester_user, show_secrets=False):
             execution_db, mask_secrets=from_model_kwargs["mask_secrets"]
         )
 
+    def _validate_max_result_size(
+        self, max_result_size: Optional[int]
+    ) -> Optional[int]:
+        """
+        Validate value of the ?max_result_size query parameter (if provided).
+        """
+        # Maximum limit for MongoDB collection document is 16 MB and the field itself can't be
+        # larger than that obviously. And in reality due to the other fields, overhead, etc,
+        # 14 is the upper limit.
+        if not max_result_size:
+            return max_result_size
+
+        if max_result_size <= 0:
+            raise ValueError("max_result_size must be a positive number")
+
+        if max_result_size > 14 * 1024 * 1024:
+            raise ValueError(
+                "max_result_size query parameter must be smaller than 14 MB"
+            )
+
+        return max_result_size
+
+    def _get_by_id(
+        self,
+        resource_id,
+        exclude_fields=None,
+        include_fields=None,
+        max_result_size=None,
+    ):
+        """
+        Custom version of _get_by_id() which supports ?max_result_size pre-filtering and not
+        returning result field for executions which result size exceeds this threshold.
+
+        This functionality allows us to implement fast and efficient retrievals in st2web.
+        """
+        exclude_fields = exclude_fields or []
+        include_fields = include_fields or []
+
+        if not max_result_size:
+            # If max_result_size is not provided we don't perform any prefiltering and directly
+            # call parent method
+            execution_db = super(ActionExecutionsController, self)._get_by_id(
+                resource_id=resource_id,
+                exclude_fields=exclude_fields,
+                include_fields=include_fields,
+            )
+            return execution_db
+
+        # Special query where we check if result size is smaller than pre-defined or that field
+        # doesn't not exist (old executions) and only return the result if the condition is met.
+        # This allows us to implement fast and efficient retrievals of executions on the client
+        # st2web side where we don't want to retrieve and display result directly for executions
+        # with large results
+        # Keep in mind that the query itself is very fast and adds almost no overhead for API
+        # operations which pass this query parameter because we first filter on the ID (indexed
+        # field) and perform projection query with two tiny fields (based on real life testing it
+        # takes less than 3 ms in most scenarios).
+        execution_db = self.access.get(
+            Q(id=resource_id)
+            & (Q(result_size__lte=max_result_size) | Q(result_size__not__exists=True)),
+            only_fields=["id", "result_size"],
+        )
+
+        # if result is empty, this means that execution either doesn't exist or the result is
+        # larger than threshold which means we don't want to retrieve and return result to
+        # the end user to we set exclude_fields accordingly
+        if not execution_db:
+            LOG.debug(
+                "Execution with id %s and result_size < %s not found. This means "
+                "execution with this ID doesn't exist or result_size exceeds the "
+                "threshold. Result field will be excluded from the retrieval and "
+                "the response." % (resource_id, max_result_size)
+            )
+
+            if include_fields and "result" in include_fields:
+                include_fields.remove("result")
+            elif not include_fields:
+                exclude_fields += ["result"]
+
+        # Now call parent get by id with potentially modified include / exclude fields in case
+        # result should not be included
+        execution_db = super(ActionExecutionsController, self)._get_by_id(
+            resource_id=resource_id,
+            exclude_fields=exclude_fields,
+            include_fields=include_fields,
+        )
+        return execution_db
+
     def _get_action_executions(
         self,
         exclude_fields=None,
diff --git a/st2api/tests/unit/controllers/v1/test_executions.py b/st2api/tests/unit/controllers/v1/test_executions.py
@@ -45,6 +45,7 @@
 from st2common.util import crypto as crypto_utils
 from st2common.util import date as date_utils
 from st2common.util import isotime
+from st2common.util.jsonify import json_encode
 from st2api.controllers.v1.actionexecutions import ActionExecutionsController
 import st2common.validators.api.action as action_validator
 from st2tests.api import BaseActionExecutionControllerTestCase
@@ -351,6 +352,103 @@ def test_get_one(self):
         self.assertEqual(get_resp.status_int, 200)
         self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
 
+    def test_get_one_max_result_size_query_parameter(self):
+        data = copy.deepcopy(LIVE_ACTION_1)
+        post_resp = self._do_post(LIVE_ACTION_1)
+
+        actionexecution_id = self._get_actionexecution_id(post_resp)
+
+        # Update it with the result (this populates result and result size attributes)
+        data = {
+            "result": {"fooo": "a" * 1000},
+            "status": "succeeded",
+        }
+        actual_result_size = len(json_encode(data["result"]))
+
+        # NOTE: In real-life result_size is populdated in update_execution() method which is
+        # called in the end with the actual result
+        put_resp = self._do_put(actionexecution_id, data)
+        self.assertEqual(put_resp.json["result_size"], actual_result_size)
+        self.assertEqual(put_resp.json["result"], data["result"])
+
+        # 1. ?max_result_size query filter not provided
+        get_resp = self._do_get_one(actionexecution_id)
+        self.assertEqual(get_resp.status_int, 200)
+        self.assertEqual(get_resp.json["result"], data["result"])
+        self.assertEqual(get_resp.json["result_size"], actual_result_size)
+        self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
+
+        # 2. ?max_result_size > actual result size
+        get_resp = self._do_get_one(
+            actionexecution_id + "?max_result_size=%s" % (actual_result_size + 1)
+        )
+        self.assertEqual(get_resp.status_int, 200)
+        self.assertEqual(get_resp.json["result_size"], actual_result_size)
+        self.assertEqual(get_resp.json["result"], data["result"])
+        self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
+
+        # 3. ?max_result_size < actual result size - result field should not be returned
+        get_resp = self._do_get_one(
+            actionexecution_id + "?max_result_size=%s" % (actual_result_size - 1)
+        )
+        self.assertEqual(get_resp.status_int, 200)
+        self.assertEqual(get_resp.json["result_size"], actual_result_size)
+        self.assertTrue("result" not in get_resp.json)
+        self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
+
+        # 4. ?max_result_size < actual result size and ?include_attributes=result - result field
+        # should not be returned
+        get_resp = self._do_get_one(
+            actionexecution_id
+            + "?include_attributes=result,result_size&max_result_size=%s"
+            % (actual_result_size - 1)
+        )
+        self.assertEqual(get_resp.status_int, 200)
+        self.assertEqual(get_resp.json["result_size"], actual_result_size)
+        self.assertTrue("result" not in get_resp.json)
+        self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
+
+        # 5. ?max_result_size > actual result size and ?exclude_attributes=result - result field
+        # should not be returned
+        get_resp = self._do_get_one(
+            actionexecution_id
+            + "?include_attributes=result_size&exclude_attriubtes=result&max_result_size=%s"
+            % (actual_result_size - 1)
+        )
+        self.assertEqual(get_resp.status_int, 200)
+        self.assertEqual(get_resp.json["result_size"], actual_result_size)
+        self.assertTrue("result" not in get_resp.json)
+        self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
+
+        # 6. max_result_size is not a positive number
+        get_resp = self._do_get_one(
+            actionexecution_id + "?max_result_size=-100", expect_errors=True
+        )
+        self.assertEqual(get_resp.status_int, 400)
+        self.assertEqual(
+            get_resp.json["faultstring"], "max_result_size must be a positive number"
+        )
+
+        # 7. max_result_size is > max possible value
+        get_resp = self._do_get_one(
+            actionexecution_id + "?max_result_size=%s" % ((14 * 1024 * 1024) + 1),
+            expect_errors=True,
+        )
+        self.assertEqual(get_resp.status_int, 400)
+        self.assertEqual(
+            get_resp.json["faultstring"],
+            "max_result_size query parameter must be smaller than 14 MB",
+        )
+
+        # 8. ?max_result_size == actual result size - result should be returned
+        get_resp = self._do_get_one(
+            actionexecution_id + "?max_result_size=%s" % (actual_result_size)
+        )
+        self.assertEqual(get_resp.status_int, 200)
+        self.assertEqual(get_resp.json["result_size"], actual_result_size)
+        self.assertEqual(get_resp.json["result"], data["result"])
+        self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id)
+
     def test_get_all_id_query_param_filtering_success(self):
         post_resp = self._do_post(LIVE_ACTION_1)
         actionexecution_id = self._get_actionexecution_id(post_resp)
diff --git a/st2common/st2common/openapi.yaml b/st2common/st2common/openapi.yaml
@@ -1095,6 +1095,10 @@ paths:
           in: query
           description: Show secrets in plain text
           type: boolean
+        - name: max_result_size
+          in: query
+          type: integer
+          description: True to exclude result field from the response for executions which result field exceeds the provided size in bytes.
       x-parameters:
         - name: user
           in: context
diff --git a/st2common/st2common/openapi.yaml.j2 b/st2common/st2common/openapi.yaml.j2
@@ -1091,6 +1091,10 @@ paths:
           in: query
           description: Show secrets in plain text
           type: boolean
+        - name: max_result_size
+          in: query
+          type: integer
+          description: True to exclude result field from the response for executions which result field exceeds the provided size in bytes.
       x-parameters:
         - name: user
           in: context