Send labels to Codecov only after collecting them

These changes alter the way label analysis command works by making 2 requests to Codecov instead of 1. Why would we want to do this, if it's more time to make a second request? Because it takes (usually) longer to collect the labels than it takes to make a second request. Specially for bigger repos, collecting the labels can take dozens of seconds. By making the label collection parallel with the labels processing we can speed up the total ATS time. We still PATCH the labels in case the worker hasn't finished the calculations, and it will correctly refresh the request labels from the DB (based on [here](https://github.com/codecov/worker/pull/1210)). However even if it misses the requested labels we recalculate the `absent_labels`, if needed (`_potentially_calculate_absent_labels`). So we still guarantee the exact same response everytime. Potentially faster. PS.: The file `test_instantiation.py` changes because of the linter
codecov · Jul 12, 2023 · 9324e83 · 9324e83
1 parent 9d671c5
commit 9324e83
Show file tree

Hide file tree

Showing 2 changed files with 234 additions and 46 deletions.
diff --git a/codecov_cli/commands/labelanalysis.py b/codecov_cli/commands/labelanalysis.py
@@ -90,73 +90,68 @@ def label_analysis(
         raise click.ClickException(
             click.style("Unable to run label analysis", fg="red")
         )
-    upload_url = enterprise_url or CODECOV_API_URL
-    url = f"{upload_url}/labels/labels-analysis"
-    token_header = f"Repotoken {token}"
 
     codecov_yaml = ctx.obj["codecov_yaml"] or {}
     cli_config = codecov_yaml.get("cli", {})
+    # Raises error if no runner is found
     runner = get_runner(cli_config, runner_name)
     logger.debug(
         f"Selected runner: {runner}",
         extra=dict(extra_log_attributes=dict(config=runner.params)),
     )
+
+    upload_url = enterprise_url or CODECOV_API_URL
+    url = f"{upload_url}/labels/labels-analysis"
+    token_header = f"Repotoken {token}"
+    payload = {
+        "base_commit": base_commit_sha,
+        "head_commit": head_commit_sha,
+        "requested_labels": None,
+    }
+    # Send the initial label analysis request without labels
+    # Because labels might take a long time to collect
+    eid = _send_labelanalysis_request(payload, url, token_header)
+
     logger.info("Collecting labels...")
     requested_labels = runner.collect_tests()
     logger.info(f"Collected {len(requested_labels)} tests")
     logger.debug(
         "Labels collected.",
         extra=dict(extra_log_attributes=dict(labels_collected=requested_labels)),
     )
-    payload = {
-        "base_commit": base_commit_sha,
-        "head_commit": head_commit_sha,
-        "requested_labels": requested_labels,
-    }
-    logger.info("Requesting set of labels to run...")
-    try:
-        response = requests.post(
-            url, json=payload, headers={"Authorization": token_header}
-        )
-        if response.status_code >= 500:
-            logger.warning(
-                "Sorry. Codecov is having problems",
-                extra=dict(extra_log_attributes=dict(status_code=response.status_code)),
-            )
+    payload["requested_labels"] = requested_labels
+
+    if eid:
+        # Initial request with no labels was successful
+        # Now we PATCH the labels in
+        patch_url = f"{upload_url}/labels/labels-analysis/{eid}"
+        _patch_labels(payload, patch_url, token_header)
+    else:
+        # Initial request with no labels failed
+        # Retry it
+        eid = _send_labelanalysis_request(payload, url, token_header)
+        if eid is None:
             _fallback_to_collected_labels(requested_labels, runner, dry_run=dry_run)
             return
-        if response.status_code >= 400:
-            logger.warning(
-                "Got a 4XX status code back from Codecov",
-                extra=dict(
-                    extra_log_attributes=dict(
-                        status_code=response.status_code, response_json=response.json()
-                    )
-                ),
-            )
-            raise click.ClickException(
-                "There is some problem with the submitted information"
-            )
-    except requests.RequestException:
-        raise click.ClickException(click.style("Unable to reach Codecov", fg="red"))
-    eid = response.json()["external_id"]
+
     has_result = False
     logger.info("Label request sent. Waiting for result.")
     start_wait = time.monotonic()
-    time.sleep(2)
+    time.sleep(1)
     while not has_result:
         resp_data = requests.get(
             f"{upload_url}/labels/labels-analysis/{eid}",
             headers={"Authorization": token_header},
         )
         resp_json = resp_data.json()
         if resp_json["state"] == "finished":
+            request_result = _potentially_calculate_absent_labels(
+                resp_data.json()["result"], requested_labels
+            )
             if not dry_run:
-                runner.process_labelanalysis_result(
-                    LabelAnalysisRequestResult(resp_data.json()["result"])
-                )
+                runner.process_labelanalysis_result(request_result)
             else:
-                _dry_run_output(LabelAnalysisRequestResult(resp_data.json()["result"]))
+                _dry_run_output(LabelAnalysisRequestResult(request_result))
             return
         if resp_json["state"] == "error":
             logger.error(
@@ -179,6 +174,65 @@ def label_analysis(
         time.sleep(5)
 
 
+def _potentially_calculate_absent_labels(
+    request_result, requested_labels
+) -> LabelAnalysisRequestResult:
+    if request_result["absent_labels"]:
+        return LabelAnalysisRequestResult(request_result)
+    requested_labels_set = set(requested_labels)
+    present_labels_set = set(request_result["present_report_labels"])
+    request_result["absent_labels"] = list(requested_labels_set - present_labels_set)
+    return LabelAnalysisRequestResult(request_result)
+
+
+def _patch_labels(payload, url, token_header):
+    logger.info("Sending collected labels to Codecov...")
+    try:
+        response = requests.patch(
+            url, json=payload, headers={"Authorization": token_header}
+        )
+        if response.status_code < 300:
+            logger.info("Labels successfully sent to Codecov")
+    except requests.RequestException:
+        raise click.ClickException(click.style("Unable to reach Codecov", fg="red"))
+
+
+def _send_labelanalysis_request(payload, url, token_header):
+    logger.info(
+        "Requesting set of labels to run...",
+        extra=dict(
+            extra_log_attributes=dict(
+                with_labels=(payload["requested_labels"] is not None)
+            )
+        ),
+    )
+    try:
+        response = requests.post(
+            url, json=payload, headers={"Authorization": token_header}
+        )
+        if response.status_code >= 500:
+            logger.warning(
+                "Sorry. Codecov is having problems",
+                extra=dict(extra_log_attributes=dict(status_code=response.status_code)),
+            )
+            return None
+        if response.status_code >= 400:
+            logger.warning(
+                "Got a 4XX status code back from Codecov",
+                extra=dict(
+                    extra_log_attributes=dict(
+                        status_code=response.status_code, response_json=response.json()
+                    )
+                ),
+            )
+            raise click.ClickException(
+                "There is some problem with the submitted information"
+            )
+    except requests.RequestException:
+        raise click.ClickException(click.style("Unable to reach Codecov", fg="red"))
+    return response.json()["external_id"]
+
+
 def _dry_run_output(result: LabelAnalysisRequestResult):
     logger.info(
         "Not executing tests because '--dry-run' is on. List of labels selected for running below."

diff --git a/tests/commands/test_invoke_labelanalysis.py b/tests/commands/test_invoke_labelanalysis.py
@@ -1,11 +1,16 @@
 import json
 
+import click
 import pytest
 import responses
 from click.testing import CliRunner
 from responses import matchers
 
-from codecov_cli.commands.labelanalysis import _fallback_to_collected_labels
+from codecov_cli.commands.labelanalysis import (
+    _fallback_to_collected_labels,
+    _potentially_calculate_absent_labels,
+    _send_labelanalysis_request,
+)
 from codecov_cli.commands.labelanalysis import time as labelanalysis_time
 from codecov_cli.main import cli
 from codecov_cli.runners.types import LabelAnalysisRequestResult
@@ -46,6 +51,49 @@ def get_labelanalysis_deps(mocker):
     }
 
 
+class TestLabelAnalysisNotInvoke(object):
+    def test_potentially_calculate_labels(self):
+        request_result = {
+            "present_report_labels": ["label_1", "label_2", "label_3"],
+            "absent_labels": [],
+            "present_diff_labels": ["label_2", "label_3"],
+            "global_level_labels": ["label_1"],
+        }
+        collected_labels = ["label_1", "label_2", "label_3", "label_4"]
+        expected = {**request_result, "absent_labels": ["label_4"]}
+        assert (
+            _potentially_calculate_absent_labels(request_result, collected_labels)
+            == expected
+        )
+        request_result["absent_labels"] = ["label_4", "label_5"]
+        expected["absent_labels"].append("label_5")
+        assert (
+            _potentially_calculate_absent_labels(request_result, collected_labels)
+            == expected
+        )
+
+    def test_send_label_analysis_bad_payload(self):
+        payload = {
+            "base_commit": "base_commit",
+            "head_commit": "head_commit",
+            "requested_labels": [],
+        }
+        url = "https://api.codecov.io/labels/labels-analysis"
+        header = "Repotoken STATIC_TOKEN"
+        with responses.RequestsMock() as rsps:
+            rsps.add(
+                responses.POST,
+                "https://api.codecov.io/labels/labels-analysis",
+                json={"error": "list field cannot be empty list"},
+                status=400,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
+            with pytest.raises(click.ClickException):
+                _send_labelanalysis_request(payload, url, header)
+
+
 class TestLabelAnalysisCommand(object):
     def test_labelanalysis_help(self, mocker, fake_ci_provider):
         mocker.patch("codecov_cli.main.get_ci_adapter", return_value=fake_ci_provider)
@@ -133,6 +181,15 @@ def test_invoke_label_analysis(self, get_labelanalysis_deps, mocker):
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -144,12 +201,12 @@ def test_invoke_label_analysis(self, get_labelanalysis_deps, mocker):
                 ["-v", "label-analysis", "--token=STATIC_TOKEN", "--base-sha=BASE_SHA"],
                 obj={},
             )
-            mock_get_runner.assert_called()
-            fake_runner.process_labelanalysis_result.assert_called_with(
-                label_analysis_result
-            )
-            print(result.output)
-        assert result.exit_code == 0
+            assert result.exit_code == 0
+        mock_get_runner.assert_called()
+        fake_runner.process_labelanalysis_result.assert_called_with(
+            label_analysis_result
+        )
+        print(result.output)
 
     def test_invoke_label_analysis_dry_run(self, get_labelanalysis_deps, mocker):
         mock_get_runner = get_labelanalysis_deps["mock_get_runner"]
@@ -172,6 +229,15 @@ def test_invoke_label_analysis_dry_run(self, get_labelanalysis_deps, mocker):
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -190,8 +256,8 @@ def test_invoke_label_analysis_dry_run(self, get_labelanalysis_deps, mocker):
             )
             mock_get_runner.assert_called()
             fake_runner.process_labelanalysis_result.assert_not_called()
-        assert result.exit_code == 0
         print(result.output)
+        assert result.exit_code == 0
         assert json.dumps(label_analysis_result) in result.output
 
     def test_fallback_to_collected_labels(self, mocker):
@@ -305,6 +371,15 @@ def test_fallback_collected_labels_codecov_error_processing_label_analysis(
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -346,6 +421,15 @@ def test_fallback_collected_labels_codecov_max_wait_time_exceeded(
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -374,3 +458,53 @@ def test_fallback_collected_labels_codecov_max_wait_time_exceeded(
                 "global_level_labels": [],
             }
         )
+
+    def test_first_labelanalysis_request_fails_but_second_works(
+        self, get_labelanalysis_deps, mocker
+    ):
+        mock_get_runner = get_labelanalysis_deps["mock_get_runner"]
+        fake_runner = get_labelanalysis_deps["fake_runner"]
+        collected_labels = get_labelanalysis_deps["collected_labels"]
+
+        label_analysis_result = {
+            "present_report_labels": ["test_present"],
+            "absent_labels": ["test_absent"],
+            "present_diff_labels": ["test_in_diff"],
+            "global_level_labels": ["test_global"],
+        }
+
+        with responses.RequestsMock() as rsps:
+            rsps.add(
+                responses.POST,
+                "https://api.codecov.io/labels/labels-analysis",
+                status=502,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
+            rsps.add(
+                responses.POST,
+                "https://api.codecov.io/labels/labels-analysis",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
+            rsps.add(
+                responses.GET,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"state": "finished", "result": label_analysis_result},
+            )
+            cli_runner = CliRunner()
+            result = cli_runner.invoke(
+                cli,
+                ["-v", "label-analysis", "--token=STATIC_TOKEN", "--base-sha=BASE_SHA"],
+                obj={},
+            )
+            assert result.exit_code == 0
+        mock_get_runner.assert_called()
+        fake_runner.process_labelanalysis_result.assert_called_with(
+            label_analysis_result
+        )
+        print(result.output)