From 9324e838260588bcd54d7c2671f4477ed9895ec5 Mon Sep 17 00:00:00 2001
From: Gguidini <giovanni@codecov.io>
Date: Thu, 6 Jul 2023 23:13:13 +0200
Subject: [PATCH] Send labels to Codecov only after collecting them

These changes alter the way label analysis command works by making 2 requests to Codecov instead of 1.

Why would we want to do this, if it's more time to make a second request?
Because it takes (usually) longer to collect the labels than it takes to make a second request.
Specially for bigger repos, collecting the labels can take dozens of seconds.
By making the label collection parallel with the labels processing we can speed up the total ATS time.

We still PATCH the labels in case the worker hasn't finished the calculations, and it will
correctly refresh the request labels from the DB (based on [here](https://github.com/codecov/worker/pull/1210)).
However even if it misses the requested labels we recalculate the `absent_labels`, if needed (`_potentially_calculate_absent_labels`).

So we still guarantee the exact same response everytime. Potentially faster.

PS.: The file `test_instantiation.py` changes because of the linter
---
 codecov_cli/commands/labelanalysis.py       | 130 ++++++++++++-----
 tests/commands/test_invoke_labelanalysis.py | 150 ++++++++++++++++++--
 2 files changed, 234 insertions(+), 46 deletions(-)

diff --git a/codecov_cli/commands/labelanalysis.py b/codecov_cli/commands/labelanalysis.py
index 1287ed81..5991e627 100644
--- a/codecov_cli/commands/labelanalysis.py
+++ b/codecov_cli/commands/labelanalysis.py
@@ -90,17 +90,28 @@ def label_analysis(
         raise click.ClickException(
             click.style("Unable to run label analysis", fg="red")
         )
-    upload_url = enterprise_url or CODECOV_API_URL
-    url = f"{upload_url}/labels/labels-analysis"
-    token_header = f"Repotoken {token}"
 
     codecov_yaml = ctx.obj["codecov_yaml"] or {}
     cli_config = codecov_yaml.get("cli", {})
+    # Raises error if no runner is found
     runner = get_runner(cli_config, runner_name)
     logger.debug(
         f"Selected runner: {runner}",
         extra=dict(extra_log_attributes=dict(config=runner.params)),
     )
+
+    upload_url = enterprise_url or CODECOV_API_URL
+    url = f"{upload_url}/labels/labels-analysis"
+    token_header = f"Repotoken {token}"
+    payload = {
+        "base_commit": base_commit_sha,
+        "head_commit": head_commit_sha,
+        "requested_labels": None,
+    }
+    # Send the initial label analysis request without labels
+    # Because labels might take a long time to collect
+    eid = _send_labelanalysis_request(payload, url, token_header)
+
     logger.info("Collecting labels...")
     requested_labels = runner.collect_tests()
     logger.info(f"Collected {len(requested_labels)} tests")
@@ -108,42 +119,25 @@ def label_analysis(
         "Labels collected.",
         extra=dict(extra_log_attributes=dict(labels_collected=requested_labels)),
     )
-    payload = {
-        "base_commit": base_commit_sha,
-        "head_commit": head_commit_sha,
-        "requested_labels": requested_labels,
-    }
-    logger.info("Requesting set of labels to run...")
-    try:
-        response = requests.post(
-            url, json=payload, headers={"Authorization": token_header}
-        )
-        if response.status_code >= 500:
-            logger.warning(
-                "Sorry. Codecov is having problems",
-                extra=dict(extra_log_attributes=dict(status_code=response.status_code)),
-            )
+    payload["requested_labels"] = requested_labels
+
+    if eid:
+        # Initial request with no labels was successful
+        # Now we PATCH the labels in
+        patch_url = f"{upload_url}/labels/labels-analysis/{eid}"
+        _patch_labels(payload, patch_url, token_header)
+    else:
+        # Initial request with no labels failed
+        # Retry it
+        eid = _send_labelanalysis_request(payload, url, token_header)
+        if eid is None:
             _fallback_to_collected_labels(requested_labels, runner, dry_run=dry_run)
             return
-        if response.status_code >= 400:
-            logger.warning(
-                "Got a 4XX status code back from Codecov",
-                extra=dict(
-                    extra_log_attributes=dict(
-                        status_code=response.status_code, response_json=response.json()
-                    )
-                ),
-            )
-            raise click.ClickException(
-                "There is some problem with the submitted information"
-            )
-    except requests.RequestException:
-        raise click.ClickException(click.style("Unable to reach Codecov", fg="red"))
-    eid = response.json()["external_id"]
+
     has_result = False
     logger.info("Label request sent. Waiting for result.")
     start_wait = time.monotonic()
-    time.sleep(2)
+    time.sleep(1)
     while not has_result:
         resp_data = requests.get(
             f"{upload_url}/labels/labels-analysis/{eid}",
@@ -151,12 +145,13 @@ def label_analysis(
         )
         resp_json = resp_data.json()
         if resp_json["state"] == "finished":
+            request_result = _potentially_calculate_absent_labels(
+                resp_data.json()["result"], requested_labels
+            )
             if not dry_run:
-                runner.process_labelanalysis_result(
-                    LabelAnalysisRequestResult(resp_data.json()["result"])
-                )
+                runner.process_labelanalysis_result(request_result)
             else:
-                _dry_run_output(LabelAnalysisRequestResult(resp_data.json()["result"]))
+                _dry_run_output(LabelAnalysisRequestResult(request_result))
             return
         if resp_json["state"] == "error":
             logger.error(
@@ -179,6 +174,65 @@ def label_analysis(
         time.sleep(5)
 
 
+def _potentially_calculate_absent_labels(
+    request_result, requested_labels
+) -> LabelAnalysisRequestResult:
+    if request_result["absent_labels"]:
+        return LabelAnalysisRequestResult(request_result)
+    requested_labels_set = set(requested_labels)
+    present_labels_set = set(request_result["present_report_labels"])
+    request_result["absent_labels"] = list(requested_labels_set - present_labels_set)
+    return LabelAnalysisRequestResult(request_result)
+
+
+def _patch_labels(payload, url, token_header):
+    logger.info("Sending collected labels to Codecov...")
+    try:
+        response = requests.patch(
+            url, json=payload, headers={"Authorization": token_header}
+        )
+        if response.status_code < 300:
+            logger.info("Labels successfully sent to Codecov")
+    except requests.RequestException:
+        raise click.ClickException(click.style("Unable to reach Codecov", fg="red"))
+
+
+def _send_labelanalysis_request(payload, url, token_header):
+    logger.info(
+        "Requesting set of labels to run...",
+        extra=dict(
+            extra_log_attributes=dict(
+                with_labels=(payload["requested_labels"] is not None)
+            )
+        ),
+    )
+    try:
+        response = requests.post(
+            url, json=payload, headers={"Authorization": token_header}
+        )
+        if response.status_code >= 500:
+            logger.warning(
+                "Sorry. Codecov is having problems",
+                extra=dict(extra_log_attributes=dict(status_code=response.status_code)),
+            )
+            return None
+        if response.status_code >= 400:
+            logger.warning(
+                "Got a 4XX status code back from Codecov",
+                extra=dict(
+                    extra_log_attributes=dict(
+                        status_code=response.status_code, response_json=response.json()
+                    )
+                ),
+            )
+            raise click.ClickException(
+                "There is some problem with the submitted information"
+            )
+    except requests.RequestException:
+        raise click.ClickException(click.style("Unable to reach Codecov", fg="red"))
+    return response.json()["external_id"]
+
+
 def _dry_run_output(result: LabelAnalysisRequestResult):
     logger.info(
         "Not executing tests because '--dry-run' is on. List of labels selected for running below."
diff --git a/tests/commands/test_invoke_labelanalysis.py b/tests/commands/test_invoke_labelanalysis.py
index 4726b1b7..72cbbd42 100644
--- a/tests/commands/test_invoke_labelanalysis.py
+++ b/tests/commands/test_invoke_labelanalysis.py
@@ -1,11 +1,16 @@
 import json
 
+import click
 import pytest
 import responses
 from click.testing import CliRunner
 from responses import matchers
 
-from codecov_cli.commands.labelanalysis import _fallback_to_collected_labels
+from codecov_cli.commands.labelanalysis import (
+    _fallback_to_collected_labels,
+    _potentially_calculate_absent_labels,
+    _send_labelanalysis_request,
+)
 from codecov_cli.commands.labelanalysis import time as labelanalysis_time
 from codecov_cli.main import cli
 from codecov_cli.runners.types import LabelAnalysisRequestResult
@@ -46,6 +51,49 @@ def get_labelanalysis_deps(mocker):
     }
 
 
+class TestLabelAnalysisNotInvoke(object):
+    def test_potentially_calculate_labels(self):
+        request_result = {
+            "present_report_labels": ["label_1", "label_2", "label_3"],
+            "absent_labels": [],
+            "present_diff_labels": ["label_2", "label_3"],
+            "global_level_labels": ["label_1"],
+        }
+        collected_labels = ["label_1", "label_2", "label_3", "label_4"]
+        expected = {**request_result, "absent_labels": ["label_4"]}
+        assert (
+            _potentially_calculate_absent_labels(request_result, collected_labels)
+            == expected
+        )
+        request_result["absent_labels"] = ["label_4", "label_5"]
+        expected["absent_labels"].append("label_5")
+        assert (
+            _potentially_calculate_absent_labels(request_result, collected_labels)
+            == expected
+        )
+
+    def test_send_label_analysis_bad_payload(self):
+        payload = {
+            "base_commit": "base_commit",
+            "head_commit": "head_commit",
+            "requested_labels": [],
+        }
+        url = "https://api.codecov.io/labels/labels-analysis"
+        header = "Repotoken STATIC_TOKEN"
+        with responses.RequestsMock() as rsps:
+            rsps.add(
+                responses.POST,
+                "https://api.codecov.io/labels/labels-analysis",
+                json={"error": "list field cannot be empty list"},
+                status=400,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
+            with pytest.raises(click.ClickException):
+                _send_labelanalysis_request(payload, url, header)
+
+
 class TestLabelAnalysisCommand(object):
     def test_labelanalysis_help(self, mocker, fake_ci_provider):
         mocker.patch("codecov_cli.main.get_ci_adapter", return_value=fake_ci_provider)
@@ -133,6 +181,15 @@ def test_invoke_label_analysis(self, get_labelanalysis_deps, mocker):
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -144,12 +201,12 @@ def test_invoke_label_analysis(self, get_labelanalysis_deps, mocker):
                 ["-v", "label-analysis", "--token=STATIC_TOKEN", "--base-sha=BASE_SHA"],
                 obj={},
             )
-            mock_get_runner.assert_called()
-            fake_runner.process_labelanalysis_result.assert_called_with(
-                label_analysis_result
-            )
-            print(result.output)
-        assert result.exit_code == 0
+            assert result.exit_code == 0
+        mock_get_runner.assert_called()
+        fake_runner.process_labelanalysis_result.assert_called_with(
+            label_analysis_result
+        )
+        print(result.output)
 
     def test_invoke_label_analysis_dry_run(self, get_labelanalysis_deps, mocker):
         mock_get_runner = get_labelanalysis_deps["mock_get_runner"]
@@ -172,6 +229,15 @@ def test_invoke_label_analysis_dry_run(self, get_labelanalysis_deps, mocker):
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -190,8 +256,8 @@ def test_invoke_label_analysis_dry_run(self, get_labelanalysis_deps, mocker):
             )
             mock_get_runner.assert_called()
             fake_runner.process_labelanalysis_result.assert_not_called()
-        assert result.exit_code == 0
         print(result.output)
+        assert result.exit_code == 0
         assert json.dumps(label_analysis_result) in result.output
 
     def test_fallback_to_collected_labels(self, mocker):
@@ -305,6 +371,15 @@ def test_fallback_collected_labels_codecov_error_processing_label_analysis(
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -346,6 +421,15 @@ def test_fallback_collected_labels_codecov_max_wait_time_exceeded(
                     matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
                 ],
             )
+            rsps.add(
+                responses.PATCH,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
             rsps.add(
                 responses.GET,
                 "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
@@ -374,3 +458,53 @@ def test_fallback_collected_labels_codecov_max_wait_time_exceeded(
                 "global_level_labels": [],
             }
         )
+
+    def test_first_labelanalysis_request_fails_but_second_works(
+        self, get_labelanalysis_deps, mocker
+    ):
+        mock_get_runner = get_labelanalysis_deps["mock_get_runner"]
+        fake_runner = get_labelanalysis_deps["fake_runner"]
+        collected_labels = get_labelanalysis_deps["collected_labels"]
+
+        label_analysis_result = {
+            "present_report_labels": ["test_present"],
+            "absent_labels": ["test_absent"],
+            "present_diff_labels": ["test_in_diff"],
+            "global_level_labels": ["test_global"],
+        }
+
+        with responses.RequestsMock() as rsps:
+            rsps.add(
+                responses.POST,
+                "https://api.codecov.io/labels/labels-analysis",
+                status=502,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
+            rsps.add(
+                responses.POST,
+                "https://api.codecov.io/labels/labels-analysis",
+                json={"external_id": "label-analysis-request-id"},
+                status=201,
+                match=[
+                    matchers.header_matcher({"Authorization": "Repotoken STATIC_TOKEN"})
+                ],
+            )
+            rsps.add(
+                responses.GET,
+                "https://api.codecov.io/labels/labels-analysis/label-analysis-request-id",
+                json={"state": "finished", "result": label_analysis_result},
+            )
+            cli_runner = CliRunner()
+            result = cli_runner.invoke(
+                cli,
+                ["-v", "label-analysis", "--token=STATIC_TOKEN", "--base-sha=BASE_SHA"],
+                obj={},
+            )
+            assert result.exit_code == 0
+        mock_get_runner.assert_called()
+        fake_runner.process_labelanalysis_result.assert_called_with(
+            label_analysis_result
+        )
+        print(result.output)