Skip to content

Commit

Permalink
lint; new unit tests for improved coverage; fixed workflow parse logic…
Browse files Browse the repository at this point in the history
… error
  • Loading branch information
mas0nd committed Apr 16, 2024
1 parent f63a552 commit 6131a8e
Show file tree
Hide file tree
Showing 16 changed files with 307 additions and 112 deletions.
2 changes: 1 addition & 1 deletion gato/attack/attack.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def __decrypt_secrets(priv_key, blob: str):

encrypted_key = base64.b64decode(blob[1][1:-1])
sym_key_b64 = priv_key.decrypt(encrypted_key,
padding.PKCS1v15()).decode()
padding.PKCS1v15()).decode()
sym_key = base64.b64decode(sym_key_b64)

derived_key = hashlib.pbkdf2_hmac('sha256', sym_key, salt, 10000, 48)
Expand Down
4 changes: 2 additions & 2 deletions gato/enumerate/enumerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,9 @@ def enumerate_organization(self, org: str):
f"the {organization.name} organization!"
)

Output.info(f"Querying and caching workflow YAML files!")
Output.info("Querying and caching workflow YAML files!")
wf_queries = GqlQueries.get_workflow_ymls(enum_list)

for wf_query in wf_queries:
result = self.org_e.api.call_post('/graphql', wf_query)
# Sometimes we don't get a 200, fall back in this case.
Expand Down
2 changes: 1 addition & 1 deletion gato/enumerate/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def construct_workflow_cache(self, yml_results):
graphql and REST do not have parity, we still need to use rest for most
enumeration calls. This method saves off all yml files, so during org
level enumeration if we perform yml enumeration the cached file is used
instead of making github REST requests.
instead of making github REST requests.
Args:
yml_results (list): List of results from individual GraphQL queries
Expand Down
25 changes: 13 additions & 12 deletions gato/github/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

logger = logging.getLogger(__name__)


class Api():
"""Class to serve as an abstraction layer to interact with the GitHub API.
It handles utilizing proxies, along with passing the PAT and handling any
Expand Down Expand Up @@ -129,18 +130,18 @@ def __process_run_log(self, log_content: bytes, run_info: dict):
content = run_setup.read().decode()
content_lines = content.split('\n')

if "Image Release: https://github.com/actions/runner-images" in content or \
"Job is about to start running on the hosted runner: GitHub Actions" in content:
if "Image Release: https://github.com/actions/runner-images" in content \
or "Job is about to start running on the hosted runner: GitHub Actions" in content:
# Larger runners will appear to be self-hosted, but
# they will have the image name. Skip if we see this.
# If the log contains "job is about to start running on hosted runner",
# the runner is a Github hosted runner so we can skip it.
# If the log contains "job is about to start running on hosted runner",
# the runner is a Github hosted runner so we can skip it.
continue
index = 0
while index < len(content_lines) and content_lines[index]:
while index < len(content_lines) and content_lines[index]:
line = content_lines[index]

if "Requested labels: " in line:
if "Requested labels: " in line:
labels = line.split("Requested labels: ")[1].split(', ')

if "Runner name: " in line:
Expand All @@ -149,7 +150,7 @@ def __process_run_log(self, log_content: bytes, run_info: dict):
if "Machine name: " in line:
machine_name = line.split("Machine name: ")[1].replace("'", "")

if "Runner group name:" in line:
if "Runner group name:" in line:
runner_group = line.split("Runner group name: ")[1].replace("'", "")

if "Job is about to start running on" in line:
Expand Down Expand Up @@ -694,13 +695,13 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
Returns:
list: List of run logs for runs that ran on self-hosted runners.
"""
start_date = datetime.now() - timedelta(days = 60)
start_date = datetime.now() - timedelta(days=60)
runs = self.call_get(
f'/repos/{repo_name}/actions/runs', params={
"per_page": "30",
"status":"completed",
"status": "completed",
"exclude_pull_requests": "true",
"created":f">{start_date.isoformat()}"
"created": f">{start_date.isoformat()}"
}
)

Expand All @@ -714,7 +715,7 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
for run in runs.json()['workflow_runs']:
# We are only interested in runs that actually executed.
if run['conclusion'] != 'success' and \
run['conclusion'] != 'failure':
run['conclusion'] != 'failure':
continue

if short_circuit:
Expand All @@ -723,7 +724,7 @@ def retrieve_run_logs(self, repo_name: str, short_circuit: str = True):
# we just need to look at each branch + wf combination once.
workflow_key = f"{run['head_branch']}:{run['path']}"
if workflow_key in names:
continue
continue
names.add(workflow_key)

run_log = self.call_get(
Expand Down
2 changes: 0 additions & 2 deletions gato/github/gql_queries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from gato.models import Repository

class GqlQueries():
"""Constructs graphql queries for use with the GitHub GraphQL api.
"""
Expand Down
2 changes: 1 addition & 1 deletion gato/models/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ def toJSON(self):
"non_ephemeral": self.non_ephemeral
}

return representation
return representation
20 changes: 8 additions & 12 deletions gato/workflow_parser/workflow_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ class WorkflowParser():
'macos-latest',
'macOS-latest',
'windows-latest',
'ubuntu-18.04', # deprecated, but we don't want false positives on older repos.
'ubuntu-18.04', # deprecated, but we don't want false positives on older repos.
'ubuntu-20.04',
'ubuntu-22.04',
'windows-2022',
'windows-2019',
'windows-2016', # deprecated, but we don't want false positives on older repos.
'windows-2016', # deprecated, but we don't want false positives on older repos.
'macOS-13',
'macOS-12',
'macOS-11',
Expand Down Expand Up @@ -125,19 +125,15 @@ def self_hosted(self):
else:
if type(runs_on) == list:
for label in runs_on:
if label in self.GITHUB_HOSTED_LABELS:
if label not in self.GITHUB_HOSTED_LABELS and \
not re.match(self.LARGER_RUNNER_REGEX_LIST, label):
sh_jobs.append((jobname, job_details))
break
if re.match(self.LARGER_RUNNER_REGEX_LIST, label):
break
else:
sh_jobs.append((jobname, job_details))
elif type(runs_on) == str:
if runs_on in self.GITHUB_HOSTED_LABELS:
break
if re.match(self.LARGER_RUNNER_REGEX_LIST, runs_on):
break
if runs_on in self.GITHUB_HOSTED_LABELS or \
re.match(self.LARGER_RUNNER_REGEX_LIST, runs_on):
continue
sh_jobs.append((jobname, job_details))

return sh_jobs

def analyze_entrypoints(self):
Expand Down
Loading

0 comments on commit 6131a8e

Please sign in to comment.