From d79dc99519468f365c51210436ad277757d12844 Mon Sep 17 00:00:00 2001 From: Jeffrey Rennie Date: Fri, 26 Jun 2020 14:59:14 -0700 Subject: [PATCH] fix: remove all dependencies on sloth (#649) * fix: remove all dependencies on sloth Also, move code that was copied and pasted twice into a common py file. fixes https://github.com/googleapis/synthtool/issues/645 * fix import path to list_split_repositories * consider a repo with most of its code written in JavaScript or TypeScript to be a nodejs repo and see if it has a synth.py file * remove obsolete addendum files * when querying a repo's language, ignore languages we don't care about, like Shell * ignore archived repos --- autosynth/github.py | 62 ++++++++++- autosynth/multi.py | 5 +- autosynth/providers/java.py | 73 +------------ .../providers/list_split_repositories.py | 101 ++++++++++++++++++ autosynth/providers/nodejs.py | 34 +----- autosynth/providers/python.py | 39 +------ tests/test_github.py | 33 ++++++ 7 files changed, 204 insertions(+), 143 deletions(-) create mode 100644 autosynth/providers/list_split_repositories.py create mode 100644 tests/test_github.py diff --git a/autosynth/github.py b/autosynth/github.py index e4719885e..d8454754e 100644 --- a/autosynth/github.py +++ b/autosynth/github.py @@ -13,8 +13,18 @@ # limitations under the License. 
import base64 -from typing import Generator, Sequence, Dict, Optional, Union, List, cast +from typing import ( + Dict, + Generator, + List, + Optional, + Sequence, + Union, + cast, +) + import requests + from autosynth.log import logger _GITHUB_ROOT: str = "https://api.github.com" @@ -182,7 +192,13 @@ def list_files(self, repository: str, path: str, ref: str = None) -> Sequence[Di """ url = f"{_GITHUB_ROOT}/repos/{repository}/contents/{path}" response = self.session.get(url, params={"ref": ref}) - return cast(List[Dict], _get_json_or_raise_exception(response)) + try: + return cast(List[Dict], _get_json_or_raise_exception(response)) + except requests.exceptions.HTTPError as e: + if e.response.status_code == 404: + return [] + else: + raise def check_for_file(self, repository: str, path: str, ref: str = None) -> bool: """Check to see if a file exists in a given repository. @@ -353,6 +369,48 @@ def update_pull_labels( labels=list(label_names), ) + def list_repos(self, org: str) -> List[Dict]: + """Returns a list of all the repositories in an organization. + + See https://developer.github.com/v3/repos/#list-organization-repositories + + Args: + org (str): The name of the organization. + + Returns: + List[Dict]: One dict per repository, as returned by the GitHub API. + """ + url = f"{_GITHUB_ROOT}/orgs/{org}/repos?type=public" + repos: List[Dict] = [] + while url: + response = self.session.get(url) + json = _get_json_or_raise_exception(response) + repos.extend(json) + url = response.links.get("next", {}).get("url") + return repos + + def get_languages(self, repository) -> Dict[str, int]: + """Returns the # of bytes of code of each programming language in the repo. + + See: https://developer.github.com/v3/repos/#list-repository-languages + + Args: + repository {str} -- GitHub repository with the format [owner]/[repo] + + Returns: + Dict[str, int]: Map of programming language to bytes of code. 
+ """ + url = f"{_GITHUB_ROOT}/repos/{repository}/languages" + langs: Dict[str, int] = {} + + while url: + response = self.session.get(url) + json = _get_json_or_raise_exception(response) + langs.update(json) + + url = response.links.get("next", {}).get("url") + return langs + def get_labels(self, repository: str) -> Sequence[str]: """Returns labels for a repository. diff --git a/autosynth/multi.py b/autosynth/multi.py index 4ae47d8be..8bfdaec05 100644 --- a/autosynth/multi.py +++ b/autosynth/multi.py @@ -268,8 +268,9 @@ def load_config( try: provider = importlib.import_module(config) return provider.list_repositories() # type: ignore - except (ImportError, AttributeError): - pass + except (ImportError, AttributeError) as e: + logger.warning("Failed to load %s", config) + logger.warning("%s", e) return None diff --git a/autosynth/providers/java.py b/autosynth/providers/java.py index bdb8a1e37..48764fcd7 100644 --- a/autosynth/providers/java.py +++ b/autosynth/providers/java.py @@ -12,80 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -import os - -from autosynth import github +from autosynth.providers.list_split_repositories import list_split_repositories def list_repositories(): - gh = github.GitHub(os.environ["GITHUB_TOKEN"]) - - return _google_cloud_java_repos(gh) + _other_repos(gh) - - -def _google_cloud_java_repos(gh): - # Presently this only enumerates folders from the google-cloud-java - # monorepo. - repo = "googleapis/google-cloud-java" - - clients = gh.list_files(repo, "google-cloud-clients") - subdirs = [item["path"] for item in clients if item["type"] == "dir"] - - # No hidden dirs - subdirs = [subdir for subdir in subdirs if not subdir.startswith(".")] - - # Only subdirs that have synth.py files. 
- subdirs = [ - subdir for subdir in subdirs if gh.check_for_file(repo, f"{subdir}/synth.py") - ] - - return [_config_for_subdir(repo, subdir) for subdir in subdirs] - - -def _other_repos(gh): - repos = _get_repo_list_from_sloth(gh) - repos = [repo for repo in repos if _is_java_synth_repo(gh, repo)] - - return [ - {"name": repo["repo"].split("/")[-1], "repository": repo["repo"]} - for repo in repos - ] - - -def _config_for_subdir(repo: str, subdir: str): - api = subdir.split("/")[1].replace("google-cloud-", "") - - return { - "name": api, - "repository": repo, - "synth-path": subdir, - "branch-suffix": api, - "pr-title": f"Regenerate {api} client", - } - - -def _get_repo_list_from_sloth(gh): - contents = gh.get_contents("googleapis/sloth", "repos.json") - repos = json.loads(contents)["repos"] - return repos - - -def _is_java_synth_repo(gh, repo): - # Only java repos. - if repo["language"] != "java": - return False - # No private repos. - if "private" in repo["repo"]: - return False - # Only repos with a synth.py in the top-level directory. - if not gh.check_for_file(repo["repo"], "synth.py"): - return False + repos = list_split_repositories("java", ("Java",)) # Ignore apiary services repo (has separate provider) - if repo["repo"] == "googleapis/google-api-java-client-services": - return False - - return True + return [repo for repo in repos if repo["name"] != "google-api-java-client-services"] if __name__ == "__main__": diff --git a/autosynth/providers/list_split_repositories.py b/autosynth/providers/list_split_repositories.py new file mode 100644 index 000000000..71ee55a35 --- /dev/null +++ b/autosynth/providers/list_split_repositories.py @@ -0,0 +1,101 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Dict, List, Sequence + +from autosynth import github + +"""Chunks that identify a repo from its name as belonging to a language. + +In other words, we can look at the repo name python-spanner and know that it's +for a python library because it contains the word 'python'. +""" +_SILVER_NAME_CHUNKS = ( + "nodejs", + "python", + "ruby", + "dotnet", + "php", + "java", + "go", + "elixir", +) + +"""Language names as reported by github.""" +_SILVER_LANGUAGE_NAMES = ( + "JavaScript", + "TypeScript", + "Python", + "Java", + "PHP", + "Ruby", + "Go", + "C#", + "Elixir", +) + + +def list_split_repositories( + repo_name_chunk: str, majority_languages: Sequence[str] = () +) -> List[Dict]: + """List github repos for a programming language. + + Args: + repo_name_chunk (str): return repos that have this chunk in the repo name. + Example: "nodejs" + majority_languages (Sequence[str], optional): return repos that have a majority + of their code written in one of these programming languages. + Example: ("JavaScript", "TypeScript") + + Returns: + List[Dict]: one dict per matching repo, with "name" and "repository" keys. + """ + + gh = github.GitHub(os.environ["GITHUB_TOKEN"]) + all_repos = set( + [repo["name"] for repo in gh.list_repos("googleapis") if not repo["archived"]] + ) + # Find repos with the language as part of the repo name. + lang_repos = set([repo for repo in all_repos if repo_name_chunk in repo.split("-")]) + if majority_languages: + # Ignore all repos whose name tags them for a language. 
+ silver_name_chunks = set(_SILVER_NAME_CHUNKS) + all_lang_repos = set( + [ + repo + for repo in all_repos + if silver_name_chunks.intersection(set(repo.split("-"))) + ] + ) + # Find repos with the majority of their code written in the language. + silver_language_names = set(_SILVER_LANGUAGE_NAMES) + for repo in all_repos - all_lang_repos: + languages = gh.get_languages(f"googleapis/{repo}") + ranks = [ + (count, lang) + for (lang, count) in languages.items() + # Ignore languages we don't care about, like Shell. + if lang in silver_language_names + ] + if ranks and max(ranks)[1] in majority_languages: + lang_repos.add(repo) + + synth_repos = [] + for repo in sorted(lang_repos): + # Only repos with a synth.py in the top-level directory. + if not gh.list_files(f"googleapis/{repo}", "synth.py"): + continue + synth_repos.append({"name": repo, "repository": f"googleapis/{repo}"}) + return synth_repos diff --git a/autosynth/providers/nodejs.py b/autosynth/providers/nodejs.py index b9bb084b2..3171e460a 100644 --- a/autosynth/providers/nodejs.py +++ b/autosynth/providers/nodejs.py @@ -12,41 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -import os - -from autosynth import github - - -def _get_repo_list_from_sloth(gh): - contents = gh.get_contents("googleapis/sloth", "repos.json") - repos = json.loads(contents)["repos"] - return repos - - -def _is_nodejs_synth_repo(gh, repo): - # Only nodejs repos. - if repo["language"] != "nodejs": - return False - # No private repos. - if "private" in repo["repo"]: - return False - # Only repos with a synth.py in the top-level directory. 
- if not gh.check_for_file(repo["repo"], "synth.py"): - return False - - return True +from autosynth.providers.list_split_repositories import list_split_repositories def list_repositories(): - gh = github.GitHub(os.environ["GITHUB_TOKEN"]) - repos = _get_repo_list_from_sloth(gh) - repos = [repo for repo in repos if _is_nodejs_synth_repo(gh, repo)] - - return [ - {"name": repo["repo"].split("/")[-1], "repository": repo["repo"]} - for repo in repos - ] + return list_split_repositories("nodejs", ("JavaScript", "TypeScript")) if __name__ == "__main__": diff --git a/autosynth/providers/python.py b/autosynth/providers/python.py index 68bd49bb6..ec380b81e 100644 --- a/autosynth/providers/python.py +++ b/autosynth/providers/python.py @@ -12,46 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json -import os - -from autosynth import github - - -def _get_repo_list_from_sloth(gh): - contents = gh.get_contents("googleapis/sloth", "repos.json") - repos = json.loads(contents)["repos"] - return repos - - -def _is_python_synth_repo(gh, repo): - """Finds Python repositories with synth files in the top-level directory.""" - # Only python repos. - if repo["language"] != "python": - return False - # No private repos. - if "private" in repo["repo"]: - return False - # Only repos with a synth.py in the top-level directory. 
- if not gh.check_for_file(repo["repo"], "synth.py"): - return False - - return True +from autosynth.providers.list_split_repositories import list_split_repositories def list_repositories(): - """Finds repositories with a `synth.py` in the top-level""" - gh = github.GitHub(os.environ["GITHUB_TOKEN"]) - - repos = _get_repo_list_from_sloth(gh) - repos = [repo for repo in repos if _is_python_synth_repo(gh, repo)] - - repo_list = [ - {"name": repo["repo"].split("/")[-1], "repository": repo["repo"]} - for repo in repos - ] - - return repo_list + return list_split_repositories("python", ("Python",)) if __name__ == "__main__": diff --git a/tests/test_github.py b/tests/test_github.py new file mode 100644 index 000000000..49e97e1dc --- /dev/null +++ b/tests/test_github.py @@ -0,0 +1,33 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import unittest +import os + +from autosynth import github + +requires_github_token = unittest.skipIf( + not os.environ.get("GITHUB_TOKEN", False), + "Set the environment variable GITHUB_TOKEN to run this test.", +) + + +def new_gh(): + return github.GitHub(os.environ["GITHUB_TOKEN"]) + + +@requires_github_token +def test_list_repos(): + gh = new_gh() + repos = gh.list_repos("googleapis") + assert len(repos) > 10