Skip to content

Commit

Permalink
fix: remove all dependencies on sloth (#649)
Browse files Browse the repository at this point in the history
* fix: remove all dependencies on sloth

Also, move code that was copied and pasted twice into a common py file.

fixes #645

* fix import path to list_split_repositories

* consider a repo with most of its code written in JavaScript or TypeScript to be a nodejs repo and see if it has a synth.py file

* remove obsolete addendum files

* when querying a repo's language, ignore languages we don't care about like Shell

* ignore archived repos
  • Loading branch information
SurferJeffAtGoogle authored Jun 26, 2020
1 parent 652d446 commit d79dc99
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 143 deletions.
62 changes: 60 additions & 2 deletions autosynth/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,18 @@
# limitations under the License.

import base64
from typing import Generator, Sequence, Dict, Optional, Union, List, cast
from typing import (
Dict,
Generator,
List,
Optional,
Sequence,
Union,
cast,
)

import requests

from autosynth.log import logger

_GITHUB_ROOT: str = "https://api.github.com"
Expand Down Expand Up @@ -182,7 +192,13 @@ def list_files(self, repository: str, path: str, ref: str = None) -> Sequence[Di
"""
url = f"{_GITHUB_ROOT}/repos/{repository}/contents/{path}"
response = self.session.get(url, params={"ref": ref})
return cast(List[Dict], _get_json_or_raise_exception(response))
try:
return cast(List[Dict], _get_json_or_raise_exception(response))
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
return []
else:
raise

def check_for_file(self, repository: str, path: str, ref: str = None) -> bool:
"""Check to see if a file exists in a given repository.
Expand Down Expand Up @@ -353,6 +369,48 @@ def update_pull_labels(
labels=list(label_names),
)

def list_repos(self, org: str) -> List[Dict]:
    """Returns all the public repositories in an organization.

    See https://developer.github.com/v3/repos/#list-organization-repositories

    Results are fetched page by page, following the ``next`` link of each
    response until no further page is advertised.

    Args:
        org (str): The name of the organization.

    Returns:
        List[Dict]: One dict of repository metadata per repository, exactly
            as decoded from the API responses (not just the names).
    """
    # Request the largest page size GitHub allows so that big organizations
    # need far fewer round trips than with the default page size.
    url = f"{_GITHUB_ROOT}/orgs/{org}/repos?type=public&per_page=100"
    repos: List[Dict] = []
    while url:
        response = self.session.get(url)
        page = _get_json_or_raise_exception(response)
        repos.extend(page)
        # The parsed Link header has no "next" entry on the last page, which
        # yields None and ends the loop.
        url = response.links.get("next", {}).get("url")
    return repos

def get_languages(self, repository: str) -> Dict[str, int]:
    """Returns how much code the repo contains in each programming language.

    See: https://developer.github.com/v3/repos/#list-repository-languages

    Args:
        repository (str): GitHub repository with the format [owner]/[repo]

    Returns:
        Dict[str, int]: Map of programming language name to the size GitHub
            reports for it. GitHub documents this value as the number of
            bytes of code written in that language (not lines of code).
    """
    url = f"{_GITHUB_ROOT}/repos/{repository}/languages"
    langs: Dict[str, int] = {}

    while url:
        response = self.session.get(url)
        page = _get_json_or_raise_exception(response)
        langs.update(page)

        # Follow pagination if the response advertises a further page; a
        # missing "next" link yields None and ends the loop.
        url = response.links.get("next", {}).get("url")
    return langs

def get_labels(self, repository: str) -> Sequence[str]:
"""Returns labels for a repository.
Expand Down
5 changes: 3 additions & 2 deletions autosynth/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ def load_config(
try:
provider = importlib.import_module(config)
return provider.list_repositories() # type: ignore
except (ImportError, AttributeError):
pass
except (ImportError, AttributeError) as e:
logger.warning("Failed to load %s", config)
logger.warning("%s", e)
return None


Expand Down
73 changes: 3 additions & 70 deletions autosynth/providers/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,80 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from autosynth import github
from autosynth.providers.list_split_repositories import list_split_repositories


def list_repositories():
gh = github.GitHub(os.environ["GITHUB_TOKEN"])

return _google_cloud_java_repos(gh) + _other_repos(gh)


def _google_cloud_java_repos(gh):
# Presently this only enumerates folders from the google-cloud-java
# monorepo.
repo = "googleapis/google-cloud-java"

clients = gh.list_files(repo, "google-cloud-clients")
subdirs = [item["path"] for item in clients if item["type"] == "dir"]

# No hidden dirs
subdirs = [subdir for subdir in subdirs if not subdir.startswith(".")]

# Only subdirs that have synth.py files.
subdirs = [
subdir for subdir in subdirs if gh.check_for_file(repo, f"{subdir}/synth.py")
]

return [_config_for_subdir(repo, subdir) for subdir in subdirs]


def _other_repos(gh):
repos = _get_repo_list_from_sloth(gh)
repos = [repo for repo in repos if _is_java_synth_repo(gh, repo)]

return [
{"name": repo["repo"].split("/")[-1], "repository": repo["repo"]}
for repo in repos
]


def _config_for_subdir(repo: str, subdir: str):
api = subdir.split("/")[1].replace("google-cloud-", "")

return {
"name": api,
"repository": repo,
"synth-path": subdir,
"branch-suffix": api,
"pr-title": f"Regenerate {api} client",
}


def _get_repo_list_from_sloth(gh):
contents = gh.get_contents("googleapis/sloth", "repos.json")
repos = json.loads(contents)["repos"]
return repos


def _is_java_synth_repo(gh, repo):
# Only java repos.
if repo["language"] != "java":
return False
# No private repos.
if "private" in repo["repo"]:
return False
# Only repos with a synth.py in the top-level directory.
if not gh.check_for_file(repo["repo"], "synth.py"):
return False
repos = list_split_repositories("java", ("Java",))
# Ignore apiary services repo (has separate provider)
if repo["repo"] == "googleapis/google-api-java-client-services":
return False

return True
return [repo for repo in repos if repo["name"] != "google-api-java-client-services"]


if __name__ == "__main__":
Expand Down
101 changes: 101 additions & 0 deletions autosynth/providers/list_split_repositories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from typing import Dict, List, Sequence

from autosynth import github

"""Chunks that identify a repo from its name as belonging to a language.
In other words, we can look at the repo name python-spanner and know that it's
for a python library because it contains the word 'python'.
"""
# Each entry is compared against the hyphen-separated parts of a repo name.
_SILVER_NAME_CHUNKS = (
    "nodejs", "python", "ruby", "dotnet",
    "php", "java", "go", "elixir",
)

"""Language names as reported by github."""
# Spelling and capitalization must match the keys GitHub uses in its
# per-repository language report.
_SILVER_LANGUAGE_NAMES = (
    "JavaScript", "TypeScript", "Python",
    "Java", "PHP", "Ruby",
    "Go", "C#", "Elixir",
)


def list_split_repositories(
    repo_name_chunk: str, majority_languages: Sequence[str] = ()
) -> List[Dict]:
    """List googleapis GitHub repos for a programming language.

    A non-archived repo is selected when either:

    * ``repo_name_chunk`` is one of the hyphen-separated parts of its name, or
    * ``majority_languages`` is given, the repo name carries no language chunk
      at all, and the largest of the tracked languages in the repo is one of
      ``majority_languages``.

    Only selected repos with a top-level ``synth.py`` are returned.

    Args:
        repo_name_chunk (str): return repos that have this chunk in the repo name.
            Example: "nodejs"
        majority_languages (Sequence[str], optional): return repos whose
            top-ranked tracked language is one of these programming languages.
            Example: ("JavaScript", "TypeScript")

    Returns:
        List[Dict]: One dict per selected repo, sorted by repo name, with the
            keys "name" (bare repo name) and "repository" ("googleapis/<name>").

    Raises:
        KeyError: If the GITHUB_TOKEN environment variable is not set.
    """
    gh = github.GitHub(os.environ["GITHUB_TOKEN"])
    all_repos = {
        repo["name"] for repo in gh.list_repos("googleapis") if not repo["archived"]
    }
    # Find repos with the language as part of the repo name.
    lang_repos = {repo for repo in all_repos if repo_name_chunk in repo.split("-")}

    if majority_languages:
        # Repos whose name tags them for any language are settled by name
        # alone, so they are never examined by code content.
        silver_name_chunks = set(_SILVER_NAME_CHUNKS)
        name_tagged_repos = {
            repo
            for repo in all_repos
            if not silver_name_chunks.isdisjoint(repo.split("-"))
        }
        # Find repos with the majority of their code written in the language.
        silver_language_names = set(_SILVER_LANGUAGE_NAMES)
        for repo in all_repos - name_tagged_repos:
            languages = gh.get_languages(f"googleapis/{repo}")
            # (count, lang) pairs so that max() ranks by amount of code.
            # Ignore languages we don't care about, like Shell.
            ranks = [
                (count, lang)
                for lang, count in languages.items()
                if lang in silver_language_names
            ]
            if ranks and max(ranks)[1] in majority_languages:
                lang_repos.add(repo)

    # Only repos with a synth.py in the top-level directory; an empty listing
    # means the file is absent.
    return [
        {"name": repo, "repository": f"googleapis/{repo}"}
        for repo in sorted(lang_repos)
        if gh.list_files(f"googleapis/{repo}", "synth.py")
    ]
34 changes: 2 additions & 32 deletions autosynth/providers/nodejs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,41 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from autosynth import github


def _get_repo_list_from_sloth(gh):
contents = gh.get_contents("googleapis/sloth", "repos.json")
repos = json.loads(contents)["repos"]
return repos


def _is_nodejs_synth_repo(gh, repo):
# Only nodejs repos.
if repo["language"] != "nodejs":
return False
# No private repos.
if "private" in repo["repo"]:
return False
# Only repos with a synth.py in the top-level directory.
if not gh.check_for_file(repo["repo"], "synth.py"):
return False

return True
from autosynth.providers.list_split_repositories import list_split_repositories


def list_repositories():
gh = github.GitHub(os.environ["GITHUB_TOKEN"])
repos = _get_repo_list_from_sloth(gh)
repos = [repo for repo in repos if _is_nodejs_synth_repo(gh, repo)]

return [
{"name": repo["repo"].split("/")[-1], "repository": repo["repo"]}
for repo in repos
]
return list_split_repositories("nodejs", ("JavaScript", "TypeScript"))


if __name__ == "__main__":
Expand Down
39 changes: 2 additions & 37 deletions autosynth/providers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from autosynth import github


def _get_repo_list_from_sloth(gh):
contents = gh.get_contents("googleapis/sloth", "repos.json")
repos = json.loads(contents)["repos"]
return repos


def _is_python_synth_repo(gh, repo):
"""Finds Python repositories with synth files in the top-level directory."""
# Only python repos.
if repo["language"] != "python":
return False
# No private repos.
if "private" in repo["repo"]:
return False
# Only repos with a synth.py in the top-level directory.
if not gh.check_for_file(repo["repo"], "synth.py"):
return False

return True
from autosynth.providers.list_split_repositories import list_split_repositories


def list_repositories():
"""Finds repositories with a `synth.py` in the top-level"""
gh = github.GitHub(os.environ["GITHUB_TOKEN"])

repos = _get_repo_list_from_sloth(gh)
repos = [repo for repo in repos if _is_python_synth_repo(gh, repo)]

repo_list = [
{"name": repo["repo"].split("/")[-1], "repository": repo["repo"]}
for repo in repos
]

return repo_list
return list_split_repositories("python", ("Python",))


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit d79dc99

Please sign in to comment.