Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remove all dependencies on sloth #649

Merged
39 changes: 37 additions & 2 deletions autosynth/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,18 @@
# limitations under the License.

import base64
from typing import Generator, Sequence, Dict, Optional, Union, List, cast
from typing import (
Dict,
Generator,
List,
Optional,
Sequence,
Union,
cast,
)

import requests

from autosynth.log import logger

_GITHUB_ROOT: str = "https://api.github.com"
Expand Down Expand Up @@ -182,7 +192,13 @@ def list_files(self, repository: str, path: str, ref: str = None) -> Sequence[Di
"""
url = f"{_GITHUB_ROOT}/repos/{repository}/contents/{path}"
response = self.session.get(url, params={"ref": ref})
return cast(List[Dict], _get_json_or_raise_exception(response))
try:
return cast(List[Dict], _get_json_or_raise_exception(response))
except requests.exceptions.HTTPError as e:
if e.response.status_code == 404:
return []
else:
raise

def check_for_file(self, repository: str, path: str, ref: str = None) -> bool:
"""Check to see if a file exists in a given repository.
Expand Down Expand Up @@ -353,6 +369,25 @@ def update_pull_labels(
labels=list(label_names),
)

def list_repos(self, org: str) -> List[str]:
    """Returns a list of all the public repositories in an organization.

    Follows the ``Link``-header pagination until every page is consumed.

    Args:
        org (str): The name of the organization.

    Returns:
        List[str]: The list of repository names.
    """
    url = f"{_GITHUB_ROOT}/orgs/{org}/repos?type=public"
    repo_names: List[str] = []
    while url:
        response = self.session.get(url)
        # Renamed from `json` to avoid shadowing the json module name.
        payload = _get_json_or_raise_exception(response)
        repo_names.extend(repo["name"] for repo in payload)
        # requests exposes parsed Link headers; absent "next" ends the loop.
        url = response.links.get("next", {}).get("url")
    return repo_names

def get_labels(self, repository: str) -> Sequence[str]:
"""Returns labels for a repository.

Expand Down
5 changes: 3 additions & 2 deletions autosynth/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ def load_config(
try:
provider = importlib.import_module(config)
return provider.list_repositories() # type: ignore
except (ImportError, AttributeError):
pass
except (ImportError, AttributeError) as e:
logger.warning("Failed to load %s", config)
logger.warning("%s", e)
return None


Expand Down
Empty file.
73 changes: 3 additions & 70 deletions autosynth/providers/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,80 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from autosynth import github
from autosynth.providers.list_split_repositories import list_split_repositories


def list_repositories():
gh = github.GitHub(os.environ["GITHUB_TOKEN"])

return _google_cloud_java_repos(gh) + _other_repos(gh)


def _google_cloud_java_repos(gh):
    """Enumerate synth-enabled client folders in the google-cloud-java monorepo."""
    monorepo = "googleapis/google-cloud-java"
    configs = []
    for entry in gh.list_files(monorepo, "google-cloud-clients"):
        if entry["type"] != "dir":
            continue
        path = entry["path"]
        # Skip hidden directories.
        if path.startswith("."):
            continue
        # Only folders carrying a synth.py can be regenerated.
        if not gh.check_for_file(monorepo, f"{path}/synth.py"):
            continue
        configs.append(_config_for_subdir(monorepo, path))
    return configs


def _other_repos(gh):
    """Build autosynth configs for standalone Java repos listed by sloth."""
    configs = []
    for entry in _get_repo_list_from_sloth(gh):
        if not _is_java_synth_repo(gh, entry):
            continue
        full_name = entry["repo"]
        configs.append({"name": full_name.split("/")[-1], "repository": full_name})
    return configs


def _config_for_subdir(repo: str, subdir: str):
api = subdir.split("/")[1].replace("google-cloud-", "")

return {
"name": api,
"repository": repo,
"synth-path": subdir,
"branch-suffix": api,
"pr-title": f"Regenerate {api} client",
}


def _get_repo_list_from_sloth(gh):
contents = gh.get_contents("googleapis/sloth", "repos.json")
repos = json.loads(contents)["repos"]
return repos


def _is_java_synth_repo(gh, repo):
# Only java repos.
if repo["language"] != "java":
return False
# No private repos.
if "private" in repo["repo"]:
return False
# Only repos with a synth.py in the top-level directory.
if not gh.check_for_file(repo["repo"], "synth.py"):
return False
repos = list_split_repositories("java")
# Ignore apiary services repo (has separate provider)
if repo["repo"] == "googleapis/google-api-java-client-services":
return False

return True
return [repo for repo in repos if repo["name"] != "google-api-java-client-services"]


if __name__ == "__main__":
Expand Down
46 changes: 46 additions & 0 deletions autosynth/providers/list_split_repositories.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import pathlib
from typing import Dict, List

import yaml

from autosynth import github


def list_split_repositories(lang: str) -> List[Dict]:
    """Finds repos in github named like *-lang-*, and combines with lang-addendum.yaml.

    Args:
        lang (str): Language token to match in repository names, e.g. "nodejs".

    Returns:
        List[Dict]: Repo dicts with "name" and "repository" keys, sorted by name.
    """
    # Load the repo list from the addendum; read_text() opens and closes the
    # file deterministically (the original open(...).read() leaked the handle).
    addendum_path = pathlib.Path(__file__).parent / f"{lang}-addendum.yaml"
    addendum = list(yaml.safe_load(addendum_path.read_text()) or [])
    # Find repos on github.
    repos = _list_github_repositories(lang) + addendum
    repos.sort(key=lambda x: x["name"])
    return repos


def _list_github_repositories(lang: str) -> List[Dict]:
    """Find repos on github with a matching name and a top-level synth.py file."""
    gh = github.GitHub(os.environ["GITHUB_TOKEN"])
    # Keep repos whose hyphen-separated name contains the language token.
    candidates = [
        name for name in gh.list_repos("googleapis") if lang in name.split("-")
    ]
    found: List[Dict] = []
    for name in candidates:
        # list_files returns an empty list when synth.py is absent.
        if gh.list_files(f"googleapis/{name}", "synth.py"):
            found.append({"name": name, "repository": f"googleapis/{name}"})
    return found
25 changes: 25 additions & 0 deletions autosynth/providers/nodejs-addendum.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
- name: gaxios
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to look at other ways to approach this problem. I am very worried about having yet another place where we have to list out all applicable targets. Two different thoughts.

Using the GitHub Search API
To get around this problem in github-repo-automation, I opted to use the GitHub Search API to identify the relevant repositories:
https://developer.github.com/v3/search/#search-repositories

The query we used there looks like this:

org:googleapis language:typescript language:javascript is:public archived:false

Inversion of control
Maybe a better question is, why do we need to have a centralized place for these jobs to run. Should we consider pushing responsibility of registration down to the repository level? I could absolutely see a future where orgs outside of googleapis (ads, analytics) want to use this tool. Is there a reason we couldn't ask repository owners to have a kokoro job on a cron that calls autosynth on their own?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to look at other ways to approach this problem. I am very worried about having yet another place where we have to list out all applicable targets. Two different thoughts.

Using the GitHub Search API
To get around this problem in github-repo-automation, I opted to use the GitHub Search API to identify the relevant repositories:
https://developer.github.com/v3/search/#search-repositories

The query we used there looks like this:

org:googleapis language:typescript language:javascript is:public archived:false

That's actually how I wrote the code the first time. The problem is that all the queries came back marked incomplete, and indeed were missing a few results. So autosynth would not regenerate a few unlucky repos every day.

https://developer.github.com/v3/search/#timeouts-and-incomplete-results

Inversion of control
Maybe a better question is, why do we need to have a centralized place for these jobs to run. Should we consider pushing responsibility of registration down to the repository level?

That's kinda what this code does for all but about 10 repos. Register your new nodejs repo by adding synth.py to the root, and autosynth will start generating it.

If we pushed kokoro registration down to the repo levels, then we'd be maintaining 300+ kokoro job configs in piper, that have to stay in sync with 300+ build configs and synth.py files in the repos.

I could absolutely see a future where orgs outside of googleapis (ads, analytics) want to use this tool. Is there a reason we couldn't ask repository owners to have a kokoro job on a cron that calls autosynth on their own?

I see no reason. They can do that today.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok - on the search result API thing, bummer, didn't know that, thank you :) I'm still wigged out about the 10 weird repos in the yaml. Instead, could we use the repos list API, find all TypeScript/JavaScript APIs with a synth.py, and use those?
https://developer.github.com/v3/repos/#list-organization-repositories

I'm really worried we are going to create a new repo without the nodejs- prefix, try to start using synthtool, and forget to register it here. Thinking out loud - I wonder if a model like this wouldn't be better to drive the entire autosynth process. Grab every repo in the org, check if it has a synth.py, and run it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I'll write the code to scan all the repos that contain a majority of javascript code and a synth.py file.

One issue that prevents a more general solution is that different languages expect a different build environment, as is manifest by the .sh files in https://github.com/googleapis/synthtool/tree/master/.kokoro-autosynth.

If all 7 languages attempted to examine all googleapis' repos, then that would be 7 languages * hundreds of repos = 1000s of API calls within the same hour, and I worry we'd exceed our github API quota.

We could work around that with bots or maybe github actions (can they be installed for a whole organization?) that update a master list periodically, or maybe using an https cache, but that's a much larger solution.

repository: googleapis/gaxios
- name: gcp-metadata
repository: googleapis/gcp-metadata
- name: gcs-resumable-upload
repository: googleapis/gcs-resumable-upload
- name: github-repo-automation
repository: googleapis/github-repo-automation
- name: google-cloud-node
repository: googleapis/google-cloud-node
- name: google-p12-pem
repository: googleapis/google-p12-pem
- name: jsdoc-fresh
repository: googleapis/jsdoc-fresh
- name: node-gtoken
repository: googleapis/node-gtoken
- name: release-please
repository: googleapis/release-please
- name: repo-automation-bots
repository: googleapis/repo-automation-bots
- name: sloth
repository: googleapis/sloth
- name: teeny-request
repository: googleapis/teeny-request

34 changes: 2 additions & 32 deletions autosynth/providers/nodejs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,41 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from autosynth import github


def _get_repo_list_from_sloth(gh):
contents = gh.get_contents("googleapis/sloth", "repos.json")
repos = json.loads(contents)["repos"]
return repos


def _is_nodejs_synth_repo(gh, repo):
# Only nodejs repos.
if repo["language"] != "nodejs":
return False
# No private repos.
if "private" in repo["repo"]:
return False
# Only repos with a synth.py in the top-level directory.
if not gh.check_for_file(repo["repo"], "synth.py"):
return False

return True
from autosynth.providers.list_split_repositories import list_split_repositories


def list_repositories():
    """List nodejs repositories for autosynth to regenerate.

    Delegates to the shared split-repository scanner; the old sloth-based
    body was dead diff residue left after an unconditional return.
    """
    return list_split_repositories("nodejs")


if __name__ == "__main__":
Expand Down
Empty file.
39 changes: 2 additions & 37 deletions autosynth/providers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,46 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os

from autosynth import github


def _get_repo_list_from_sloth(gh):
contents = gh.get_contents("googleapis/sloth", "repos.json")
repos = json.loads(contents)["repos"]
return repos


def _is_python_synth_repo(gh, repo):
"""Finds Python repositories with synth files in the top-level directory."""
# Only python repos.
if repo["language"] != "python":
return False
# No private repos.
if "private" in repo["repo"]:
return False
# Only repos with a synth.py in the top-level directory.
if not gh.check_for_file(repo["repo"], "synth.py"):
return False

return True
from autosynth.providers.list_split_repositories import list_split_repositories


def list_repositories():
    """Finds repositories with a `synth.py` in the top-level.

    Delegates to the shared split-repository scanner; the old sloth-based
    body was dead diff residue left after an unconditional return.
    """
    return list_split_repositories("python")


if __name__ == "__main__":
Expand Down
33 changes: 33 additions & 0 deletions tests/test_github.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import os

from autosynth import github

# Decorator that skips a test unless a GitHub token is available in the env.
requires_github_token = unittest.skipIf(
    not os.environ.get("GITHUB_TOKEN"),
    "Set the environment variable GITHUB_TOKEN to run this test.",
)


def new_gh():
    """Construct a GitHub client authenticated with the GITHUB_TOKEN env var."""
    token = os.environ["GITHUB_TOKEN"]
    return github.GitHub(token)


@requires_github_token
def test_list_repos():
    """Smoke-test list_repos against the live googleapis org."""
    client = new_gh()
    names = client.list_repos("googleapis")
    assert len(names) > 10