diff --git a/.github/workflows/verify_library_generation.yaml b/.github/workflows/verify_library_generation.yaml index 89cec5ddc9..6299c66612 100644 --- a/.github/workflows/verify_library_generation.yaml +++ b/.github/workflows/verify_library_generation.yaml @@ -97,7 +97,7 @@ jobs: - name: Run python unit tests run: | set -x - python -m unittest library_generation/test/unit_tests.py + python -m unittest discover -s library_generation/test/ -p "*unit_tests.py" lint-shell: runs-on: ubuntu-22.04 steps: diff --git a/library_generation/generate_pr_description.py b/library_generation/generate_pr_description.py new file mode 100644 index 0000000000..5c98912331 --- /dev/null +++ b/library_generation/generate_pr_description.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import shutil +from typing import Dict + +import click +from git import Commit, Repo +from library_generation.model.generation_config import from_yaml +from library_generation.utilities import find_versioned_proto_path +from library_generation.utils.commit_message_formatter import format_commit_message +from library_generation.utilities import get_file_paths +from library_generation.utils.commit_message_formatter import wrap_nested_commit + + +@click.group(invoke_without_command=False) +@click.pass_context +@click.version_option(message="%(version)s") +def main(ctx): + pass + + +@main.command() +@click.option( + "--generation-config-yaml", + required=True, + type=str, + help=""" + Path to generation_config.yaml that contains the metadata about + library generation. + The googleapis commit in the configuration is the latest commit, + inclusively, from which the commit message is considered. + """, +) +@click.option( + "--baseline-commit", + required=True, + type=str, + help=""" + The baseline (oldest) commit, exclusively, from which the commit message is + considered. + This commit should be an ancestor of googleapis commit in configuration. + """, +) +@click.option( + "--repo-url", + type=str, + default="https://github.com/googleapis/googleapis.git", + show_default=True, + help=""" + GitHub repository URL. 
+ """, +) +def generate( + generation_config_yaml: str, + repo_url: str, + baseline_commit: str, +) -> str: + return generate_pr_descriptions( + generation_config_yaml=generation_config_yaml, + repo_url=repo_url, + baseline_commit=baseline_commit, + ) + + +def generate_pr_descriptions( + generation_config_yaml: str, + repo_url: str, + baseline_commit: str, +) -> str: + config = from_yaml(generation_config_yaml) + paths = get_file_paths(config) + return __get_commit_messages( + repo_url=repo_url, + latest_commit=config.googleapis_commitish, + baseline_commit=baseline_commit, + paths=paths, + generator_version=config.gapic_generator_version, + is_monorepo=config.is_monorepo, + ) + + +def __get_commit_messages( + repo_url: str, + latest_commit: str, + baseline_commit: str, + paths: Dict[str, str], + generator_version: str, + is_monorepo: bool, +) -> str: + """ + Combine commit messages of a repository from latest_commit to + baseline_commit. Only commits which change files in the pre-defined + file paths will be considered. + Note that baseline_commit should be an ancestor of latest_commit. + + :param repo_url: the url of the repository. + :param latest_commit: the newest commit to be considered in + selecting commit message. + :param baseline_commit: the oldest commit to be considered in + selecting commit message. This commit should be an ancestor of latest_commit. + :param paths: a mapping from file paths to library_name. + :param generator_version: the version of the generator. + :param is_monorepo: whether to generate commit messages in a monorepo. + :return: commit messages. 
+ """ + tmp_dir = "/tmp/repo" + shutil.rmtree(tmp_dir, ignore_errors=True) + os.mkdir(tmp_dir) + repo = Repo.clone_from(repo_url, tmp_dir) + commit = repo.commit(latest_commit) + qualified_commits = {} + while str(commit.hexsha) != baseline_commit: + commit_and_name = __filter_qualified_commit(paths=paths, commit=commit) + if commit_and_name != (): + qualified_commits[commit_and_name[0]] = commit_and_name[1] + commit_parents = commit.parents + if len(commit_parents) == 0: + break + commit = commit_parents[0] + shutil.rmtree(tmp_dir, ignore_errors=True) + return __combine_commit_messages( + latest_commit=latest_commit, + baseline_commit=baseline_commit, + commits=qualified_commits, + generator_version=generator_version, + is_monorepo=is_monorepo, + ) + + +def __filter_qualified_commit(paths: Dict[str, str], commit: Commit) -> (Commit, str): + """ + Returns a tuple of a commit and library_name. + A qualified commit means at least one file changed in that commit is + within the versioned proto_path in paths. + + :param paths: a mapping from versioned proto_path to library_name. + :param commit: a commit under consideration. + :return: a tuple of a commit and library_name if the commit is + qualified; otherwise an empty tuple. 
+ """ + for file in commit.stats.files.keys(): + versioned_proto_path = find_versioned_proto_path(file) + if versioned_proto_path in paths: + return commit, paths[versioned_proto_path] + return () + + +def __combine_commit_messages( + latest_commit: str, + baseline_commit: str, + commits: Dict[Commit, str], + generator_version: str, + is_monorepo: bool, +) -> str: + messages = [ + f"This pull request is generated with proto changes between googleapis commit {baseline_commit} (exclusive) and {latest_commit} (inclusive).", + "Qualified commits are:", + ] + for commit in commits: + short_sha = commit.hexsha[:7] + messages.append( + f"[googleapis/googleapis@{short_sha}](https://github.com/googleapis/googleapis/commit/{commit.hexsha})" + ) + + messages.extend(format_commit_message(commits=commits, is_monorepo=is_monorepo)) + messages.extend( + wrap_nested_commit( + [ + f"feat: Regenerate with the Java code generator (gapic-generator-java) v{generator_version}" + ] + ) + ) + + return "\n".join(messages) + + +if __name__ == "__main__": + main() diff --git a/library_generation/test/integration_tests.py b/library_generation/test/integration_tests.py index 354229b684..3ca7d45881 100755 --- a/library_generation/test/integration_tests.py +++ b/library_generation/test/integration_tests.py @@ -17,12 +17,14 @@ import unittest from distutils.dir_util import copy_tree from distutils.file_util import copy_file +from filecmp import cmp from filecmp import dircmp from git import Repo from pathlib import Path from typing import List -from typing import Dict + +from library_generation.generate_pr_description import generate_pr_descriptions from library_generation.generate_repo import generate_from_yaml from library_generation.model.generation_config import from_yaml, GenerationConfig from library_generation.test.compare_poms import compare_xml @@ -49,6 +51,35 @@ class IntegrationTest(unittest.TestCase): + def test_get_commit_message_success(self): + repo_url = 
"https://github.com/googleapis/googleapis.git" + config_files = self.__get_config_files(config_dir) + monorepo_baseline_commit = "a17d4caf184b050d50cacf2b0d579ce72c31ce74" + split_repo_baseline_commit = "679060c64136e85b52838f53cfe612ce51e60d1d" + for repo, config_file in config_files: + baseline_commit = ( + monorepo_baseline_commit + if repo == "google-cloud-java" + else split_repo_baseline_commit + ) + description = generate_pr_descriptions( + generation_config_yaml=config_file, + repo_url=repo_url, + baseline_commit=baseline_commit, + ) + description_file = f"{config_dir}/{repo}/pr-description.txt" + if os.path.isfile(f"{description_file}"): + os.remove(f"{description_file}") + with open(f"{description_file}", "w+") as f: + f.write(description) + self.assertTrue( + cmp( + f"{config_dir}/{repo}/pr-description-golden.txt", + f"{description_file}", + ) + ) + os.remove(f"{description_file}") + def test_generate_repo(self): shutil.rmtree(f"{golden_dir}", ignore_errors=True) os.makedirs(f"{golden_dir}", exist_ok=True) @@ -150,7 +181,7 @@ def __pull_repo_to(cls, default_dest: Path, repo: str, committish: str) -> str: repo = Repo(dest) else: dest = default_dest - repo_dest = f"{golden_dir}/{repo}" + shutil.rmtree(dest, ignore_errors=True) repo_url = f"{repo_prefix}/{repo}" print(f"Cloning repository {repo_url}") repo = Repo.clone_from(repo_url, dest) @@ -169,6 +200,8 @@ def __get_library_names_from_config(cls, config: GenerationConfig) -> List[str]: def __get_config_files(cls, path: str) -> List[tuple[str, str]]: config_files = [] for sub_dir in Path(path).resolve().iterdir(): + if sub_dir.is_file(): + continue repo = sub_dir.name if repo == "golden": continue diff --git a/library_generation/test/resources/integration/google-cloud-java/generation_config.yaml b/library_generation/test/resources/integration/google-cloud-java/generation_config.yaml index 5b2b4d3f0f..eb2dcf7258 100644 --- 
a/library_generation/test/resources/integration/google-cloud-java/generation_config.yaml +++ b/library_generation/test/resources/integration/google-cloud-java/generation_config.yaml @@ -48,3 +48,17 @@ libraries: - proto_path: google/cloud/alloydb/connectors/v1 - proto_path: google/cloud/alloydb/connectors/v1alpha - proto_path: google/cloud/alloydb/connectors/v1beta + + - api_shortname: documentai + name_pretty: Document AI + product_documentation: https://cloud.google.com/compute/docs/documentai/ + api_description: allows developers to unlock insights from your documents with machine + learning. + library_name: document-ai + release_level: stable + issue_tracker: https://issuetracker.google.com/savedsearches/559755 + GAPICs: + - proto_path: google/cloud/documentai/v1 + - proto_path: google/cloud/documentai/v1beta1 + - proto_path: google/cloud/documentai/v1beta2 + - proto_path: google/cloud/documentai/v1beta3 diff --git a/library_generation/test/resources/integration/google-cloud-java/pr-description-golden.txt b/library_generation/test/resources/integration/google-cloud-java/pr-description-golden.txt new file mode 100644 index 0000000000..c095ab12a1 --- /dev/null +++ b/library_generation/test/resources/integration/google-cloud-java/pr-description-golden.txt @@ -0,0 +1,19 @@ +This pull request is generated with proto changes between googleapis commit a17d4caf184b050d50cacf2b0d579ce72c31ce74 (exclusive) and 1a45bf7393b52407188c82e63101db7dc9c72026 (inclusive). 
+Qualified commits are: +[googleapis/googleapis@7a9a855](https://github.com/googleapis/googleapis/commit/7a9a855287b5042410c93e5a510f40efd4ce6cb1) +[googleapis/googleapis@c7fd8bd](https://github.com/googleapis/googleapis/commit/c7fd8bd652ac690ca84f485014f70b52eef7cb9e) +BEGIN_NESTED_COMMIT +feat: [document-ai] expose model_type in v1 processor, so that user can see the model_type after get or list processor version + +PiperOrigin-RevId: 603727585 + +END_NESTED_COMMIT +BEGIN_NESTED_COMMIT +feat: [document-ai] add model_type in v1beta3 processor proto + +PiperOrigin-RevId: 603726122 + +END_NESTED_COMMIT +BEGIN_NESTED_COMMIT +feat: Regenerate with the Java code generator (gapic-generator-java) v2.34.0 +END_NESTED_COMMIT \ No newline at end of file diff --git a/library_generation/test/resources/integration/java-bigtable/pr-description-golden.txt b/library_generation/test/resources/integration/java-bigtable/pr-description-golden.txt new file mode 100644 index 0000000000..08cd7d15d1 --- /dev/null +++ b/library_generation/test/resources/integration/java-bigtable/pr-description-golden.txt @@ -0,0 +1,19 @@ +This pull request is generated with proto changes between googleapis commit 679060c64136e85b52838f53cfe612ce51e60d1d (exclusive) and fc3043ebe12fb6bc1729c175e1526c859ce751d8 (inclusive). 
+Qualified commits are: +[googleapis/googleapis@fbcfef0](https://github.com/googleapis/googleapis/commit/fbcfef09510b842774530989889ed1584a8b5acb) +[googleapis/googleapis@63d2a60](https://github.com/googleapis/googleapis/commit/63d2a60056ad5b156c05c7fb13138fc886c3b739) +BEGIN_NESTED_COMMIT +fix: extend timeouts for deleting snapshots, backups and tables + +PiperOrigin-RevId: 605388988 + +END_NESTED_COMMIT +BEGIN_NESTED_COMMIT +chore: update retry settings for backup rpcs + +PiperOrigin-RevId: 605367937 + +END_NESTED_COMMIT +BEGIN_NESTED_COMMIT +feat: Regenerate with the Java code generator (gapic-generator-java) v2.35.0 +END_NESTED_COMMIT \ No newline at end of file diff --git a/library_generation/test/unit_tests.py b/library_generation/test/unit_tests.py index 54940def12..909414e20e 100644 --- a/library_generation/test/unit_tests.py +++ b/library_generation/test/unit_tests.py @@ -22,6 +22,7 @@ import contextlib from pathlib import Path from difflib import unified_diff + from typing import List from parameterized import parameterized from library_generation import utilities as util @@ -30,6 +31,8 @@ from library_generation.model.gapic_inputs import parse as parse_build_file from library_generation.model.generation_config import from_yaml from library_generation.model.library_config import LibraryConfig +from library_generation.utilities import find_versioned_proto_path +from library_generation.utilities import get_file_paths script_dir = os.path.dirname(os.path.realpath(__file__)) resources_dir = os.path.join(script_dir, "resources") @@ -214,6 +217,36 @@ def test_from_yaml_succeeds(self): self.assertEqual("google/cloud/asset/v1p5beta1", gapics[3].proto_path) self.assertEqual("google/cloud/asset/v1p7beta1", gapics[4].proto_path) + def test_get_file_paths_from_yaml_success(self): + paths = get_file_paths(from_yaml(f"{test_config_dir}/generation_config.yaml")) + self.assertEqual( + { + "google/cloud/asset/v1": "asset", + "google/cloud/asset/v1p1beta1": "asset", + 
"google/cloud/asset/v1p2beta1": "asset", + "google/cloud/asset/v1p5beta1": "asset", + "google/cloud/asset/v1p7beta1": "asset", + }, + paths, + ) + + @parameterized.expand( + [ + ( + "google/cloud/aiplatform/v1/schema/predict/params/image_classification.proto", + "google/cloud/aiplatform/v1", + ), + ( + "google/cloud/asset/v1p2beta1/assets.proto", + "google/cloud/asset/v1p2beta1", + ), + ("google/type/color.proto", "google/type/color.proto"), + ] + ) + def test_find_versioned_proto_path(self, file_path, expected): + proto_path = find_versioned_proto_path(file_path) + self.assertEqual(expected, proto_path) + @parameterized.expand( [ ("BUILD_no_additional_protos.bazel", " "), diff --git a/library_generation/test/utils/__init__.py b/library_generation/test/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/library_generation/test/utils/commit_message_formatter_unit_tests.py b/library_generation/test/utils/commit_message_formatter_unit_tests.py new file mode 100644 index 0000000000..5fd3599963 --- /dev/null +++ b/library_generation/test/utils/commit_message_formatter_unit_tests.py @@ -0,0 +1,116 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest +from unittest.mock import patch + +from library_generation.utils.commit_message_formatter import format_commit_message +from library_generation.utils.commit_message_formatter import wrap_nested_commit + + +class CommitMessageFormatterTest(unittest.TestCase): + def test_format_commit_message_should_add_library_name_for_conventional_commit( + self, + ): + with patch("git.Commit") as mock_commit: + commit = mock_commit.return_value + commit.message = "feat: a commit message\nPiperOrigin-RevId: 123456" + commits = {commit: "example_library"} + self.assertEqual( + [ + "BEGIN_NESTED_COMMIT", + "feat: [example_library] a commit message", + "PiperOrigin-RevId: 123456", + "END_NESTED_COMMIT", + ], + format_commit_message(commits, True), + ) + + def test_format_commit_message_should_add_library_name_for_mutliline_conventional_commit( + self, + ): + with patch("git.Commit") as mock_commit: + commit = mock_commit.return_value + commit.message = "feat: a commit message\nfix: an another commit message\nPiperOrigin-RevId: 123456" + commits = {commit: "example_library"} + self.assertEqual( + [ + "BEGIN_NESTED_COMMIT", + "feat: [example_library] a commit message", + "fix: [example_library] an another commit message", + "PiperOrigin-RevId: 123456", + "END_NESTED_COMMIT", + ], + format_commit_message(commits, True), + ) + + def test_format_commit_message_should_not_add_library_name_for_nonconvnentional_commit( + self, + ): + with patch("git.Commit") as mock_commit: + commit = mock_commit.return_value + commit.message = "PiperOrigin-RevId: 123456" + commits = {commit: "example_library"} + self.assertEqual( + [ + "BEGIN_NESTED_COMMIT", + "PiperOrigin-RevId: 123456", + "END_NESTED_COMMIT", + ], + format_commit_message(commits, True), + ) + + def test_format_commit_message_should_not_add_library_name_if_not_monorepo(self): + with patch("git.Commit") as mock_commit: + commit = mock_commit.return_value + commit.message = "feat: a commit message\nPiperOrigin-RevId: 123456" 
+ commits = {commit: "example_library"} + self.assertEqual( + [ + "BEGIN_NESTED_COMMIT", + "feat: a commit message", + "PiperOrigin-RevId: 123456", + "END_NESTED_COMMIT", + ], + format_commit_message(commits, False), + ) + + def test_format_commit_message_should_not_add_library_name_for_multiline_commit_if_not_monorepo( + self, + ): + with patch("git.Commit") as mock_commit: + commit = mock_commit.return_value + commit.message = "feat: a commit message\nfix: an another commit message\nPiperOrigin-RevId: 123456" + commits = {commit: "example_library"} + self.assertEqual( + [ + "BEGIN_NESTED_COMMIT", + "feat: a commit message", + "fix: an another commit message", + "PiperOrigin-RevId: 123456", + "END_NESTED_COMMIT", + ], + format_commit_message(commits, False), + ) + + def test_wrap_nested_commit_success(self): + messages = ["a commit message", "another message"] + self.assertEqual( + [ + "BEGIN_NESTED_COMMIT", + "a commit message", + "another message", + "END_NESTED_COMMIT", + ], + wrap_nested_commit(messages), + ) diff --git a/library_generation/utilities.py b/library_generation/utilities.py index 0d1fd39c8f..2faee66f7a 100755 --- a/library_generation/utilities.py +++ b/library_generation/utilities.py @@ -18,6 +18,8 @@ import shutil import re from pathlib import Path +from typing import Dict + from lxml import etree from library_generation.model.bom_config import BomConfig from library_generation.model.generation_config import GenerationConfig @@ -483,3 +485,36 @@ def get_version_from( for line in f.readlines(): if artifact_id in line: return line.split(":")[index].strip() + + +def get_file_paths(config: GenerationConfig) -> Dict[str, str]: + """ + Get versioned proto_path to library_name mapping from configuration file. + + :param config: a GenerationConfig object. 
+ :return: versioned proto_path to library_name mapping + """ + paths = {} + for library in config.libraries: + for gapic_config in library.gapic_configs: + paths[gapic_config.proto_path] = get_library_name(library) + return paths + + +def find_versioned_proto_path(file_path: str) -> str: + """ + Returns a versioned proto_path from a given file_path; or file_path itself + if it doesn't contain a versioned proto_path. + + :param file_path: a proto file path + :return: the versioned proto_path + """ + version_regex = re.compile(r"^v[1-9].*") + directories = file_path.split("/") + for directory in directories: + result = version_regex.search(directory) + if result: + version = result[0] + idx = file_path.find(version) + return file_path[:idx] + version + return file_path diff --git a/library_generation/utils/commit_message_formatter.py b/library_generation/utils/commit_message_formatter.py new file mode 100644 index 0000000000..afa51a4db6 --- /dev/null +++ b/library_generation/utils/commit_message_formatter.py @@ -0,0 +1,64 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import re +from typing import List +from typing import Dict +from git import Commit + + +def format_commit_message(commits: Dict[Commit, str], is_monorepo: bool) -> List[str]: + """ + Format commit messages. Add library_name to conventional commit messages if + is_monorepo is True; otherwise no op. + + :param commits: a mapping from commit to library_name. 
+ :param is_monorepo: whether it's monorepo or not. + :return: formatted commit messages. + """ + all_commits = [] + # please see go/java-client-releasing#conventional-commit-messages + # for conventional commit. + type_regex = re.compile(r"(feat|fix|docs|deps|test|samples|chore)!?:.*") + for commit, library_name in commits.items(): + # a commit message may contain multiple lines, we need to + # add library_name for each line. + messages = [] + for message_line in commit.message.split("\n"): + # add library name to a conventional commit message; + # otherwise no op. + if type_regex.search(message_line): + commit_type, _, summary = message_line.partition(":") + formatted_message = ( + f"{commit_type}: [{library_name}]{str(summary).rstrip()}" + if is_monorepo + else f"{commit_type}:{str(summary).rstrip()}" + ) + messages.append(formatted_message) + else: + messages.append(message_line) + all_commits.extend(wrap_nested_commit(messages)) + return all_commits + + +def wrap_nested_commit(messages: List[str]) -> List[str]: + """ + Wrap message between `BEGIN_NESTED_COMMIT` and `END_NESTED_COMMIT`. + + :param messages: a (multi-line) commit message, one line per item. + :return: wrapped messages. + """ + result = ["BEGIN_NESTED_COMMIT"] + result.extend(messages) + result.append("END_NESTED_COMMIT") + return result