diff --git a/build/release/pre_gen_release_notes.py b/build/release/pre_gen_release_notes.py new file mode 100755 index 00000000000..ec2fcee4b54 --- /dev/null +++ b/build/release/pre_gen_release_notes.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script is inspired by Apache Spark + +# This script simplifies the process of creating release notes, it +# - folds the original and the revert commits +# - filters out unrelated commits +# - generates the contributor list +# - canonicalizes the contributors' name with the known_translations + +# TODO +# - canonicalizes the commits' title + +# Usage: +# set environment variables: RELEASE_TAG and PREVIOUS_RELEASE_TAG, then perform +# ./pre_gen_release_notes.py +# Example: +# RELEASE_TAG=v1.8.1 PREVIOUS_RELEASE_TAG=v1.8.0 ./pre_gen_release_notes.py + +# It outputs +# - commits-${RELEASE_TAG}.txt: the canonical commit list +# - contributors-${RELEASE_TAG}.txt: the canonical contributor list + +import os +import re +import sys + +from release_utils import ( + tag_exists, + get_commits, + yes_or_no_prompt, + get_date, + is_valid_author, + capitalize_author, + print_indented +) + +RELEASE_TAG = os.environ.get("RELEASE_TAG") +if RELEASE_TAG is None: + sys.exit("RELEASE_TAG is required") +if not tag_exists(RELEASE_TAG): + sys.exit("RELEASE_TAG: %s does not exist!" % RELEASE_TAG) + +PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG") +if PREVIOUS_RELEASE_TAG is None: + sys.exit("PREVIOUS_RELEASE_TAG is required") +if not tag_exists(PREVIOUS_RELEASE_TAG): + sys.exit("PREVIOUS_RELEASE_TAG: %s does not exist!" % PREVIOUS_RELEASE_TAG) + +release_dir = os.path.dirname(os.path.abspath(__file__)) +commits_file_name = "commits-%s.txt" % RELEASE_TAG +contributors_file_name = "contributors-%s.txt" % RELEASE_TAG + +# Gather commits found in the new tag but not in the old tag. +# This filters commits based on both the git hash and the PR number. +# If either is present in the old tag, then we ignore the commit. +print("Gathering new commits between tags %s and %s" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG)) +release_commits = get_commits(RELEASE_TAG) +previous_release_commits = get_commits(PREVIOUS_RELEASE_TAG) +previous_release_hashes = set() +previous_release_prs = set() +for old_commit in previous_release_commits: + previous_release_hashes.add(old_commit.get_hash()) + if old_commit.get_pr_number(): + previous_release_prs.add(old_commit.get_pr_number()) +new_commits = [] +for this_commit in release_commits: + this_hash = this_commit.get_hash() + this_pr_number = this_commit.get_pr_number() + if this_hash in previous_release_hashes: + continue + if this_pr_number and this_pr_number in previous_release_prs: + continue + new_commits.append(this_commit) +if not new_commits: + sys.exit("There are no new commits between %s and %s!" % (PREVIOUS_RELEASE_TAG, RELEASE_TAG)) + +# Prompt the user for confirmation that the commit range is correct +print("\n==================================================================================") +print("Release tag: %s" % RELEASE_TAG) +print("Previous release tag: %s" % PREVIOUS_RELEASE_TAG) +print("Number of commits in this range: %s" % len(new_commits)) +print("") + +if yes_or_no_prompt("Show all commits?"): + print_indented(new_commits) +print("==================================================================================\n") +if not yes_or_no_prompt("Does this look correct?"): + sys.exit("Ok, exiting") + +# Filter out special commits +releases = [] +reverts = [] +no_tickets = [] +effective_commits = [] + +def is_release(commit_title): + return "[release]" in commit_title.lower() + + +def has_no_ticket(commit_title): + return not re.findall("\\[KYUUBI\\s\\#[0-9]+\\]", commit_title.upper()) + + +def is_revert(commit_title): + return "revert" in commit_title.lower() + + +for c in new_commits: + t = c.get_title() + if not t: + continue + elif is_release(t): + releases.append(c) + elif is_revert(t): + reverts.append(c) + elif has_no_ticket(t): + no_tickets.append(c) + else: + effective_commits.append(c) + + +# Warn against ignored commits +if releases or reverts or no_tickets: + print("\n==================================================================================") + if releases: + print("Found %d release commits" % len(releases)) + if reverts: + print("Found %d revert commits" % len(reverts)) + if no_tickets: + print("Found %d commits with no Ticket" % len(no_tickets)) + print("==================== Warning: these commits will be ignored ======================\n") + if releases: + print("Release (%d)" % len(releases)) + print_indented(releases) + if reverts: + print("Revert (%d)" % len(reverts)) + print_indented(reverts) + if no_tickets: + print("No Ticket (%d)" % len(no_tickets)) + print_indented(no_tickets) + print("==================== Warning: the above commits will be ignored ==================\n") +prompt_msg = "%d effective commits left to process after filtering. OK to proceed?" % len(effective_commits) +if not yes_or_no_prompt(prompt_msg): + sys.exit("OK, exiting.") + + +# Load known author translations that are cached locally +known_translations = {} +known_translations_file_name = "known_translations" +known_translations_file = open(os.path.join(release_dir, known_translations_file_name), "r") +for line in known_translations_file: + if line.startswith("#") or not line.strip(): + continue + [old_name, new_name] = line.strip("\n").split(" - ") + known_translations[old_name] = new_name +known_translations_file.close() + +# Keep track of warnings to tell the user at the end +warnings = [] + +# Mapping from the invalid author name to its associated tickets +# E.g. pan3793 -> set("[KYUUBI #1234]", "[KYUUBI #1235]") +invalid_authors = {} + +# Populate a map that groups issues and components by author +# It takes the form: Author Name -> set() +# For instance, +# { +# 'Cheng Pan' -> set('[KYUUBI #1234]', '[KYUUBI #1235]'), +# 'Fu Chen' -> set('[KYUUBI #2345]') +# } +# +author_info = {} +print("\n=========================== Compiling contributor list ===========================") +for commit in effective_commits: + _hash = commit.get_hash() + title = commit.get_title() + issues = re.findall("\\[KYUUBI\\s\\#[0-9]+\\]", title.upper()) + author = commit.get_author() + date = get_date(_hash) + # Translate the known author name + if author in known_translations: + author = known_translations[author] + elif is_valid_author(author): + # If the author name is invalid, keep track of it along + # with all associated issues so we can translate it later + author = capitalize_author(author) + else: + if author not in invalid_authors: + invalid_authors[author] = set() + for issue in issues: + invalid_authors[author].add(issue) + # Populate or merge an issue into author_info[author] + def populate(issues): + if author not in author_info: + author_info[author] = set() + for issue in issues: + author_info[author].add(issue) + # Find issues associated with this commit + try: + populate(issues) + except Exception as e: + print("Unexpected error:", e) + print(" Processed commit %s authored by %s on %s" % (_hash, author, date)) +print("==================================================================================\n") + +commits_file = open(os.path.join(release_dir, commits_file_name), "w") +for commit in effective_commits: + if commit.get_hash() not in map(lambda revert: revert.get_revert_hash(), reverts): + commits_file.write(commit.title + "\n") +for commit in no_tickets: + commits_file.write(commit.title + "\n") +commits_file.close() +print("Commits list is successfully written to %s!" % commits_file_name) + +# Write to contributors file ordered by author names +# Each line takes the format " * Author Name -- tickets" +# e.g. * Cheng Pan -- [KYUUBI #1234][KYUUBI #1235] +# e.g. * Fu Chen -- [KYUUBI #2345] +contributors_file = open(os.path.join(release_dir, contributors_file_name), "w") +authors = list(author_info.keys()) +authors.sort(key=lambda author: author.split(" ")[-1]) +author_max_len = max(len(author) for author in authors) +for author in authors: + contribution = "".join(author_info[author]) + line = ("* {:<%s}" % author_max_len).format(author) + " -- " + contribution + contributors_file.write(line + "\n") +contributors_file.close() +print("Contributors list is successfully written to %s!" % contributors_file_name) + +# Prompt the user to translate author names if necessary +if invalid_authors: + warnings.append("Found the following invalid authors:") + for a in invalid_authors: + warnings.append("\t%s" % a) + warnings.append("Please update 'known_translations'.") + +# Log any warnings encountered in the process +if warnings: + print("\n============ Warnings encountered while creating the contributor list ============") + for w in warnings: + print(w) + print("Please correct these in the final contributors list at %s." % contributors_file_name) + print("==================================================================================\n") diff --git a/build/release/release_utils.py b/build/release/release_utils.py new file mode 100755 index 00000000000..d34f74d3662 --- /dev/null +++ b/build/release/release_utils.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This script is inspired by Apache Spark + +# This file contains helper methods used in creating a release. + +import re +import sys +from subprocess import Popen, PIPE + + +# Prompt the user to answer yes or no until they do so +def yes_or_no_prompt(msg): + response = input("%s [y/n]: " % msg) + while response != "y" and response != "n": + return yes_or_no_prompt(msg) + return response == "y" + + +def run_cmd(cmd): + return Popen(cmd, stdout=PIPE).communicate()[0].decode("utf8") + + +def run_cmd_error(cmd): + return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1].decode("utf8") + + +def get_date(commit_hash): + return run_cmd(["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]) + + +def tag_exists(tag): + stderr = run_cmd_error(["git", "show", tag]) + return "error" not in stderr and "fatal" not in stderr + + +# A type-safe representation of a commit +class Commit: + def __init__(self, _hash, author, title, pr_number=None, revert_hash=None): + self._hash = _hash + self.author = author + self.title = title + self.pr_number = pr_number + self.revert_hash = revert_hash + + def get_hash(self): + return self._hash + + def get_author(self): + return self.author + + def get_title(self): + return self.title + + def get_pr_number(self): + return self.pr_number + + def get_revert_hash(self): + return self.revert_hash + + def __str__(self): + closes_pr = "(Closes #%s)" % self.pr_number if self.pr_number else "" + revert_commit = "(Reverts %s)" % self.revert_hash if self.revert_hash else "" + return "%s %s %s %s %s" % (self._hash, self.author, self.title, closes_pr, revert_commit) + + +# Return all commits that belong to the specified tag. +# +# Under the hood, this runs a `git log` on that tag and parses the fields +# from the command output to construct a list of Commit objects. Note that +# because certain fields reside in the commit description, we need to do +# some intelligent regex parsing to extract those fields. +def get_commits(tag): + commit_start_marker = "|=== COMMIT START MARKER ===|" + commit_end_marker = "|=== COMMIT END MARKER ===|" + field_end_marker = "|=== COMMIT FIELD END MARKER ===|" + log_format = ( + commit_start_marker + + "%h" + + field_end_marker + + "%an" + + field_end_marker + + "%s" + + commit_end_marker + + "%b" + ) + output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + log_format, tag]) + commits = [] + raw_commits = [c for c in output.split(commit_start_marker) if c] + for commit in raw_commits: + if commit.count(commit_end_marker) != 1: + print("Commit end marker not found in commit: ") + for line in commit.split("\n"): + print(line) + sys.exit(1) + # Separate commit digest from the body + # From the digest we extract the hash, author and the title + # From the body, we extract the PR number and the github username + [commit_digest, commit_body] = commit.split(commit_end_marker) + if commit_digest.count(field_end_marker) != 2: + sys.exit("Unexpected format in commit: %s" % commit_digest) + [_hash, author, title] = commit_digest.split(field_end_marker) + # The PR number and github username is in the commit message + # itself and cannot be accessed through any GitHub API + pr_number = None + match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body) + if match: + [pr_number, github_username] = match.groups() + # If the author name is not valid, use the github + # username so we can translate it properly later + if not is_valid_author(author): + author = github_username + author = author.strip() + revert_hash = None + match = re.search("This reverts commit ([0-9a-f]+)", commit_body) + if match: + [revert_hash] = match.groups() + revert_hash = revert_hash[:9] + commit = Commit(_hash, author, title, pr_number, revert_hash) + commits.append(commit) + return commits + + +# Return whether the given name is in the form +def is_valid_author(author): + if not author: + return False + return " " in author and not re.findall("[0-9]", author) + + +# Capitalize the first letter of each word in the given author name +def capitalize_author(author): + if not author: + return None + words = author.split(" ") + words = [w[0].capitalize() + w[1:] for w in words if w] + return " ".join(words) + + +def print_indented(_list): + for x in _list: + print(" %s" % x)