# File: .github/workflows/perf.yml (new file)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Runs tools/plot.py on every pull request update and posts the statistics it
# prints as a comment on the pull request.
name: Performance Analysis

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  plot:
    runs-on: ubuntu-latest
    # Least privilege: checkout only reads the repository and the last step
    # only writes a pull request comment.
    permissions:
      contents: read
      pull-requests: write
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # tools/plot.py walks the history with `git rev-list`, so a shallow
          # clone is not enough.
          fetch-depth: 0
      - name: Parse Statistics
        id: parse-stats
        # Secrets are handed over through the environment instead of being
        # expanded into the script text.
        env:
          AZURE_STORAGE_CONNECTION_STRING: ${{ secrets.AZURE_STORAGE_CONNECTION_STRING }}
          AZURE_STORAGE_TABLE_NAME: ${{ secrets.AZURE_STORAGE_TABLE_NAME }}
          AZURE_STORAGE_KEY: ${{ secrets.AZURE_STORAGE_KEY }}
          AZURE_STORAGE_CONTAINER: ${{ secrets.AZURE_STORAGE_CONTAINER }}
        run: |
          git show --format=%H -s HEAD
          git rev-list --max-parents=0 HEAD
          pip install --pre azure-data-tables azure-storage-blob pandas matplotlib
          stats=$(python3 tools/plot.py \
            --connection "$AZURE_STORAGE_CONNECTION_STRING" \
            --table "$AZURE_STORAGE_TABLE_NAME" \
            --key "$AZURE_STORAGE_KEY" \
            --container "$AZURE_STORAGE_CONTAINER" \
            --no-plot)
          # Random delimiter so the multi-line value cannot terminate itself.
          EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64)
          {
            echo "TEXT<<$EOF"
            echo "$stats"
            echo "$EOF"
          } >> "$GITHUB_OUTPUT"
          echo "$stats"
      - name: Post Comment
        uses: actions/github-script@v7
        env:
          # Passed through the environment: interpolating a multi-line value
          # into the script source would break the JavaScript string literal
          # and allow script injection.
          STATS: ${{ steps.parse-stats.outputs.TEXT }}
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const stats = process.env.STATS || "(no statistics available)";
            // Await the request so a failure fails the step.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: "### Performance Analysis\n\n```\n" + stats + "\n```"
            });

# File: tools/plot.py (new file)
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import argparse
import datetime
import fnmatch
import json
import subprocess
from typing import List

import matplotlib.pyplot as plt
import pandas
from azure.data.tables import TableServiceClient
from azure.storage.blob import BlobClient

# =====================================================================================================================


# Drives the program.
def main():
    """Parse the command line and run the performance analysis."""
    # Read arguments from command line and parse them.
    args: argparse.Namespace = __read_args()

    # Extract options.
    table_name: str = args.table
    container_name: str = args.container
    connection_str: str = args.connection
    key: str = args.key
    no_plot: bool = args.no_plot

    __plot_performance(table_name=table_name, container_name=container_name,
                       connection_str=connection_str, key=key, no_plot=no_plot)


# Reads and parses command line arguments.
def __read_args() -> argparse.Namespace:
    """Build the argument parser and parse the process command line.

    Returns:
        The parsed namespace with the attributes ``table``, ``container``,
        ``connection``, ``key`` (all required) and ``no_plot`` (a flag that
        defaults to ``False``).
    """
    description: str = "CI Utility for plotting performance statistics of Demikernel."

    # Initialize parser.
    parser = argparse.ArgumentParser(prog="plot.py", description=description)

    # Options related to Storage account.
    parser.add_argument("--table", required=True, help="Set Azure Table to use.")
    parser.add_argument("--container", required=True, help="Set Azure Blob Container to use.")

    # Options related to credentials.
    parser.add_argument("--connection", required=True, help="Set connection string to access Azure Storage Account.")
    parser.add_argument("--key", required=True, help="Set connection key to access Azure Storage Account.")

    parser.add_argument("--no-plot", action="store_true", help="Do not plot the performance statistics.")

    # Read arguments from command line.
    return parser.parse_args()


# Get head commit on branch.
def get_head_commit() -> str:
    """Return the full hash of ``HEAD``, or an empty string if git prints nothing."""
    return _run_git("show", "--format=%H", "-s", "HEAD").replace("\n", "")


# Get first commit on branch.
def get_first_commit_on_branch(head_commit: str) -> str:
    """Return the parentless (root) commit reachable from *head_commit*.

    NOTE(review): when the history has several root commits git prints one
    hash per line and they are joined here without a separator.
    """
    return _run_git("rev-list", "--max-parents=0", head_commit).replace("\n", "")


def check_if_merge_commit(commit_hash: str) -> bool:
    """Return ``True`` when *commit_hash* has more than one parent."""
    parents = _run_git("show", "--format=%P", "-s", commit_hash).split()
    return len(parents) > 1


def check_if_head_commit(commit_hash: str) -> bool:
    """Return ``True`` when *commit_hash* equals the full hash of ``HEAD``."""
    # The comparison result must be returned; without the `return` the
    # function always yields None and the head commit is never recognized.
    return commit_hash == get_head_commit()


def get_short_commit_hash(commit_hash: str) -> int:
    """Return the abbreviated hash of *commit_hash* parsed as a hexadecimal integer.

    Raises:
        ValueError: if git prints nothing parseable (for example, an unknown commit).
    """
    return int(_run_git("rev-parse", "--short", commit_hash).replace("\n", ""), 16)


def _run_git(*args: str) -> str:
    """Run ``git`` with *args* and return whatever it wrote to standard output.

    The arguments are passed as a list, without a shell, so values that come
    from outside the program (commit hashes read from the statistics table)
    cannot be interpreted as shell syntax. Standard error is discarded and a
    non-zero exit status is not raised; callers see empty output instead.
    """
    try:
        completed = subprocess.run(
            ["git", *args],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except OSError:
        # git could not be started at all: report it like a failed command.
        return ""
    return completed.stdout


# Compute distance of two commit hashes.
def get_distance_of_commits(head_commit: str) -> int:
    """Count the commits reachable from *head_commit* but not from its root commit.

    The root is obtained from ``get_first_commit_on_branch``. Returns 0 when
    git prints nothing (for example, an unknown commit).
    """
    base_commit = get_first_commit_on_branch(head_commit)
    try:
        # List arguments, no shell: the hash may come from the statistics table.
        completed = subprocess.run(
            ["git", "rev-list", "--count", f"{base_commit}..{head_commit}"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=False,
        )
    except OSError:
        # git could not be started: treat it like a command with no output.
        return 0
    count = completed.stdout.replace("\n", "")
    return int(count) if count else 0


# Values the analysis is broken down by.
_JOB_TYPES = ("tcp-ping-pong-server", "tcp-ping-pong-client")
_SYSCALLS = ("push", "pop")
_LIBOS_TYPES = ("catnap", "catpowder", "catnip")

# How far back the statistics table is queried.
_LOOKBACK_DAYS = 15


def _build_query_filter(since: datetime.datetime) -> str:
    """Return the table filter selecting push/pop rows of the three LibOSes newer than *since*."""
    timestamp = since.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
    return (
        f"Timestamp gt datetime'{timestamp}' and "
        "(LibOS eq 'catnap' or LibOS eq 'catpowder' or LibOS eq 'catnip') and "
        "(Syscall eq 'push' or Syscall eq 'pop')"
    )


def _collect_statistics(rows, head_commit: str) -> dict:
    """Group table rows into ``stats[job][syscall][libos]`` column lists.

    Only rows whose commit is the head commit or a merge commit are kept, and
    only the first row seen for each (job, libos, commit, syscall) is used.
    """
    stats = {
        job_type: {
            syscall: {
                libos_type: {"diff": [], "cycles": [], "commit": []}
                for libos_type in _LIBOS_TYPES
            }
            for syscall in _SYSCALLS
        }
        for job_type in _JOB_TYPES
    }

    seen = set()
    # commit hash -> (distance, short hash), or None for commits to skip.
    # Each git lookup spawns a process, so it is done once per commit.
    commit_info = {}

    for row in rows:
        commit_hash = row["CommitHash"]
        for job_type in _JOB_TYPES:
            if not fnmatch.fnmatch(row["JobName"], f"*{job_type}*"):
                continue
            for syscall in _SYSCALLS:
                if syscall not in row["Syscall"]:
                    continue
                for libos_type in _LIBOS_TYPES:
                    if libos_type not in row["LibOS"]:
                        continue

                    if commit_hash not in commit_info:
                        if commit_hash == head_commit or check_if_merge_commit(commit_hash):
                            commit_info[commit_hash] = (
                                get_distance_of_commits(commit_hash),
                                get_short_commit_hash(commit_hash),
                            )
                        else:
                            commit_info[commit_hash] = None
                    info = commit_info[commit_hash]
                    if info is None:
                        continue

                    # The job type is part of the key: the server and client
                    # jobs report the same commit and both must be recorded.
                    entry_key = (job_type, libos_type, commit_hash, syscall)
                    if entry_key in seen:
                        continue
                    seen.add(entry_key)

                    distance, short_hash = info
                    columns = stats[job_type][syscall][libos_type]
                    columns["diff"].append(distance)
                    columns["cycles"].append(row["AverageCyclesPerSyscall"])
                    columns["commit"].append(short_hash)

    return stats


def _build_comparison_table(per_libos: dict) -> "pandas.DataFrame":
    """Join the per-LibOS columns into one table with a cycles column per LibOS.

    Rows are matched on commit and distance, so only commits that have data
    for all three LibOSes remain.
    """
    frames = {
        libos_type: pandas.DataFrame(per_libos[libos_type])
        .sort_values(by=["diff"])
        # Name the cycles column after its LibOS before merging, so the
        # result does not depend on the column order the merge produces.
        .rename(columns={"cycles": libos_type.capitalize()})
        for libos_type in _LIBOS_TYPES
    }
    table = pandas.merge(frames["catpowder"], frames["catnap"], on=["commit", "diff"])
    table = pandas.merge(table, frames["catnip"], on=["commit", "diff"])
    table = table.rename(columns={"diff": "Diff", "commit": "Commit"})
    return table.reindex(columns=["Diff", "Commit", "Catnap", "Catpowder", "Catnip"])


def _plot_and_upload(df, job_type: str, syscall: str, head_commit: str, key: str, container_name: str) -> None:
    """Render *df* as a line chart, save it as a PNG and upload it to blob storage."""
    image_path = f"{job_type}-{syscall}.png"
    df.plot(x="Diff", y=["Catpowder", "Catnap", "Catnip"],
            kind="line", marker='o',
            title=f"Performance for {syscall.capitalize()}()",
            xlabel="Commit Hash",
            ylabel="Average Cycles Spent in Syscall",
            legend=True, ylim=(0, 5000))
    plt.xticks(rotation=90, ticks=df["Diff"], labels=df["Commit"])
    plt.savefig(image_path, bbox_inches='tight', dpi=300)
    # Release the figure; one is created per job/syscall combination.
    plt.close()
    upload_image_to_blob("demikernel", key, container_name, f"{head_commit}-{image_path}", image_path)


def __plot_performance(table_name: str, container_name: str, connection_str: str, key: str, no_plot: bool) -> None:
    """Query recent statistics and print or plot them per job type and syscall.

    Args:
        table_name: Azure Table holding the statistics.
        container_name: Blob container that receives the rendered plots.
        connection_str: Connection string used to reach the table service.
        key: Credential used for the blob upload.
        no_plot: When true, print each table instead of plotting and uploading it.
    """
    # Connect to Azure table.
    table_service = TableServiceClient.from_connection_string(connection_str)
    table_client = table_service.get_table_client(table_name)

    print("Querying Azure Table for performance statistics...")

    # Query Azure table for statistics on the past _LOOKBACK_DAYS days. The
    # timestamp literal carries a 'Z' suffix, so it is computed in UTC.
    base_date = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=_LOOKBACK_DAYS)
    select: List[str] = ["LibOS", "JobName", "CommitHash", "Syscall", "AverageCyclesPerSyscall"]
    data = table_client.query_entities(query_filter=_build_query_filter(base_date), select=select)

    head_commit: str = get_head_commit()
    root_commit: str = get_first_commit_on_branch(head_commit)

    print(f"Processing commits from {root_commit} to {head_commit}...")

    # Parse queried data.
    cooked_data = _collect_statistics(data, head_commit)

    for job_type in _JOB_TYPES:
        for syscall in _SYSCALLS:
            df = _build_comparison_table(cooked_data[job_type][syscall])
            if df.empty:
                continue
            if no_plot:
                print(f"\nPerformance for {syscall.capitalize()}() in {job_type}:")
                print(df)
            else:
                _plot_and_upload(df, job_type, syscall, head_commit, key, container_name)


def upload_image_to_blob(account_name, account_key, container_name, blob_name, image_path):
    """Upload the file at *image_path* as blob *blob_name*, replacing any existing blob.

    Args:
        account_name: Storage account name, used to build the account URL.
        account_key: Credential passed to the blob client.
        container_name: Target container.
        blob_name: Name of the blob to create or overwrite.
        image_path: Local path of the file to upload.
    """
    blob_client = BlobClient(account_url=f"https://{account_name}.blob.core.windows.net",
                             container_name=container_name,
                             blob_name=blob_name,
                             credential=account_key)

    with open(image_path, 'rb') as f:
        blob_client.upload_blob(f, overwrite=True)


if __name__ == "__main__":
    main()