Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a script to split minimal tests into distinct directories depending on metrics differences #623

Merged
merged 2 commits into from
May 11, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add a script to split minimal tests
This script splits minimal tests into distinct directories depending on
their metric differences
  • Loading branch information
Luni-4 committed Apr 21, 2021
commit d24569c75779be90a948df6880a4623ce2b0398d
130 changes: 130 additions & 0 deletions split-minimal-tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
#!/usr/bin/env python3

"""split-minimal-tests
This script splits HTML minimal-tests, produced by a software called
`json-minimal-tests`, into distinct directories depending on metric differences.

Usage:

./split-minimal-tests.py -i INPUT_DIR -o OUTPUT_DIR [-t MT_THRESHOLD]

NOTE: OUTPUT_DIR is the path to the output directory, which is created if
it does not exist yet. After the run it contains one subdirectory for each
metric that presents differences, named after that metric, or it is empty
if no metric differences are found.
MT_THRESHOLD is the maximum number of minimal tests copied for each
metric.
"""

import argparse
import pathlib
import re
import shutil
import typing as T

# Names of the metrics that are searched for in the HTML minimal tests.
# The list is hard-coded for now.
# TODO: Implement a command into rust-code-analysis-cli that returns all
# computed metrics https://github.com/mozilla/rust-code-analysis/issues/478
METRICS = [
    "cognitive",
    "sloc", "ploc", "lloc", "cloc", "blank",
    "cyclomatic",
    "halstead",
    "nom", "nexits", "nargs",
]


def main(
    argv: T.Optional[T.Sequence[str]] = None,
    metrics: T.Optional[T.Iterable[str]] = None,
) -> None:
    """Split HTML minimal tests into one directory per differing metric.

    Every ``*.html`` file located directly inside the input directory is
    read and its ``<pre>...</pre>`` sections are removed. A file is
    associated with a metric when the remaining text contains
    ``.<metric name>``; it is then copied into ``OUTPUT_DIR/<metric name>``.
    A metric directory is created only when at least one file is associated
    with that metric.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``.
        metrics: Metric names to look for. ``None`` (the default) selects
            the module-level ``METRICS`` list.

    Raises:
        SystemExit: Raised through argparse when the arguments cannot be
            parsed, when the input path is not a directory, or when the
            threshold is negative.
    """
    parser = argparse.ArgumentParser(
        prog="split-minimal-tests",
        description="This tool splits HTML minimal-tests, produced by "
        "a software called `json-minimal-tests`, into distinct directories "
        "depending on metric differences.",
        epilog="The source code of this program can be found on "
        "GitHub at https://github.com/mozilla/rust-code-analysis",
    )

    # Arguments
    parser.add_argument(
        "--input",
        "-i",
        type=pathlib.Path,
        required=True,
        help="Input directory containing HTML minimal tests.",
    )

    parser.add_argument(
        "--output",
        "-o",
        type=pathlib.Path,
        required=True,
        help="Path to the output directory.",
    )

    # Optional arguments
    parser.add_argument(
        "--threshold",
        "-t",
        type=int,
        help="Maximum number of considered minimal tests for a metric.",
    )

    # Parse arguments
    args = parser.parse_args(argv)

    # Validate before touching the filesystem, so that a wrong invocation
    # does not leave an empty output directory behind.
    if not args.input.is_dir():
        parser.error(f"input directory not found: {args.input}")
    if args.threshold is not None and args.threshold < 0:
        # A negative value would silently slice files off the end of the list.
        parser.error("--threshold must not be negative")

    metric_names = list(METRICS if metrics is None else metrics)

    # Create output directory
    args.output.mkdir(parents=True, exist_ok=True)

    # Matches a whole <pre></pre> section, newlines included.
    pre_section = re.compile(r"<pre>.*?</pre>", re.DOTALL)

    # Files associated to each metric
    metrics_saver: T.Dict[str, T.List[pathlib.Path]] = {
        metric_name: [] for metric_name in metric_names
    }

    # Sort the input files: glob order depends on the filesystem, and the
    # threshold below must always keep the same files.
    for path in sorted(args.input.glob("*.html")):
        if not path.is_file():
            continue

        # Only ASCII metric names are searched for, so undecodable bytes
        # can be replaced safely instead of aborting the whole run.
        file_str = path.read_text(encoding="utf-8", errors="replace")

        # Remove all code inside <pre></pre> tags, so that only the text
        # outside of them is searched for metric names.
        file_no_pre = pre_section.sub("", file_str)

        # Save path if there is a metric difference in a file
        for metric_name, metric_files in metrics_saver.items():
            if f".{metric_name}" in file_no_pre:
                metric_files.append(path)

    # Copy the files into their metric directories
    for metric_name, metric_files in metrics_saver.items():
        if not metric_files:
            continue

        # Create metric directory
        metric_path = args.output / metric_name
        metric_path.mkdir(parents=True, exist_ok=True)

        # Keep the number of files specified in the threshold. An omitted
        # threshold, or a threshold of 0, keeps every file.
        output_paths = (
            metric_files[: args.threshold] if args.threshold else metric_files
        )

        for path in output_paths:
            shutil.copy(path, metric_path)

# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    main()