[Benchmarks] Add benchmarks logger #19158

Open · wants to merge 1 commit into base: sycl
21 changes: 21 additions & 0 deletions devops/scripts/benchmarks/README.md
@@ -45,6 +45,27 @@ are stored [here](https://oneapi-src.github.io/unified-runtime/performance/).
## Output formats
You can display the results as an HTML file by using `--output-html` and as a markdown file by using `--output-markdown`. Due to character limits for posting PR comments, the final content of the markdown file might be reduced. To obtain the full markdown output, use `--output-markdown full`.
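For instance, a single run that produces the full markdown report might look like this (the work directory and `--sycl` build path are placeholders, mirroring the invocations shown in the Logging section below):

```bash
./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --output-markdown full
```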

## Logging

The benchmark runner uses a configurable logging system with multiple log levels; the active level is selected with the `--log-level` command-line option.

Available log levels:
- `debug`
- `info` (default)
- `warning`
- `error`
- `critical`

To set the log level, use the `--log-level` option:
```bash
./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --log-level debug
```

You can also use the `--verbose` flag, which sets the log level to `debug` and overrides any `--log-level` setting:
```bash
./main.py ~/benchmarks_workdir/ --sycl ~/llvm/build/ --verbose
```
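The suites import a shared `log` object from `utils.logger` (see the diffs below) and call `log.debug(...)`, `log.warning(...)`, and so on unconditionally; the configured level then decides what is actually printed, replacing the old `if options.verbose: print(...)` pattern. The `utils/logger.py` module itself is not part of this excerpt, so the following is only a rough sketch of how such a helper might wrap Python's built-in `logging` module; every name except `log` is an assumption:

```python
# Illustrative sketch only: the real utils/logger.py is not shown in this diff.
import logging
import sys


class BenchmarkLogger:
    """Thin wrapper over the standard logging module, exposing the usual
    debug/info/warning/error/critical methods on a shared `log` object."""

    def __init__(self, name: str = "benchmarks", level: int = logging.INFO):
        self._logger = logging.getLogger(name)
        self._logger.setLevel(level)
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
        self._logger.addHandler(handler)

    def set_level(self, level_name: str, verbose: bool = False) -> None:
        # --verbose wins over --log-level, matching the behaviour described above.
        level = logging.DEBUG if verbose else getattr(logging, level_name.upper())
        self._logger.setLevel(level)

    def debug(self, msg: str) -> None:
        self._logger.debug(msg)

    def info(self, msg: str) -> None:
        self._logger.info(msg)

    def warning(self, msg: str) -> None:
        self._logger.warning(msg)

    def error(self, msg: str) -> None:
        self._logger.error(msg)

    def critical(self, msg: str) -> None:
        self._logger.critical(msg)


# Shared instance used as `from utils.logger import log` throughout the suites.
log = BenchmarkLogger()
```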

## Requirements

### Python
6 changes: 4 additions & 2 deletions devops/scripts/benchmarks/benches/benchdnn.py
@@ -3,12 +3,15 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception


from pathlib import Path

from .base import Suite, Benchmark
from options import options
from utils.utils import git_clone, run, create_build_path
from utils.result import Result
from utils.oneapi import get_oneapi
from utils.logger import log
from .benchdnn_list import get_bench_dnn_list


@@ -150,8 +153,7 @@ def run(self, env_vars):
)
result_value = self._extract_time(output)

if options.verbose:
print(f"[{self.name()}] Output: {output}")
log.debug(f"[{self.name()}] Output: {output}")

return [
Result(
11 changes: 5 additions & 6 deletions devops/scripts/benchmarks/benches/gromacs.py
@@ -4,18 +4,18 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
import subprocess
from pathlib import Path
import re

from .base import Suite, Benchmark
from options import options
from utils.utils import git_clone, download, run, create_build_path
from utils.result import Result
from utils.oneapi import get_oneapi
import re
from utils.logger import log


class GromacsBench(Suite):

def git_url(self):
return "https://gitlab.com/gromacs/gromacs.git"

@@ -209,8 +209,7 @@ def run(self, env_vars):

time = self._extract_execution_time(mdrun_output)

if options.verbose:
print(f"[{self.name()}] Time: {time:.3f} seconds")
log.debug(f"[{self.name()}] Time: {time:.3f} seconds")

return [
Result(
@@ -259,7 +258,7 @@ def _validate_correctness(self, log_file):
drift_value = float(match.group(1))
return abs(drift_value) <= threshold
except ValueError:
print(
log.warning(
f"Parsed drift value: {drift_value} exceeds threshold"
)
return False
15 changes: 7 additions & 8 deletions devops/scripts/benchmarks/benches/umf.py
@@ -3,18 +3,17 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import random
from utils.utils import git_clone
from .base import Benchmark, Suite
from utils.result import Result
from utils.utils import run, create_build_path
from options import options
from utils.oneapi import get_oneapi
import os
import csv
import io
import re

from .base import Benchmark, Suite
from utils.result import Result
from options import options
from utils.oneapi import get_oneapi
from utils.logger import log


def isUMFAvailable():
return options.umf is not None
@@ -93,7 +92,7 @@ def extra_env_vars(self) -> dict:

def setup(self):
if not isUMFAvailable():
print("UMF prefix path not provided")
log.warning("UMF prefix path not provided")
return

self.oneapi = get_oneapi()
62 changes: 36 additions & 26 deletions devops/scripts/benchmarks/compare.py
@@ -1,17 +1,21 @@
from utils.aggregate import Aggregator, SimpleMedian
from utils.validate import Validate
from utils.result import Result, BenchmarkRun
from options import options
# Copyright (C) 2024-2025 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import os
import re
import sys
import json
import argparse
from datetime import datetime, timezone
from pathlib import Path
from dataclasses import dataclass, asdict

from utils.aggregate import Aggregator, SimpleMedian
from utils.validate import Validate
from utils.result import BenchmarkRun
from utils.logger import log
from options import options


@dataclass
class BenchmarkHistoricAverage:
@@ -113,8 +117,8 @@ def validate_benchmark_result(result: BenchmarkRun) -> bool:
if result.hostname != hostname:
return False
if result.name != result_name:
print(
f"Warning: Result file {result_path} does not match specified result name {result.name}."
log.warning(
f"Result file {result_path} does not match specified result name {result.name}."
)
return False
if result.date < datetime.strptime(cutoff, "%Y%m%d_%H%M%S").replace(
@@ -256,24 +260,24 @@ def to_hist(
"""

if avg_type != "median":
print("Only median is currently supported: Refusing to continue.")
log.error("Only median is currently supported: Refusing to continue.")
exit(1)

try:
with open(compare_file, "r") as compare_f:
compare_result = BenchmarkRun.from_json(json.load(compare_f))
except:
print(f"Unable to open {compare_file}.")
log.error(f"Unable to open {compare_file}.")
exit(1)

# Sanity checks:
if compare_result.hostname == "Unknown":
print(
log.error(
"Hostname for results in {compare_file} unknown, unable to build a historic average: Refusing to continue."
)
exit(1)
if not Validate.timestamp(cutoff):
print("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.")
log.error("Invalid timestamp provided, please follow YYYYMMDD_HHMMSS.")
exit(1)

# Build historic average and compare results against historic average:
Expand Down Expand Up @@ -331,7 +335,7 @@ def to_hist(

if args.operation == "to_hist":
if args.avg_type != "median":
print("Only median is currently supported: exiting.")
log.error("Only median is currently supported: exiting.")
exit(1)
if not Validate.timestamp(args.cutoff):
raise ValueError("Timestamp must be provided as YYYYMMDD_HHMMSS.")
@@ -352,28 +356,34 @@
else:
regressions_ignored.append(test)

def print_regression(entry: dict):
"""Print an entry outputted from Compare.to_hist"""
print(f"Test: {entry['name']}")
print(f"-- Historic {entry['avg_type']}: {entry['hist_avg']}")
print(f"-- Run result: {entry['value']}")
print(f"-- Delta: {entry['delta']}")
print("")
def print_regression(entry: dict, is_warning: bool = False):
"""Print an entry outputted from Compare.to_hist

Args:
entry (dict): The entry to print
is_warning (bool): If True, use log.warning instead of log.info
"""
log_func = log.warning if is_warning else log.info
log_func(f"Test: {entry['name']}")
log_func(f"-- Historic {entry['avg_type']}: {entry['hist_avg']}")
log_func(f"-- Run result: {entry['value']}")
log_func(f"-- Delta: {entry['delta']}")
log_func("")

if improvements:
print("#\n# Improvements:\n#\n")
log.info("#\n# Improvements:\n#\n")
for test in improvements:
print_regression(test)
if regressions_ignored:
print("#\n# Regressions (filtered out by regression-filter):\n#\n")
log.info("#\n# Regressions (filtered out by regression-filter):\n#\n")
for test in regressions_ignored:
print_regression(test)
if regressions_of_concern:
print("#\n# Regressions:\n#\n")
log.warning("#\n# Regressions:\n#\n")
for test in regressions_of_concern:
print_regression(test)
print_regression(test, is_warning=True)
exit(1) # Exit 1 to trigger github test failure
print("\nNo unexpected regressions found!")
log.info("\nNo unexpected regressions found!")
else:
print("Unsupported operation: exiting.")
log.error("Unsupported operation: exiting.")
exit(1)
17 changes: 9 additions & 8 deletions devops/scripts/benchmarks/history.py
@@ -7,12 +7,13 @@
import json
from pathlib import Path
import socket

from utils.result import Result, BenchmarkRun
from options import Compare, options
from datetime import datetime, timezone, timedelta
from utils.utils import run
from utils.validate import Validate

from utils.logger import log
from utils.detect_versions import DetectVersion


@@ -33,8 +34,8 @@ def load_result(self, file_path: Path) -> BenchmarkRun:
def load(self, n: int):
results_dir = Path(self.dir) / "results"
if not results_dir.exists() or not results_dir.is_dir():
print(
f"Warning: {results_dir} is not a valid directory: no historic results loaded."
log.warning(
f"{results_dir} is not a valid directory: no historic results loaded."
)
return

@@ -97,7 +98,7 @@ def git_info_from_path(path: Path) -> (str, str):

if options.git_commit_override is None or options.github_repo_override is None:
if options.detect_versions.sycl:
print(f"Auto-detecting sycl version...")
log.info(f"Auto-detecting sycl version...")
github_repo, git_hash = DetectVersion.instance().get_dpcpp_git_info()
else:
git_hash, github_repo = git_info_from_path(
@@ -129,12 +130,12 @@ def git_info_from_path(path: Path) -> (str, str):
if options.build_compute_runtime:
compute_runtime = options.compute_runtime_tag
elif options.detect_versions.compute_runtime:
print(f"Auto-detecting compute_runtime version...")
log.info(f"Auto-detecting compute_runtime version...")
detect_res = DetectVersion.instance()
compute_runtime = detect_res.get_compute_runtime_ver()
if detect_res.get_compute_runtime_ver_cached() is None:
print(
"Warning: Could not find compute_runtime version via github tags API."
log.warning(
"Could not find compute_runtime version via github tags API."
)
else:
compute_runtime = "unknown"
@@ -169,7 +170,7 @@ def save(self, save_name, results: list[Result], to_file=True):
file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
with file_path.open("w") as file:
json.dump(serialized, file, indent=4)
print(f"Benchmark results saved to {file_path}")
log.info(f"Benchmark results saved to {file_path}")

def find_first(self, name: str) -> BenchmarkRun:
for r in self.runs: