Skip to content

[SYCL][E2E] Enable llvm-lit to accept device architecture #18197

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
May 9, 2025
7 changes: 7 additions & 0 deletions sycl/test-e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,13 @@ separated from comma-separated list of target devices with colon. Example:
-DSYCL_TEST_E2E_TARGETS="opencl:cpu;level_zero:gpu;cuda:gpu;hip:gpu"
```

In addition, a device architecture, as reported by sycl-ls, is accepted when
given with the "arch-" prefix. Example:

```bash
-DSYCL_TEST_E2E_TARGETS="cuda:arch-nvidia_gpu_sm_61;level_zero:arch-intel_gpu_bmg_b21"
```

***OpenCL_LIBRARY*** - path to OpenCL ICD loader library. OpenCL
interoperability tests require OpenCL ICD loader to be linked with. For such
tests OpenCL ICD loader library should be installed in the system or available
Expand Down
136 changes: 101 additions & 35 deletions sycl/test-e2e/lit.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@
# Disable the UR logger callback sink during test runs as output to SYCL RT can interfere with some tests relying on standard input/output
llvm_config.with_environment("UR_LOG_CALLBACK", "disabled")


# Temporarily modify environment to be the same that we use when running tests
class test_env:
def __enter__(self):
Expand Down Expand Up @@ -276,6 +277,7 @@ def quote_path(path):
return f'"{path}"'
return shlex.quote(path)


# Call the function to perform the check and add the feature
check_igc_tag_and_add_feature()

Expand All @@ -295,6 +297,7 @@ def quote_path(path):
if lit_config.params.get("spirv-backend", False):
config.available_features.add("spirv-backend")


# Use this to make sure that any dynamic checks below are done in the build
# directory and not where the sources are located. This is important for the
# in-tree configuration (as opposed to the standalone one).
Expand Down Expand Up @@ -634,22 +637,25 @@ def open_check_file(file_name):
if "amdgcn" in sp[1]:
config.sycl_build_targets.add("target-amd")

cmd = "{} {}".format(config.run_launcher, sycl_ls) if config.run_launcher else sycl_ls
sycl_ls_output = subprocess.check_output(cmd, text=True, shell=True)

# In contrast to `cpu` feature this is a compile-time feature, which is needed
# to check if we can build cpu AOT tests.
if "opencl:cpu" in sycl_ls_output:
config.available_features.add("opencl-cpu-rt")

if len(config.sycl_devices) == 1 and config.sycl_devices[0] == "all":
devices = set()
for line in sycl_ls_output.splitlines():
if not line.startswith("["):
continue
(backend, device) = line[1:].split("]")[0].split(":")
devices.add("{}:{}".format(backend, device))
config.sycl_devices = list(devices)
with test_env():
cmd = (
"{} {}".format(config.run_launcher, sycl_ls) if config.run_launcher else sycl_ls
)
sycl_ls_output = subprocess.check_output(cmd, text=True, shell=True)

# In contrast to `cpu` feature this is a compile-time feature, which is needed
# to check if we can build cpu AOT tests.
if "opencl:cpu" in sycl_ls_output:
config.available_features.add("opencl-cpu-rt")

if len(config.sycl_devices) == 1 and config.sycl_devices[0] == "all":
devices = set()
for line in sycl_ls_output.splitlines():
if not line.startswith("["):
continue
(backend, device) = line[1:].split("]")[0].split(":")
devices.add("{}:{}".format(backend, device))
config.sycl_devices = list(devices)

if len(config.sycl_devices) > 1:
lit_config.note(
Expand All @@ -670,7 +676,11 @@ def remove_level_zero_suffix(devices):
}
for d in remove_level_zero_suffix(config.sycl_devices):
be, dev = d.split(":")
if be not in available_devices or dev not in available_devices[be]:
# Verify platform
if be not in available_devices:
lit_config.error("Unsupported device {}".format(d))
# Verify device from available_devices or accept if contains "arch-"
if dev not in available_devices[be] and not "arch-" in dev:
lit_config.error("Unsupported device {}".format(d))

if "cuda:gpu" in config.sycl_devices:
Expand Down Expand Up @@ -828,6 +838,79 @@ def remove_level_zero_suffix(devices):
if config.test_mode != "build-only":
config.sycl_build_targets = set()


def get_sycl_ls_verbose(sycl_device, env):
with test_env():
# When using the ONEAPI_DEVICE_SELECTOR environment variable, sycl-ls
# prints warnings that might derail a user thinking something is wrong
# with their test run. It's just us filtering here, so silence them unless
# we get an exit status.
Comment on lines +844 to +847
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please outline this into a helper instead of copy-pasting. While doing this, please take a look at `with test_env():`; it might be beneficial/necessary here and in the pre-existing sycl-ls invocation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a helper function and added `with test_env():`. Good idea; it should help prevent any environment weirdness.

try:
cmd = "{} {} --verbose".format(config.run_launcher or "", sycl_ls)
sp = subprocess.run(
cmd, env=env, text=True, shell=True, capture_output=True
)
sp.check_returncode()
except subprocess.CalledProcessError as e:
# capturing e allows us to see path resolution errors / system
# permissions errors etc
lit_config.fatal(
f"Cannot find devices under {sycl_device}\n"
f"{e}\n"
f"stdout:{sp.stdout}\n"
f"stderr:{sp.stderr}\n"
)
return sp.stdout.splitlines()


# A device filter such as level_zero:gpu can have multiple devices under it and
# the order is not guaranteed. The aspects enabled are also restricted to what
# is supported on all devices under the label. It is possible for level_zero:gpu
# and level_zero:0 to select different devices on different machines with the
# same hardware. It is not currently possible to pass the device architecture to
# ONEAPI_DEVICE_SELECTOR. Instead, if "BACKEND:arch-DEVICE_ARCH" is provided to
# "sycl_devices", giving the desired device architecture, select a device that
# matches that architecture using the backend:device-num device selection
# scheme.
filtered_sycl_devices = []
for sycl_device in remove_level_zero_suffix(config.sycl_devices):
backend, device_arch = sycl_device.split(":", 1)

if not "arch-" in device_arch:
filtered_sycl_devices.append(sycl_device)
continue

env = copy.copy(llvm_config.config.environment)

# Find all available devices under the backend
env["ONEAPI_DEVICE_SELECTOR"] = backend + ":*"

detected_architectures = []

for line in get_sycl_ls_verbose(backend + ":*", env):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I really doubt this actually matters, but is it possible to avoid a second call to sycl-ls here and capture what we need in the first call?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't we use a different device filter?

Copy link
Contributor

@sarnex sarnex May 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, in this one we set a filter, but it seems the other one has no device filter, so its output should contain the info we need here. But if it's not trivial to implement, it's not worth it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The first sycl-ls call (line 638 in the code) is used to get all available devices if `all` is used, so no filter is passed.

The second sycl-ls call (line 882) is only made if the string `arch-` appears in the device filter. We could also grab the aspects at the same time, but that feels messy.

Lastly, sycl-ls is called again at line 938 with all the filtered device filters to set available aspects and other properties.

Copy link
Contributor Author

@DBDuncan DBDuncan May 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could probably remove one of the calls to sycl-ls in the code, but I think the separation of concerns (first filtering the device filters, then using them to query sycl-ls again to set test parameters) makes things less tangled.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, thanks

if re.match(r" *Architecture:", line):
_, architecture = line.strip().split(":", 1)
detected_architectures.append(architecture.strip())

device = device_arch.replace("arch-", "")

if device in detected_architectures:
device_num = detected_architectures.index(device)
filtered_sycl_devices.append(backend + ":" + str(device_num))
else:
lit_config.warning(
"Couldn't find device with architecture {}"
" under {} device selector! Skipping device "
"{}".format(device, backend + ":*", sycl_device)
)

if not filtered_sycl_devices and not config.test_mode == "build-only":
lit_config.error(
"No sycl devices selected! Check your device " "architecture filters."
)

config.sycl_devices = filtered_sycl_devices

for sycl_device in remove_level_zero_suffix(config.sycl_devices):
be, dev = sycl_device.split(":")
config.available_features.add("any-device-is-" + dev)
Expand Down Expand Up @@ -856,31 +939,14 @@ def remove_level_zero_suffix(devices):
env["ONEAPI_DEVICE_SELECTOR"] = sycl_device
if sycl_device.startswith("cuda:"):
env["SYCL_UR_CUDA_ENABLE_IMAGE_SUPPORT"] = "1"
# When using the ONEAPI_DEVICE_SELECTOR environment variable, sycl-ls
# prints warnings that might derail a user thinking something is wrong
# with their test run. It's just us filtering here, so silence them unless
# we get an exit status.
try:
cmd = "{} {} --verbose".format(config.run_launcher or "", sycl_ls)
sp = subprocess.run(cmd, env=env, text=True, shell=True, capture_output=True)
sp.check_returncode()
except subprocess.CalledProcessError as e:
# capturing e allows us to see path resolution errors / system
# permissions errors etc
lit_config.fatal(
f"Cannot list device aspects for {sycl_device}\n"
f"{e}\n"
f"stdout:{sp.stdout}\n"
f"stderr:{sp.stderr}\n"
)

dev_aspects = []
dev_sg_sizes = []
architectures = set()
# See format.py's parse_min_intel_driver_req for explanation.
is_intel_driver = False
intel_driver_ver = {}
for line in sp.stdout.splitlines():
for line in get_sycl_ls_verbose(sycl_device, env):
if re.match(r" *Vendor *: Intel\(R\) Corporation", line):
is_intel_driver = True
if re.match(r" *Driver *:", line):
Expand Down
Loading