-
Notifications
You must be signed in to change notification settings - Fork 769
[SYCL] Split device images based on accuracy level provided in option #10140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
94ac8d5
Fix frontend issues after PR#8280
againull f235c44
[SYCL] Split device images based on accuracy level provided in option
againull 649fd15
Add frontend test
againull 404f82e
Metadata propagation test
againull d107740
Add info to design documentation
againull a537ca9
Address review comments
againull edf01dd
Merge remote-tracking branch 'origin/sycl' into fp_accuracy_image_spl…
againull c4b0c56
Remove unnecessary include
againull 756690c
Initialize local variable to avoid static analyzer issues
againull 4dd7de1
Remove unnecessary fix
againull 9408518
Remove parentheses
againull 2fb28d3
Merge remote-tracking branch 'origin/sycl' into orig_patch
againull 9807f64
Add additional RUN lines for TU and mixed cases
againull a13c1ff
Format
againull c3afa41
Fix EOL
againull 8e025fd
Address review comments
againull File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ffp-builtin-accuracy=high:sin,sqrt -ffp-builtin-accuracy=medium:cos -ffp-builtin-accuracy=low:tan -ffp-builtin-accuracy=cuda:exp,acos -ffp-builtin-accuracy=sycl:log,asin -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck --check-prefix CHECK-FUNC %s | ||
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ffp-builtin-accuracy=high -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck --check-prefix CHECK-TU %s | ||
// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -ffp-builtin-accuracy=medium -ffp-builtin-accuracy=high:sin,sqrt -ffp-builtin-accuracy=medium:cos -ffp-builtin-accuracy=cuda:exp -ffp-builtin-accuracy=sycl:log -emit-llvm -triple spir64-unknown-unknown %s -o - | FileCheck --check-prefix CHECK-MIX %s | ||
|
||
// Tests that sycl_used_aspects metadata is attached to the fpbuiltin call based on the -ffp-builtin-accuracy option. | ||
|
||
#include "sycl.hpp" | ||
|
||
extern "C" SYCL_EXTERNAL double sin(double); | ||
extern "C" SYCL_EXTERNAL double cos(double); | ||
extern "C" SYCL_EXTERNAL double tan(double); | ||
extern "C" SYCL_EXTERNAL double log(double); | ||
extern "C" SYCL_EXTERNAL double exp(double); | ||
extern "C" SYCL_EXTERNAL double acos(double); | ||
extern "C" SYCL_EXTERNAL double asin(double); | ||
extern "C" SYCL_EXTERNAL double sqrt(double); | ||
|
||
using namespace sycl; | ||
|
||
// Host entry point of the test. Submits Kernel0..Kernel7; Kernel1..Kernel7 each | ||
// call one or two of the extern "C" math functions declared above so that the | ||
// llvm.fpbuiltin.* call emitted for each function can be matched, together with | ||
// its !sycl_used_aspects metadata, by the three FileCheck prefixes used in the | ||
// RUN lines. The "Kernel N uses ..." comments describe the per-function | ||
// accuracy options of the first RUN line. | ||
int main() { | ||
const unsigned array_size = 4; | ||
double Value = .5; | ||
queue deviceQueue; | ||
range<1> numOfItems{array_size}; | ||
|
||
// Kernel0 doesn't use math functions. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel0>(numOfItems, | ||
[=](id<1> wiID) { | ||
(void)Value; | ||
}); | ||
}); | ||
|
||
// Kernel1 uses high-accuracy sin. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel1>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC:[0-9]+]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC:[0-9]+]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.sin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC:[0-9]+]] | ||
(void)sin(Value); | ||
}); | ||
}); | ||
|
||
// Kernel2 uses medium-accuracy cos. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel2>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC:[0-9]+]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.cos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC:[0-9]+]] | ||
(void)cos(Value); | ||
}); | ||
}); | ||
|
||
// Kernel3 uses low-accuracy tan. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel3>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[LOW_ACC:[0-9]+]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.tan.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC]] | ||
(void)tan(Value); | ||
}); | ||
}); | ||
|
||
// Kernel4 uses cuda-accuracy exp and sycl-accuracy log. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel4>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[CUDA_ACC:[0-9]+]] | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[SYCL_ACC:[0-9]+]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.exp.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[CUDA_ACC:[0-9]+]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.log.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[SYCL_ACC:[0-9]+]] | ||
(void)log(exp(Value)); | ||
}); | ||
}); | ||
deviceQueue.wait(); | ||
|
||
// Kernel5 uses cuda-accuracy acos. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel5>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[CUDA_ACC]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.acos.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC]] | ||
(void)acos(Value); | ||
}); | ||
}); | ||
|
||
// Kernel6 uses sycl-accuracy asin. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel6>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[SYCL_ACC]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.asin.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[MEDIUM_ACC]] | ||
(void)asin(Value); | ||
}); | ||
}); | ||
|
||
// Kernel7 uses high-accuracy sqrt. | ||
deviceQueue.submit([&](handler& cgh) { | ||
cgh.parallel_for<class Kernel7>(numOfItems, | ||
[=](id<1> wiID) { | ||
// CHECK-FUNC: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-TU: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
// CHECK-MIX: call double @llvm.fpbuiltin.sqrt.f64(double {{.*}}) #[[ATTR:[0-9]+]], !sycl_used_aspects ![[HIGH_ACC]] | ||
(void)sqrt(Value); | ||
}); | ||
}); | ||
return 0; | ||
} | ||
|
||
// CHECK-FUNC: [[HIGH_ACC]] = !{i32 -1} | ||
// CHECK-FUNC: [[MEDIUM_ACC]] = !{i32 -2} | ||
// CHECK-FUNC: [[LOW_ACC]] = !{i32 -3} | ||
// CHECK-FUNC: [[CUDA_ACC]] = !{i32 -5} | ||
// CHECK-FUNC: [[SYCL_ACC]] = !{i32 -4} | ||
|
||
// CHECK-TU: [[HIGH_ACC]] = !{i32 -1} | ||
|
||
// CHECK-MIX: [[HIGH_ACC]] = !{i32 -1} | ||
// CHECK-MIX: [[MEDIUM_ACC]] = !{i32 -2} | ||
// CHECK-MIX: [[CUDA_ACC]] = !{i32 -5} | ||
// CHECK-MIX: [[SYCL_ACC]] = !{i32 -4} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
llvm/test/SYCLLowerIR/PropagateAspectsUsage/call-graph-inst.ll
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
; RUN: opt -passes=sycl-propagate-aspects-usage < %s -S | FileCheck %s | ||
; | ||
; Checks that the pass propagates aspects attached to instructions (via | ||
; 'sycl_used_aspects' metadata) through the call graph. | ||
; | ||
; K1 K2 | ||
; / \/ \ | ||
; F1 F2 F3 | ||
; | ||
; F1 doesn't use an optional type and has no instruction with attached 'sycl_used_aspects' metadata. | ||
; F2 uses optional type A and has an instruction with attached 'sycl_used_aspects' metadata. | ||
; F3 uses optional type B and has an instruction with attached 'sycl_used_aspects' metadata. | ||
|
||
%Optional.A = type { i32 } | ||
%Optional.B = type { i32 } | ||
|
||
; CHECK: spir_kernel void @kernel1() !sycl_used_aspects ![[#ID1:]] | ||
; Kernel K1: calls func1 (no aspects) and func2 (type aspect 1 plus | ||
; instruction aspect -1), so the expected propagated set is {1, -1}. | ||
define spir_kernel void @kernel1() { | ||
call spir_func void @func1() | ||
call spir_func void @func2() | ||
ret void | ||
} | ||
|
||
; CHECK: spir_kernel void @kernel2() !sycl_used_aspects ![[#ID2:]] | ||
; Kernel K2: calls func2 and func3, so the expected propagated set is the | ||
; union of both callees' aspects: {1, -1, 2, -2}. | ||
define spir_kernel void @kernel2() { | ||
call spir_func void @func2() | ||
call spir_func void @func3() | ||
ret void | ||
} | ||
|
||
; CHECK: spir_func void @func1() { | ||
; F1: allocates a plain i32 and carries no 'sycl_used_aspects' metadata, so | ||
; the function is expected to stay unannotated in the output. | ||
define spir_func void @func1() { | ||
%tmp = alloca i32 | ||
ret void | ||
} | ||
|
||
declare void @llvm.fpbuiltin.f64() | ||
|
||
; CHECK: spir_func void @func2() !sycl_used_aspects ![[#ID1]] { | ||
; F2: uses %Optional.A (mapped to aspect 1 by !0) and calls the fpbuiltin | ||
; declaration with instruction-level metadata !3 (aspect -1). | ||
define spir_func void @func2() { | ||
%tmp1 = alloca %Optional.A | ||
call void @llvm.fpbuiltin.f64(), !sycl_used_aspects !3 | ||
ret void | ||
} | ||
|
||
; CHECK: spir_func void @func3() !sycl_used_aspects ![[#ID3:]] { | ||
; F3: uses %Optional.B (mapped to aspect 2 by !1) and calls the fpbuiltin | ||
; declaration with instruction-level metadata !4 (aspect -2). | ||
define spir_func void @func3() { | ||
%tmp = alloca %Optional.B | ||
call void @llvm.fpbuiltin.f64(), !sycl_used_aspects !4 | ||
ret void | ||
} | ||
|
||
!sycl_types_that_use_aspects = !{!0, !1} | ||
!0 = !{!"Optional.A", i32 1} | ||
!1 = !{!"Optional.B", i32 2} | ||
|
||
!sycl_aspects = !{!2} | ||
!2 = !{!"fp64", i32 6} | ||
!3 = !{i32 -1} | ||
!4 = !{i32 -2} | ||
|
||
; CHECK: ![[#ID1]] = !{i32 1, i32 -1} | ||
; CHECK: ![[#ID2]] = !{i32 1, i32 -1, i32 2, i32 -2} | ||
; CHECK: ![[#ID3]] = !{i32 2, i32 -2} | ||
|
||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to add an assert here to ensure this function is called with appropriate FPAccuracyStr?
Thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added assert, thanks.