@@ -38,7 +38,7 @@ https://github.com/intel/llvm/issues
38
38
39
39
== Dependencies
40
40
41
- This extension is written against the SYCL 2020 revision 6 specification. All
41
+ This extension is written against the SYCL 2020 revision 7 specification. All
42
42
references below to the "core SYCL specification" or to section numbers in the
43
43
SYCL specification refer to that revision.
44
44
@@ -79,9 +79,9 @@ recording mechanism, similar to the initial kernel fusion proposal; and another
79
79
one using explicit graph building. Thus, future users will be able to choose
80
80
from two different mechanisms to construct the sequence of kernels to fuse. As
81
81
there is an explicit step for finalization of graphs before they are submitted for
82
- execution, the fusion step can happen asynchronously and also eliminates many of
83
- the synchronization concerns that needed to be covered in the experimental
84
- kernel fusion proposal.
82
+ execution, fusion can happen in this step, which also eliminates many of the
83
+ synchronization concerns that needed to be covered in the experimental kernel
84
+ fusion proposal.
85
85
86
86
The aim of this document is to propose a mechanism for users to request the
87
87
fusion of two or more kernels in a SYCL graph into a single kernel **at
@@ -460,29 +460,32 @@ struct AddKernel {
460
460
461
461
int main() {
462
462
constexpr size_t dataSize = 512;
463
- int in1[dataSize], in2[dataSize], in3[dataSize], tmp1[dataSize],
464
- tmp2[dataSize], tmp3[dataSize], out[dataSize];
463
+ int in1[dataSize], in2[dataSize], in3[dataSize], out[dataSize];
465
464
466
465
queue q{default_selector_v};
467
466
468
- ext::oneapi::experimental::command_graph graph{q.get_context(),
469
- q.get_device()};
470
467
{
471
468
buffer<int> bIn1{in1, range{dataSize}};
469
+ bIn1.set_write_back(false);
472
470
buffer<int> bIn2{in2, range{dataSize}};
471
+ bIn2.set_write_back(false);
473
472
buffer<int> bIn3{in3, range{dataSize}};
474
- buffer<int> bTmp1{tmp1, range{dataSize}};
473
+ bIn3.set_write_back(false);
474
+ buffer<int> bTmp1{range{dataSize}};
475
475
// Internalization specified on the buffer
476
476
buffer<int> bTmp2{
477
- tmp2,
478
477
range{dataSize},
479
478
{sycl::ext::oneapi::experimental::property::promote_private{}}};
480
479
// Internalization specified on the buffer
481
480
buffer<int> bTmp3{
482
- tmp3,
483
481
range{dataSize},
484
482
{sycl::ext::oneapi::experimental::property::promote_private{}}};
485
483
buffer<int> bOut{out, range{dataSize}};
484
+ bOut.set_write_back(false);
485
+
486
+ ext::oneapi::experimental::command_graph graph{
487
+ q.get_context(), q.get_device(),
488
+ sycl::ext::oneapi::experimental::property::graph::no_host_copy{}};
486
489
487
490
graph.begin_recording(q);
488
491
@@ -530,6 +533,8 @@ int main() {
530
533
command_graph::perform_fusion});
531
534
532
535
q.ext_oneapi_graph(exec_graph);
536
+
537
+ q.wait();
533
538
}
534
539
return 0;
535
540
}
@@ -635,4 +640,5 @@ Ewan Crawford, Codeplay +
635
640
|1|2023-02-16|Lukas Sommer|*Initial draft*
636
641
|2|2023-03-16|Lukas Sommer|*Remove reference to outdated `add_malloc_device` API*
637
642
|3|2023-04-11|Lukas Sommer|*Update usage examples for graph API changes*
643
+ |4|2023-08-17|Lukas Sommer|*Update after graph extension has been merged*
638
644
|========================================
0 commit comments