|
1 | 1 | // RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
|
2 | 2 |
|
3 |
| -extern "C" void acc_combined(int N) { |
4 |
| - // CHECK: cir.func @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}) { |
| 3 | +extern "C" void acc_combined(int N, int cond) { |
| 4 | + // CHECK: cir.func @acc_combined(%[[ARG_N:.*]]: !s32i loc{{.*}}, %[[ARG_COND:.*]]: !s32i loc{{.*}}) { |
5 | 5 | // CHECK-NEXT: %[[ALLOCA_N:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["N", init]
|
| 6 | + // CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["cond", init] |
6 | 7 | // CHECK-NEXT: cir.store %[[ARG_N]], %[[ALLOCA_N]] : !s32i, !cir.ptr<!s32i>
|
| 8 | + // CHECK-NEXT: cir.store %[[ARG_COND]], %[[COND]] : !s32i, !cir.ptr<!s32i> |
7 | 9 |
|
8 | 10 | #pragma acc parallel loop
|
9 | 11 | for(unsigned I = 0; I < N; ++I);
|
@@ -573,4 +575,150 @@ extern "C" void acc_combined(int N) {
|
573 | 575 | // CHECK-NEXT: } loc
|
574 | 576 | // CHECK: acc.terminator
|
575 | 577 | // CHECK-NEXT: } loc
|
| 578 | + |
| 579 | +#pragma acc parallel loop wait |
| 580 | + for(unsigned I = 0; I < N; ++I); |
| 581 | + // CHECK-NEXT: acc.parallel combined(loop) wait { |
| 582 | + // CHECK-NEXT: acc.loop combined(parallel) { |
| 583 | + // CHECK: acc.yield |
| 584 | + // CHECK-NEXT: } loc |
| 585 | + // CHECK-NEXT: acc.yield |
| 586 | + // CHECK-NEXT: } loc |
| 587 | + |
| 588 | +#pragma acc serial loop wait device_type(nvidia) wait |
| 589 | + for(unsigned I = 0; I < N; ++I); |
| 590 | + // CHECK-NEXT: acc.serial combined(loop) wait([#acc.device_type<none>, #acc.device_type<nvidia>]) { |
| 591 | + // CHECK-NEXT: acc.loop combined(serial) { |
| 592 | + // CHECK: acc.yield |
| 593 | + // CHECK-NEXT: } loc |
| 594 | + // CHECK-NEXT: acc.yield |
| 595 | + // CHECK-NEXT: } loc |
| 596 | + |
| 597 | +#pragma acc kernels loop wait(1) device_type(nvidia) wait |
| 598 | + for(unsigned I = 0; I < N; ++I); |
| 599 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 600 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 601 | + // CHECK-NEXT: acc.kernels combined(loop) wait([#acc.device_type<nvidia>], {%[[ONE_CAST]] : si32}) { |
| 602 | + // CHECK-NEXT: acc.loop combined(kernels) { |
| 603 | + // CHECK: acc.yield |
| 604 | + // CHECK-NEXT: } loc |
| 605 | + // CHECK-NEXT: acc.terminator |
| 606 | + // CHECK-NEXT: } loc |
| 607 | + |
| 608 | +#pragma acc parallel loop wait device_type(nvidia) wait(1) |
| 609 | + for(unsigned I = 0; I < N; ++I); |
| 610 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 611 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 612 | + // CHECK-NEXT: acc.parallel combined(loop) wait([#acc.device_type<none>], {%[[ONE_CAST]] : si32} [#acc.device_type<nvidia>]) { |
| 613 | + // CHECK-NEXT: acc.loop combined(parallel) { |
| 614 | + // CHECK: acc.yield |
| 615 | + // CHECK-NEXT: } loc |
| 616 | + // CHECK-NEXT: acc.yield |
| 617 | + // CHECK-NEXT: } loc |
| 618 | + |
| 619 | +#pragma acc serial loop wait(1) device_type(nvidia) wait(1) |
| 620 | + for(unsigned I = 0; I < N; ++I); |
| 621 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 622 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 623 | + // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i |
| 624 | + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 |
| 625 | + // CHECK-NEXT: acc.serial combined(loop) wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) { |
| 626 | + // CHECK-NEXT: acc.loop combined(serial) { |
| 627 | + // CHECK: acc.yield |
| 628 | + // CHECK-NEXT: } loc |
| 629 | + // CHECK-NEXT: acc.yield |
| 630 | + // CHECK-NEXT: } loc |
| 631 | + |
| 632 | +#pragma acc kernels loop wait(devnum: cond : 1) |
| 633 | + for(unsigned I = 0; I < N; ++I); |
| 634 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 635 | + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 636 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 637 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 638 | + // CHECK-NEXT: acc.kernels combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { |
| 639 | + // CHECK-NEXT: acc.loop combined(kernels) { |
| 640 | + // CHECK: acc.yield |
| 641 | + // CHECK-NEXT: } loc |
| 642 | + // CHECK-NEXT: acc.terminator |
| 643 | + // CHECK-NEXT: } loc |
| 644 | + |
| 645 | +#pragma acc parallel loop wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) |
| 646 | + for(unsigned I = 0; I < N; ++I); |
| 647 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 648 | + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 649 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 650 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 651 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 652 | + // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 653 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 654 | + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 655 | + // CHECK-NEXT: acc.parallel combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type<nvidia>]) { |
| 656 | + // CHECK-NEXT: acc.loop combined(parallel) { |
| 657 | + // CHECK: acc.yield |
| 658 | + // CHECK-NEXT: } loc |
| 659 | + // CHECK-NEXT: acc.yield |
| 660 | + // CHECK-NEXT: } loc |
| 661 | + |
| 662 | +#pragma acc serial loop wait(devnum: cond : 1, 2) |
| 663 | + for(unsigned I = 0; I < N; ++I); |
| 664 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 665 | + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 666 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 667 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 668 | + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i |
| 669 | + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 |
| 670 | + // CHECK-NEXT: acc.serial combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { |
| 671 | + // CHECK-NEXT: acc.loop combined(serial) { |
| 672 | + // CHECK: acc.yield |
| 673 | + // CHECK-NEXT: } loc |
| 674 | + // CHECK-NEXT: acc.yield |
| 675 | + // CHECK-NEXT: } loc |
| 676 | + |
| 677 | +#pragma acc kernels loop wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) |
| 678 | + for(unsigned I = 0; I < N; ++I); |
| 679 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 680 | + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 681 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 682 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 683 | + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i |
| 684 | + // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 |
| 685 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 686 | + // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 687 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 688 | + // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 689 | + // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i |
| 690 | + // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 |
| 691 | + // CHECK-NEXT: acc.kernels combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<nvidia>], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type<radeon>]) { |
| 692 | + // CHECK-NEXT: acc.loop combined(kernels) { |
| 693 | + // CHECK: acc.yield |
| 694 | + // CHECK-NEXT: } loc |
| 695 | + // CHECK-NEXT: acc.terminator |
| 696 | + // CHECK-NEXT: } loc |
| 697 | + |
| 698 | +#pragma acc parallel loop wait(cond, 1) |
| 699 | + for(unsigned I = 0; I < N; ++I); |
| 700 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 701 | + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 702 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 703 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 704 | + // CHECK-NEXT: acc.parallel combined(loop) wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { |
| 705 | + // CHECK-NEXT: acc.loop combined(parallel) { |
| 706 | + // CHECK: acc.yield |
| 707 | + // CHECK-NEXT: } loc |
| 708 | + // CHECK-NEXT: acc.yield |
| 709 | + // CHECK-NEXT: } loc |
| 710 | + |
| 711 | +#pragma acc serial loop wait(queues: cond, 1) device_type(radeon) |
| 712 | + for(unsigned I = 0; I < N; ++I); |
| 713 | + // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i |
| 714 | + // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 |
| 715 | + // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i |
| 716 | + // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 |
| 717 | + // CHECK-NEXT: acc.serial combined(loop) wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { |
| 718 | + // CHECK-NEXT: acc.loop combined(serial) { |
| 719 | + // CHECK: acc.yield |
| 720 | + // CHECK-NEXT: } loc |
| 721 | + // CHECK-NEXT: acc.yield |
| 722 | + // CHECK-NEXT: } loc |
| 723 | + |
576 | 724 | }
|
0 commit comments