Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsNVPTX.td
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ let Attributes = [NoReturn] in {
}
let Attributes = [NoThrow] in {
def __nvvm_nanosleep : NVPTXBuiltinSMAndPTX<"void(unsigned int)", SM_70, PTX63>;
def __nvvm_pm_event_mask : NVPTXBuiltin<"void(_Constant unsigned short)">;
}

// Min Max
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGen/builtins-nvptx.c
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,13 @@ __device__ void nvvm_vote(int pred) {
// CHECK: ret void
}

// CHECK-LABEL: nvvm_pm_event_mask
__device__ void nvvm_pm_event_mask() {
// CHECK: call void @llvm.nvvm.pm.event.mask(i16 255)
__nvvm_pm_event_mask(255);
// CHECK: ret void
}

// CHECK-LABEL: nvvm_nanosleep
__device__ void nvvm_nanosleep(int d) {
#if __CUDA_ARCH__ >= 700
Expand Down
23 changes: 23 additions & 0 deletions llvm/docs/NVPTXUsage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1868,6 +1868,29 @@ If the request failed, the behavior of these intrinsics is undefined.

For more information, refer `PTX ISA <https://docs.nvidia.com/cuda/parallel-thread-execution/?a#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-query-cancel>`__.

Perf Monitor Event Intrinsics
-----------------------------

'``llvm.nvvm.pm.event.mask``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

.. code-block:: llvm

declare void @llvm.nvvm.pm.event.mask(i16 immarg %mask_val)

Overview:
"""""""""

The '``llvm.nvvm.pm.event.mask``' intrinsic triggers one or more
performance monitor events. Each bit in the 16-bit immediate operand
``%mask_val`` controls an event.

For more information on the pmevent instructions, refer to the PTX ISA
`<https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#miscellaneous-instructions-pmevent>`_.

Other Intrinsics
----------------

Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -768,6 +768,11 @@ let TargetPrefix = "nvvm" in {
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
[IntrConvergent, IntrNoMem, IntrHasSideEffects]>;

// Performance Monitor Events (pm events) intrinsics
def int_nvvm_pm_event_mask : NVVMBuiltin,
DefaultAttrsIntrinsic<[], [llvm_i16_ty],
[IntrConvergent, IntrNoMem, IntrHasSideEffects,
ImmArg<ArgIndex<0>>]>;
//
// Min Max
//
Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1052,6 +1052,16 @@ def INT_NVVM_NANOSLEEP_I : NVPTXInst<(outs), (ins i32imm:$i), "nanosleep.u32 \t$
def INT_NVVM_NANOSLEEP_R : NVPTXInst<(outs), (ins Int32Regs:$i), "nanosleep.u32 \t$i;",
[(int_nvvm_nanosleep i32:$i)]>,
Requires<[hasPTX<63>, hasSM<70>]>;

let hasSideEffects = 1 in {
// Performance Monitor events
def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs),
(ins i16imm:$mask),
"pmevent.mask",
[(int_nvvm_pm_event_mask timm:$mask)]>,
Requires<[hasSM<20>, hasPTX<30>]>;
} // hasSideEffects

//
// Min Max
//
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/NVPTX/pm-event.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}

declare void @llvm.nvvm.pm.event.mask(i16 %mask)

; CHECK-LABEL: test_pm_event
define void @test_pm_event() {
; CHECK: pmevent.mask 255;
call void @llvm.nvvm.pm.event.mask(i16 u0xff)

; CHECK: pmevent.mask 4096;
call void @llvm.nvvm.pm.event.mask(i16 u0x1000)

ret void
}