llvm · bogner · Feb 26, 2025 · Feb 22, 2025 · Feb 25, 2025 · Feb 26, 2025
diff --git a/llvm/docs/DirectX/DXILResources.rst b/llvm/docs/DirectX/DXILResources.rst
@@ -277,7 +277,7 @@ Examples:
 Accessing Resources as Memory
 -----------------------------
 
-*relevant types: Buffers, CBuffer, and Textures*
+*relevant types: Buffers and Textures*
 
 Loading and storing from resources is generally represented in LLVM using
 operations on memory that is only accessible via a handle object. Given a
@@ -321,12 +321,11 @@ Examples:
 Loads, Samples, and Gathers
 ---------------------------
 
-*relevant types: Buffers, CBuffers, and Textures*
+*relevant types: Buffers and Textures*
 
-All load, sample, and gather operations in DXIL return a `ResRet`_ type, and
-CBuffer loads return a similar `CBufRet`_ type. These types are structs
-containing 4 elements of some basic type, and in the case of `ResRet` a 5th
-element that is used by the `CheckAccessFullyMapped`_ operation. Some of these
+All load, sample, and gather operations in DXIL return a `ResRet`_ type. These
+types are structs containing 4 elements of some basic type, and a 5th element
+that is used by the `CheckAccessFullyMapped`_ operation. Some of these
 operations, like `RawBufferLoad`_ include a mask and/or alignment that tell us
 some information about how to interpret those four values.
 
@@ -632,3 +631,118 @@ Examples:
        target("dx.RawBuffer", i8, 1, 0, 0) %buffer,
        i32 %index, i32 0, <4 x double> %data)
 
+Constant Buffer Loads
+---------------------
+
+*relevant types: CBuffers*
+
+The `CBufferLoadLegacy`_ operation, which despite the name is the only
+supported way to load from a cbuffer in any DXIL version, loads a single "row"
+of a cbuffer, which is exactly 16 bytes. The return value of the operation is
+represented by a `CBufRet`_ type, which has variants for 2 64-bit values, 4
+32-bit values, and 8 16-bit values.
+
+We represent these in LLVM IR with 3 separate operations, which return a
+2-element, 4-element, or 8-element struct respectively.
+
+.. _CBufferLoadLegacy: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#cbufferLoadLegacy
+.. _CBufRet: https://github.com/microsoft/DirectXShaderCompiler/blob/main/docs/DXIL.rst#cbufferloadlegacy
+
+.. list-table:: ``@llvm.dx.resource.load.cbufferrow.4``
+   :header-rows: 1
+
+   * - Argument
+     -
+     - Type
+     - Description
+   * - Return value
+     -
+     - A struct of 4 32-bit values
+     - A single row of a cbuffer, interpreted as 4 32-bit values
+   * - ``%buffer``
+     - 0
+     - ``target(dx.CBuffer, ...)``
+     - The buffer to load from
+   * - ``%index``
+     - 1
+     - ``i32``
+     - Index into the buffer
+
+Examples:
+
+.. code-block:: llvm
+
+   %ret = call {float, float, float, float}
+       @llvm.dx.resource.load.cbufferrow.4(
+           target("dx.CBuffer", target("dx.Layout", {float}, 4, 0)) %buffer,
+           i32 %index)
+   %ret = call {i32, i32, i32, i32}
+       @llvm.dx.resource.load.cbufferrow.4(
+           target("dx.CBuffer", target("dx.Layout", {i32}, 4, 0)) %buffer,
+           i32 %index)
+
+.. list-table:: ``@llvm.dx.resource.load.cbufferrow.2``
+   :header-rows: 1
+
+   * - Argument
+     -
+     - Type
+     - Description
+   * - Return value
+     -
+     - A struct of 2 64-bit values
+     - A single row of a cbuffer, interpreted as 2 64-bit values
+   * - ``%buffer``
+     - 0
+     - ``target(dx.CBuffer, ...)``
+     - The buffer to load from
+   * - ``%index``
+     - 1
+     - ``i32``
+     - Index into the buffer
+
+Examples:
+
+.. code-block:: llvm
+
+   %ret = call {double, double}
+       @llvm.dx.resource.load.cbufferrow.2(
+           target("dx.CBuffer", target("dx.Layout", {double}, 8, 0)) %buffer,
+           i32 %index)
+   %ret = call {i64, i64}
+       @llvm.dx.resource.load.cbufferrow.2(
+           target("dx.CBuffer", target("dx.Layout", {i64}, 4, 0)) %buffer,
+           i32 %index)
+
+.. list-table:: ``@llvm.dx.resource.load.cbufferrow.8``
+   :header-rows: 1
+
+   * - Argument
+     -
+     - Type
+     - Description
+   * - Return value
+     -
+     - A struct of 8 16-bit values
+     - A single row of a cbuffer, interpreted as 8 16-bit values
+   * - ``%buffer``
+     - 0
+     - ``target(dx.CBuffer, ...)``
+     - The buffer to load from
+   * - ``%index``
+     - 1
+     - ``i32``
+     - Index into the buffer
+
+Examples:
+
+.. code-block:: llvm
+
+   %ret = call {half, half, half, half, half, half, half, half}
+       @llvm.dx.resource.load.cbufferrow.8(
+           target("dx.CBuffer", target("dx.Layout", {half}, 2, 0)) %buffer,
+           i32 %index)
+   %ret = call {i16, i16, i16, i16, i16, i16, i16, i16}
+       @llvm.dx.resource.load.cbufferrow.8(
+           target("dx.CBuffer", target("dx.Layout", {i16}, 2, 0)) %buffer,
+           i32 %index)
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -45,6 +45,21 @@ def int_dx_resource_store_rawbuffer
           [], [llvm_any_ty, llvm_i32_ty, llvm_i32_ty, llvm_any_ty],
           [IntrWriteMem]>;
 
+// dx.resource.load.cbufferrow encodes the number of elements returned in the
+// function name. The total size of the return should always be 128 bits.
+def int_dx_resource_load_cbufferrow_8
+    : DefaultAttrsIntrinsic<
+          [llvm_any_ty, llvm_any_ty, llvm_any_ty, llvm_any_ty,
+           llvm_any_ty, llvm_any_ty, llvm_any_ty, llvm_any_ty],
+          [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
+def int_dx_resource_load_cbufferrow_4
+    : DefaultAttrsIntrinsic<
+          [llvm_any_ty, llvm_any_ty, llvm_any_ty, llvm_any_ty],
+          [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
+def int_dx_resource_load_cbufferrow_2
+    : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_any_ty],
+                            [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
+
 def int_dx_resource_updatecounter
     : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_i8_ty],
                             [IntrInaccessibleMemOrArgMemOnly]>;

diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
@@ -46,6 +46,12 @@ def ResRetDoubleTy : DXILOpParamType;
 def ResRetInt16Ty : DXILOpParamType;
 def ResRetInt32Ty : DXILOpParamType;
 def ResRetInt64Ty : DXILOpParamType;
+def CBufRetHalfTy : DXILOpParamType;
+def CBufRetFloatTy : DXILOpParamType;
+def CBufRetDoubleTy : DXILOpParamType;
+def CBufRetInt16Ty : DXILOpParamType;
+def CBufRetInt32Ty : DXILOpParamType;
+def CBufRetInt64Ty : DXILOpParamType;
 def HandleTy : DXILOpParamType;
 def ResBindTy : DXILOpParamType;
 def ResPropsTy : DXILOpParamType;
@@ -816,6 +822,19 @@ def CreateHandle : DXILOp<57, createHandle> {
   let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
 }
 
+def CBufferLoadLegacy : DXILOp<59, cbufferLoadLegacy> {
+  let Doc = "loads a value from a constant buffer resource";
+  // Handle, Index
+  let arguments = [HandleTy, Int32Ty];
+  let result = OverloadTy;
+  let overloads = [Overloads<DXIL1_0, [
+    CBufRetHalfTy, CBufRetFloatTy, CBufRetDoubleTy, CBufRetInt16Ty,
+    CBufRetInt32Ty, CBufRetInt64Ty
+  ]>];
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+  let attributes = [Attributes<DXIL1_0, [ReadOnly]>];
+}
+
 def BufferLoad : DXILOp<68, bufferLoad> {
   let Doc = "reads from a TypedBuffer";
   // Handle, Coord0, Coord1

diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.cpp b/llvm/lib/Target/DirectX/DXILOpBuilder.cpp
@@ -201,6 +201,29 @@ static StructType *getResRetType(Type *ElementTy) {
   return getOrCreateStructType(TypeName, FieldTypes, Ctx);
 }
 
+static StructType *getCBufRetType(Type *ElementTy) {
+  LLVMContext &Ctx = ElementTy->getContext();
+  OverloadKind Kind = getOverloadKind(ElementTy);
+  std::string TypeName = constructOverloadTypeName(Kind, "dx.types.CBufRet.");
+
+  // 64-bit types only have two elements
+  if (ElementTy->isDoubleTy() || ElementTy->isIntegerTy(64))
+    return getOrCreateStructType(TypeName, {ElementTy, ElementTy}, Ctx);
+
+  // 16-bit types pack 8 elements and have .8 in their name to differentiate
+  // from min-precision types.
+  if (ElementTy->isHalfTy() || ElementTy->isIntegerTy(16)) {
+    TypeName += ".8";
+    return getOrCreateStructType(TypeName,
+                                 {ElementTy, ElementTy, ElementTy, ElementTy,
+                                  ElementTy, ElementTy, ElementTy, ElementTy},
+                                 Ctx);
+  }
+
+  return getOrCreateStructType(
+      TypeName, {ElementTy, ElementTy, ElementTy, ElementTy}, Ctx);
+}
+
 static StructType *getHandleType(LLVMContext &Ctx) {
   return getOrCreateStructType("dx.types.Handle", PointerType::getUnqual(Ctx),
                                Ctx);
@@ -265,6 +288,18 @@ static Type *getTypeFromOpParamType(OpParamType Kind, LLVMContext &Ctx,
     return getResRetType(Type::getInt32Ty(Ctx));
   case OpParamType::ResRetInt64Ty:
     return getResRetType(Type::getInt64Ty(Ctx));
+  case OpParamType::CBufRetHalfTy:
+    return getCBufRetType(Type::getHalfTy(Ctx));
+  case OpParamType::CBufRetFloatTy:
+    return getCBufRetType(Type::getFloatTy(Ctx));
+  case OpParamType::CBufRetDoubleTy:
+    return getCBufRetType(Type::getDoubleTy(Ctx));
+  case OpParamType::CBufRetInt16Ty:
+    return getCBufRetType(Type::getInt16Ty(Ctx));
+  case OpParamType::CBufRetInt32Ty:
+    return getCBufRetType(Type::getInt32Ty(Ctx));
+  case OpParamType::CBufRetInt64Ty:
+    return getCBufRetType(Type::getInt64Ty(Ctx));
   case OpParamType::HandleTy:
     return getHandleType(Ctx);
   case OpParamType::ResBindTy:
@@ -535,6 +570,10 @@ StructType *DXILOpBuilder::getResRetType(Type *ElementTy) {
   return ::getResRetType(ElementTy);
 }
 
+StructType *DXILOpBuilder::getCBufRetType(Type *ElementTy) {
+  return ::getCBufRetType(ElementTy);
+}
+
 StructType *DXILOpBuilder::getHandleType() {
   return ::getHandleType(IRB.getContext());
 }

diff --git a/llvm/lib/Target/DirectX/DXILOpBuilder.h b/llvm/lib/Target/DirectX/DXILOpBuilder.h
@@ -50,6 +50,9 @@ class DXILOpBuilder {
   /// Get a `%dx.types.ResRet` type with the given element type.
   StructType *getResRetType(Type *ElementTy);
 
+  /// Get a `%dx.types.CBufRet` type with the given element type.
+  StructType *getCBufRetType(Type *ElementTy);
+
   /// Get the `%dx.types.Handle` type.
   StructType *getHandleType();
 

diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -569,6 +569,32 @@ class OpLowerer {
     });
   }
 
+  [[nodiscard]] bool lowerCBufferLoad(Function &F) {
+    IRBuilder<> &IRB = OpBuilder.getIRB();
+
+    return replaceFunction(F, [&](CallInst *CI) -> Error {
+      IRB.SetInsertPoint(CI);
+
+      Type *OldTy = cast<StructType>(CI->getType())->getElementType(0);
+      Type *ScalarTy = OldTy->getScalarType();
+      Type *NewRetTy = OpBuilder.getCBufRetType(ScalarTy);
+
+      Value *Handle =
+          createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
+      Value *Index = CI->getArgOperand(1);
+
+      Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
+          OpCode::CBufferLoadLegacy, {Handle, Index}, CI->getName(), NewRetTy);
+      if (Error E = OpCall.takeError())
+        return E;
+      if (Error E = replaceNamedStructUses(CI, *OpCall))
+        return E;
+
+      CI->eraseFromParent();
+      return Error::success();
+    });
+  }
+
   [[nodiscard]] bool lowerUpdateCounter(Function &F) {
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int32Ty = IRB.getInt32Ty();
@@ -808,6 +834,11 @@ class OpLowerer {
       case Intrinsic::dx_resource_store_rawbuffer:
         HasErrors |= lowerBufferStore(F, /*IsRaw=*/true);
         break;
+      case Intrinsic::dx_resource_load_cbufferrow_2:
+      case Intrinsic::dx_resource_load_cbufferrow_4:
+      case Intrinsic::dx_resource_load_cbufferrow_8:
+        HasErrors |= lowerCBufferLoad(F);
+        break;
       case Intrinsic::dx_resource_updatecounter:
         HasErrors |= lowerUpdateCounter(F);
         break;

diff --git a/llvm/test/CodeGen/DirectX/CBufferLoadLegacy-errors.ll b/llvm/test/CodeGen/DirectX/CBufferLoadLegacy-errors.ll
@@ -0,0 +1,45 @@
+; We use llc for this test so that we don't abort after the first error.
+; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+declare void @f32_user(float)
+declare void @f64_user(double)
+declare void @f16_user(half)
+
+; CHECK: error:
+; CHECK-SAME: in function four64
+; CHECK-SAME: Type mismatch between intrinsic and DXIL op
+define void @four64() "hlsl.export" {
+  %buffer = call target("dx.CBuffer", target("dx.Layout", {double}, 8, 0))
+      @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  %load = call {double, double, double, double} @llvm.dx.resource.load.cbufferrow.4(
+      target("dx.CBuffer", target("dx.Layout", {double}, 8, 0)) %buffer,
+      i32 0)
+  %data = extractvalue {double, double, double, double} %load, 0
+
+  call void @f64_user(double %data)
+
+  ret void
+}
+
+; CHECK: error:
+; CHECK-SAME: in function two32
+; CHECK-SAME: Type mismatch between intrinsic and DXIL op
+define void @two32() "hlsl.export" {
+  %buffer = call target("dx.CBuffer", target("dx.Layout", {float}, 4, 0))
+      @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false)
+
+  %load = call {float, float} @llvm.dx.resource.load.cbufferrow.2(
+      target("dx.CBuffer", target("dx.Layout", {float}, 4, 0)) %buffer,
+      i32 0)
+  %data = extractvalue {float, float} %load, 0
+
+  call void @f32_user(float %data)
+
+  ret void
+}
+
+declare { double, double, double, double } @llvm.dx.resource.load.cbufferrow.4.f64.f64.f64.f64.tdx.CBuffer_tdx.Layout_sl_f64s_8_0tt(target("dx.CBuffer", target("dx.Layout", { double }, 8, 0)), i32)
+declare { float, float } @llvm.dx.resource.load.cbufferrow.2.f32.f32.tdx.CBuffer_tdx.Layout_sl_f32s_4_0tt(target("dx.CBuffer", target("dx.Layout", { float }, 4, 0)), i32)