Commit 1c8915a
[msan] Approximately handle AVX Galois Field Affine Transformation
e.g.,
  <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
  <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
  <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
   Out                                   A          x          b
where Out = A * x + b in GF(2) (but A and x are packed).

Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix
computation also includes a parity calculation.

For the bitwise AND of bits V1 and V2, the exact shadow is:
  Out_Shadow = (V1_Shadow & V2_Shadow)
             | (V1        & V2_Shadow)
             | (V1_Shadow & V2       )

We approximate the shadow of gf2p8affine using:
  Out_Shadow = _mm512_gf2p8affine_epi64_epi8(x_Shadow, A_Shadow, 0)
             | _mm512_gf2p8affine_epi64_epi8(x,        A_Shadow, 0)
             | _mm512_gf2p8affine_epi64_epi8(x_Shadow, A,        0)
             | _mm512_set1_epi8(b_Shadow)

This approximation has false negatives: if an intermediate dot-product
contains an even number of 1's, the parity is 0. It has no false positives.

Updates the test from llvm#149258
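For illustration only (not part of this commit): a minimal scalar sketch of the false-negative case described above. One output bit of the affine transform is a GF(2) dot product, and when an even number of uninitialized input bits line up with a matrix row, their parity cancels. The names dotGF2, A_row, x, and xS are hypothetical, and the sketch assumes A is fully initialized so only the x_Shadow term matters.

// Scalar sketch: exact vs. approximate shadow of one GF(2) dot product.
#include <bitset>
#include <cstdint>
#include <cstdio>

// GF(2) dot product of two 8-bit vectors: bitwise AND, then parity.
static uint8_t dotGF2(uint8_t a, uint8_t x) {
  return std::bitset<8>(a & x).count() & 1u;
}

int main() {
  uint8_t A_row = 0b00000110; // one row of the bit matrix (fully initialized)
  uint8_t x     = 0b00000000; // input byte
  uint8_t xS    = 0b00000110; // shadow of x: bits 1 and 2 are uninitialized

  // Exact shadow of parity(A_row & x): the result bit is uninitialized if
  // any contributing AND term is uninitialized, i.e. if (A_row & xS) != 0.
  uint8_t exact = (A_row & xS) != 0;

  // Approximation from the patch; with A_Shadow == 0 the first two terms
  // vanish, leaving shadow = parity(A_row & xS).
  uint8_t approx = dotGF2(A_row, xS);

  // Two uninitialized bits line up with the matrix row, so their parity
  // cancels: exact == 1 but approx == 0 (a false negative, never the reverse).
  std::printf("exact=%u approx=%u\n", (unsigned)exact, (unsigned)approx);
  return 0;
}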
1 parent b5c7482 commit 1c8915a

2 files changed: +155 -111 lines
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Lines changed: 80 additions & 0 deletions
@@ -4769,6 +4769,78 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }

+  // Approximately handle AVX Galois Field Affine Transformation
+  //
+  // e.g.,
+  //   <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
+  //   <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
+  //   <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8)
+  //    Out                                   A          x          b
+  //   where Out = A * x + b in GF(2) (but A and x are packed)
+  //
+  // Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix
+  // computation also includes a parity calculation.
+  //
+  // For the bitwise AND of bits V1 and V2, the exact shadow is:
+  //   Out_Shadow = (V1_Shadow & V2_Shadow)
+  //              | (V1        & V2_Shadow)
+  //              | (V1_Shadow & V2       )
+  //
+  // We approximate the shadow of gf2p8affine using:
+  //   Out_Shadow = _mm512_gf2p8affine_epi64_epi8(x_Shadow, A_Shadow, 0)
+  //              | _mm512_gf2p8affine_epi64_epi8(x,        A_Shadow, 0)
+  //              | _mm512_gf2p8affine_epi64_epi8(x_Shadow, A,        0)
+  //              | _mm512_set1_epi8(b_Shadow)
+  //
+  // This approximation has false negatives: if an intermediate dot-product
+  // contains an even number of 1's, the parity is 0.
+  // It has no false positives.
+  void handleAVXGF2P8Affine(IntrinsicInst &I) {
+    IRBuilder<> IRB(&I);
+
+    assert(I.arg_size() == 3);
+    Value *A = I.getOperand(0);
+    Value *x = I.getOperand(1);
+    Value *b = I.getOperand(2);
+
+    assert(isFixedIntVector(A));
+    assert(cast<VectorType>(A->getType())
+               ->getElementType()
+               ->getScalarSizeInBits() == 8);
+
+    assert(A->getType() == x->getType());
+
+    assert(b->getType()->isIntegerTy());
+    assert(b->getType()->getScalarSizeInBits() == 8);
+
+    assert(I.getType() == A->getType());
+
+    Value *AShadow = getShadow(A);
+    Value *xShadow = getShadow(x);
+    Value *bZeroShadow = getCleanShadow(b);
+
+    CallInst *xShadowAShadow = IRB.CreateIntrinsic(
+        I.getType(), I.getIntrinsicID(), {xShadow, AShadow, bZeroShadow});
+    CallInst *xAShadow = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                             {x, AShadow, bZeroShadow});
+    CallInst *xShadowA = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(),
+                                             {xShadow, A, bZeroShadow});
+
+    unsigned NumElements = cast<FixedVectorType>(I.getType())->getNumElements();
+    Value *bShadow = getShadow(b);
+    Value *bBroadcastShadow = getCleanShadow(AShadow);
+    // There is no LLVM IR intrinsic for _mm512_set1_epi8.
+    // This loop generates a lot of LLVM IR, which we expect CodeGen will
+    // lower appropriately (e.g., VPBROADCASTB).
+    // Besides, b is often a constant, in which case it is fully initialized.
+    for (unsigned i = 0; i < NumElements; i++)
+      bBroadcastShadow = IRB.CreateInsertElement(bBroadcastShadow, bShadow, i);
+
+    setShadow(&I, IRB.CreateOr(
+                      {xShadowAShadow, xAShadow, xShadowA, bBroadcastShadow}));
+    setOriginForNaryOp(I);
+  }
+
   // Handle Arm NEON vector load intrinsics (vld*).
   //
   // The WithLane instructions (ld[234]lane) are similar to:
@@ -5604,6 +5676,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }

+    // AVX Galois Field New Instructions
+    case Intrinsic::x86_vgf2p8affineqb_128:
+    case Intrinsic::x86_vgf2p8affineqb_256:
+    case Intrinsic::x86_vgf2p8affineqb_512: {
+      handleAVXGF2P8Affine(I);
+      break;
+    }
+
     case Intrinsic::fshl:
     case Intrinsic::fshr:
       handleFunnelShift(I);
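For reference, a C-level sketch (not code from this commit) of the shadow computation the instrumentation emits, written with the corresponding AVX-512 GFNI intrinsics; the pass builds the equivalent LLVM IR directly, and the function name gf2p8AffineShadow and its parameters are hypothetical. The _mm512_set1_epi8 call here plays the role of the InsertElement broadcast loop in the patch.

// Requires GFNI and AVX-512F (e.g., compile with -mgfni -mavx512f).
#include <immintrin.h>

__m512i gf2p8AffineShadow(__m512i x, __m512i xShadow, __m512i A,
                          __m512i AShadow, char bShadow) {
  // The three affine terms from the comment, each with the immediate b = 0.
  __m512i t1 = _mm512_gf2p8affine_epi64_epi8(xShadow, AShadow, 0);
  __m512i t2 = _mm512_gf2p8affine_epi64_epi8(x, AShadow, 0);
  __m512i t3 = _mm512_gf2p8affine_epi64_epi8(xShadow, A, 0);
  // Every output byte also inherits the shadow of the immediate b.
  __m512i tb = _mm512_set1_epi8(bShadow);
  // OR the four contributions together, as the pass does with CreateOr.
  return _mm512_or_si512(_mm512_or_si512(t1, t2), _mm512_or_si512(t3, tb));
}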
