-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[msan] Approximately handle AVX Galois Field Affine Transformation #150794
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1c8915a
f6f7659
73dc317
3dd17c7
3884f9e
ee2bade
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4769,6 +4769,79 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| setOriginForNaryOp(I); | ||
| } | ||
|
|
||
| // Approximately handle AVX Galois Field Affine Transformation | ||
| // | ||
| // e.g., | ||
| // <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8) | ||
| // <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8) | ||
| // <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8>, <64 x i8>, i8) | ||
| // Out A x b | ||
| // where A and x are packed matrices, b is a vector, | ||
| // Out = A * x + b in GF(2) | ||
| // | ||
| // Multiplication in GF(2) is equivalent to bitwise AND. However, the matrix | ||
| // computation also includes a parity calculation. | ||
| // | ||
| // For the bitwise AND of bits V1 and V2, the exact shadow is: | ||
| // Out_Shadow = (V1_Shadow & V2_Shadow) | ||
| // | (V1 & V2_Shadow) | ||
| // | (V1_Shadow & V2 ) | ||
| // | ||
| // We approximate the shadow of gf2p8affineqb using: | ||
| // Out_Shadow = gf2p8affineqb(x_Shadow, A_shadow, 0) | ||
| // | gf2p8affineqb(x, A_shadow, 0) | ||
| // | gf2p8affineqb(x_Shadow, A, 0) | ||
| // | set1_epi8(b_Shadow) | ||
| // | ||
| // This approximation has false negatives: if an intermediate dot-product | ||
| // contains an even number of 1's, the parity is 0. | ||
| // It has no false positives. | ||
| void handleAVXGF2P8Affine(IntrinsicInst &I) { | ||
| IRBuilder<> IRB(&I); | ||
|
|
||
| assert(I.arg_size() == 3); | ||
| Value *A = I.getOperand(0); | ||
| Value *X = I.getOperand(1); | ||
| Value *B = I.getOperand(2); | ||
|
|
||
| assert(isFixedIntVector(A)); | ||
| assert(cast<VectorType>(A->getType()) | ||
| ->getElementType() | ||
| ->getScalarSizeInBits() == 8); | ||
|
|
||
| assert(A->getType() == X->getType()); | ||
|
|
||
| assert(B->getType()->isIntegerTy()); | ||
| assert(B->getType()->getScalarSizeInBits() == 8); | ||
|
|
||
| assert(I.getType() == A->getType()); | ||
|
|
||
| Value *AShadow = getShadow(A); | ||
| Value *XShadow = getShadow(X); | ||
| Value *BZeroShadow = getCleanShadow(B); | ||
|
|
||
| CallInst *AShadowXShadow = IRB.CreateIntrinsic( | ||
| I.getType(), I.getIntrinsicID(), {XShadow, AShadow, BZeroShadow}); | ||
| CallInst *AShadowX = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), | ||
| {X, AShadow, BZeroShadow}); | ||
| CallInst *XShadowA = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), | ||
| {XShadow, A, BZeroShadow}); | ||
|
|
||
| unsigned NumElements = cast<FixedVectorType>(I.getType())->getNumElements(); | ||
| Value *BShadow = getShadow(B); | ||
| Value *BBroadcastShadow = getCleanShadow(AShadow); | ||
| // There is no LLVM IR intrinsic for _mm512_set1_epi8. | ||
| // This loop generates a lot of LLVM IR, which we expect that CodeGen will | ||
| // lower appropriately (e.g., VPBROADCASTB). | ||
| // Besides, b is often a constant, in which case it is fully initialized. | ||
| for (unsigned i = 0; i < NumElements; i++) | ||
| BBroadcastShadow = IRB.CreateInsertElement(BBroadcastShadow, BShadow, i); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This just concats There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
shufflevector could be used instead. I chose insertelement because it is most likely to be the instruction sequence that the vectorizer will know how to convert into the AVX instruction, if not now then in the future. clang/test/CodeGen/X86/avx512f-builtins.c shows that: |
||
|
|
||
| setShadow(&I, IRB.CreateOr( | ||
| {AShadowXShadow, AShadowX, XShadowA, BBroadcastShadow})); | ||
| setOriginForNaryOp(I); | ||
| } | ||
|
|
||
| // Handle Arm NEON vector load intrinsics (vld*). | ||
| // | ||
| // The WithLane instructions (ld[234]lane) are similar to: | ||
|
|
@@ -5604,6 +5677,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { | |
| break; | ||
| } | ||
|
|
||
| // AVX Galois Field New Instructions | ||
fmayer marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| case Intrinsic::x86_vgf2p8affineqb_128: | ||
| case Intrinsic::x86_vgf2p8affineqb_256: | ||
| case Intrinsic::x86_vgf2p8affineqb_512: | ||
| handleAVXGF2P8Affine(I); | ||
| break; | ||
|
|
||
| case Intrinsic::fshl: | ||
| case Intrinsic::fshr: | ||
| handleFunnelShift(I); | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.