@@ -3873,38 +3873,48 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
38733873 setOriginForNaryOp (I);
38743874 }
38753875
3876- // / Handle Arm NEON vector store intrinsics (vst{2,3,4} and vst1x_{2,3,4}).
3876+ // / Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
3877+ // / and vst{2,3,4}lane).
38773878 // /
38783879 // / Arm NEON vector store intrinsics have the output address (pointer) as the
3879- // / last argument, with the initial arguments being the inputs. They return
3880- // / void.
3880+ // / last argument, with the initial arguments being the inputs (and lane
3881+ // / number for vst{2,3,4}lane). They return void.
38813882 // /
38823883 // / - st4 interleaves the output e.g., st4 (inA, inB, inC, inD, outP) writes
38833884 // / abcdabcdabcdabcd... into *outP
38843885 // / - st1_x4 is non-interleaved e.g., st1_x4 (inA, inB, inC, inD, outP)
38853886 // / writes aaaa...bbbb...cccc...dddd... into *outP
3887+ // / - st4lane has arguments of (inA, inB, inC, inD, lane, outP); only the selected lane of each input is written into *outP
38863888 // / These instructions can all be instrumented with essentially the same
38873889 // / MSan logic, simply by applying the corresponding intrinsic to the shadow.
3888- void handleNEONVectorStoreIntrinsic (IntrinsicInst &I) {
3890+ void handleNEONVectorStoreIntrinsic (IntrinsicInst &I, bool useLane ) {
38893891 IRBuilder<> IRB (&I);
38903892
38913893 // Don't use getNumOperands() because it includes the callee
38923894 int numArgOperands = I.arg_size ();
3893- assert (numArgOperands >= 1 );
38943895
3895- // The last arg operand is the output
3896+ // The last arg operand is the output (pointer)
3897+ assert (numArgOperands >= 1 );
38963898 Value *Addr = I.getArgOperand (numArgOperands - 1 );
38973899 assert (Addr->getType ()->isPointerTy ());
3900+ unsigned int skipTrailingOperands = 1 ;
38983901
38993902 if (ClCheckAccessAddress)
39003903 insertShadowCheck (Addr, &I);
39013904
3902- SmallVector<Value *, 8 > Shadows;
3903- // Every arg operand, other than the last one, is an input vector
3904- for (int i = 0 ; i < numArgOperands - 1 ; i++) {
3905+ // Second-last operand is the lane number (for vst{2,3,4}lane)
3906+ if (useLane) {
3907+ skipTrailingOperands ++;
3908+ assert (numArgOperands >= (int )skipTrailingOperands);
3909+ assert (isa<IntegerType>(I.getArgOperand (numArgOperands - skipTrailingOperands)->getType ()));
3910+ }
3911+
3912+ SmallVector<Value *, 8 > ShadowArgs;
3913+ // All the initial operands are the inputs
3914+ for (unsigned int i = 0 ; i < numArgOperands - skipTrailingOperands; i++) {
39053915 assert (isa<FixedVectorType>(I.getArgOperand (i)->getType ()));
39063916 Value *Shadow = getShadow (&I, i);
3907- Shadows .append (1 , Shadow);
3917+ ShadowArgs .append (1 , Shadow);
39083918 }
39093919
39103920 // MSan's GetShadowTy assumes the LHS is the type we want the shadow for
@@ -3921,29 +3931,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
39213931 FixedVectorType *OutputVectorTy = FixedVectorType::get (
39223932 cast<FixedVectorType>(I.getArgOperand (0 )->getType ())->getElementType (),
39233933 cast<FixedVectorType>(I.getArgOperand (0 )->getType ())->getNumElements () *
3924- (numArgOperands - 1 ));
3934+ (numArgOperands - skipTrailingOperands ));
39253935 Type *OutputShadowTy = getShadowTy (OutputVectorTy);
39263936
3937+ if (useLane)
3938+ ShadowArgs.append (1 , I.getArgOperand (numArgOperands - skipTrailingOperands));
3939+
39273940 Value *OutputShadowPtr, *OutputOriginPtr;
39283941 // AArch64 NEON does not need alignment (unless OS requires it)
39293942 std::tie (OutputShadowPtr, OutputOriginPtr) = getShadowOriginPtr (
39303943 Addr, IRB, OutputShadowTy, Align (1 ), /* isStore*/ true );
3931- Shadows .append (1 , OutputShadowPtr);
3944+ ShadowArgs .append (1 , OutputShadowPtr);
39323945
3933- // CreateIntrinsic will select the correct (integer) type for the
3934- // intrinsic; the original instruction I may have either integer- or
3935- // float-type inputs.
39363946 CallInst *CI =
3937- IRB.CreateIntrinsic (IRB.getVoidTy (), I.getIntrinsicID (), Shadows );
3947+ IRB.CreateIntrinsic (IRB.getVoidTy (), I.getIntrinsicID (), ShadowArgs );
39383948 setShadow (&I, CI);
39393949
39403950 if (MS.TrackOrigins ) {
39413951 // TODO: if we modelled the vst* instruction more precisely, we could
39423952 // more accurately track the origins (e.g., if both inputs are
39433953 // uninitialized for vst2, we currently blame the second input, even
39443954 // though part of the output depends only on the first input).
3955+ //
3956+ // This is particularly imprecise for vst{2,3,4}lane, since only one
3957+ // lane of each input is actually copied to the output.
39453958 OriginCombiner OC (this , IRB);
3946- for (int i = 0 ; i < numArgOperands - 1 ; i++)
3959+ for (unsigned int i = 0 ; i < numArgOperands - skipTrailingOperands ; i++)
39473960 OC.Add (I.getArgOperand (i));
39483961
39493962 const DataLayout &DL = F.getDataLayout ();
@@ -4316,7 +4329,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
43164329 case Intrinsic::aarch64_neon_st2:
43174330 case Intrinsic::aarch64_neon_st3:
43184331 case Intrinsic::aarch64_neon_st4: {
4319- handleNEONVectorStoreIntrinsic (I);
4332+ handleNEONVectorStoreIntrinsic (I, false );
4333+ break ;
4334+ }
4335+
4336+ case Intrinsic::aarch64_neon_st2lane:
4337+ case Intrinsic::aarch64_neon_st3lane:
4338+ case Intrinsic::aarch64_neon_st4lane: {
4339+ handleNEONVectorStoreIntrinsic (I, true );
43204340 break ;
43214341 }
43224342
0 commit comments