Skip to content

Commit c147026

Browse files
committed
slightly faster ARM kernel
1 parent 0862418 commit c147026

File tree

1 file changed

+7
-15
lines changed

1 file changed

+7
-15
lines changed

src/Base64ARM.cs

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -344,23 +344,15 @@ private static unsafe void Base64DecodeBlock(byte* outPtr, byte* srcPtr)
344344
// Load 4 vectors from srcPtr, unzipping (de-interleaving) the input bytes
345345
var (str0, str1, str2, str3) = AdvSimd.Arm64.Load4xVector128AndUnzip(srcPtr);
346346

347+
// Merge adjacent input vectors with shift-left-and-insert (one insert replaces a shift-left + OR pair)
348+
Vector128<byte> outvec0 = AdvSimd.ShiftLeftAndInsert(
349+
AdvSimd.ShiftRightLogical(str1, 4), str0, 2);
347350

351+
Vector128<byte> outvec1 = AdvSimd.ShiftLeftAndInsert(
352+
AdvSimd.ShiftRightLogical(str2, 2), str1, 4);
348353

349-
// Perform bitwise operations to simulate NEON intrinsics
350-
Vector128<byte> outvec0 = AdvSimd.Or(
351-
AdvSimd.ShiftLeftLogical(str0, 2),
352-
AdvSimd.ShiftRightLogical(str1, 4)
353-
);
354-
355-
Vector128<byte> outvec1 = AdvSimd.Or(
356-
AdvSimd.ShiftLeftLogical(str1, 4),
357-
AdvSimd.ShiftRightLogical(str2, 2)
358-
);
359-
360-
Vector128<byte> outvec2 = AdvSimd.Or(
361-
AdvSimd.ShiftLeftLogical(str2, 6),
362-
str3
363-
);
354+
Vector128<byte> outvec2 = AdvSimd.ShiftLeftAndInsert(
355+
str3, str2, 6);
364356

365357
// Store the three result vectors, zipped (interleaved), to outPtr
366358
AdvSimd.Arm64.StoreVectorAndZip(outPtr, (outvec0, outvec1, outvec2));

0 commit comments

Comments
 (0)