@@ -2743,16 +2743,48 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re
2743
2743
}
2744
2744
Label ("avx2_continue_" + name )
2745
2745
2746
+ Label ("matchlen_loopback_16_" + name )
2747
+ tmp2 := GP64 ()
2748
+ CMPL (len .As32 (), U8 (16 ))
2749
+ JB (LabelRef ("matchlen_match8_" + name ))
2750
+ MOVQ (Mem {Base : a , Index : matched , Scale : 1 }, tmp )
2751
+ MOVQ (Mem {Base : a , Index : matched , Scale : 1 , Disp : 8 }, tmp2 )
2752
+ XORQ (Mem {Base : b , Index : matched , Scale : 1 }, tmp )
2753
+ JNZ (LabelRef ("matchlen_bsf_8_" + name ))
2754
+ XORQ (Mem {Base : b , Index : matched , Scale : 1 , Disp : 8 }, tmp2 )
2755
+ JNZ (LabelRef ("matchlen_bsf_16" + name ))
2756
+ // All 16 bytes matched, update and loop.
2757
+ LEAL (Mem {Base : len , Disp : - 16 }, len .As32 ())
2758
+ LEAL (Mem {Base : matched , Disp : 16 }, matched )
2759
+ JMP (LabelRef ("matchlen_loopback_16_" + name ))
2760
+
2761
+ Label ("matchlen_bsf_16" + name )
2762
+ // Not all match.
2763
+ Comment ("#ifdef GOAMD64_v3" )
2764
+ // 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c
2765
+ // 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c
2766
+ TZCNTQ (tmp2 , tmp2 )
2767
+ Comment ("#else" )
2768
+ BSFQ (tmp2 , tmp2 )
2769
+ Comment ("#endif" )
2770
+
2771
+ SARQ (U8 (3 ), tmp2 )
2772
+ LEAL (Mem {Base : matched , Index : tmp2 , Scale : 1 , Disp : 8 }, matched )
2773
+ JMP (end )
2774
+
2775
+ Label ("matchlen_match8_" + name )
2746
2776
CMPL (len .As32 (), U8 (8 ))
2747
2777
JB (LabelRef ("matchlen_match4_" + name ))
2748
-
2749
- Label ("matchlen_loopback_" + name )
2750
2778
MOVQ (Mem {Base : a , Index : matched , Scale : 1 }, tmp )
2751
2779
XORQ (Mem {Base : b , Index : matched , Scale : 1 }, tmp )
2752
- TESTQ (tmp , tmp )
2753
- JZ (LabelRef ("matchlen_loop_" + name ))
2754
- // Not all match.
2780
+ JNZ (LabelRef ("matchlen_bsf_8_" + name ))
2781
+ // All 8 bytes matched, update and continue with the 4 byte test.
2782
+ LEAL (Mem {Base : len , Disp : - 8 }, len .As32 ())
2783
+ LEAL (Mem {Base : matched , Disp : 8 }, matched )
2784
+ JMP (LabelRef ("matchlen_match4_" + name ))
2785
+ Label ("matchlen_bsf_8_" + name )
2755
2786
2787
+ // Not all match.
2756
2788
Comment ("#ifdef GOAMD64_v3" )
2757
2789
// 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c
2758
2790
// 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c
@@ -2765,13 +2797,6 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re
2765
2797
LEAL (Mem {Base : matched , Index : tmp , Scale : 1 }, matched )
2766
2798
JMP (end )
2767
2799
2768
- // All 8 byte matched, update and loop.
2769
- Label ("matchlen_loop_" + name )
2770
- LEAL (Mem {Base : len , Disp : - 8 }, len .As32 ())
2771
- LEAL (Mem {Base : matched , Disp : 8 }, matched )
2772
- CMPL (len .As32 (), U8 (8 ))
2773
- JAE (LabelRef ("matchlen_loopback_" + name ))
2774
-
2775
2800
// Less than 8 bytes left.
2776
2801
// Test 4 bytes...
2777
2802
Label ("matchlen_match4_" + name )
0 commit comments