Skip to content

Commit c1dcc38

Browse files
authored
s2: Do 2 overlapping match checks (#839)
Mainly faster on long matches, but has little to no regression on short matches.
1 parent 895291c commit c1dcc38

File tree

2 files changed

+1230
-435
lines changed

2 files changed

+1230
-435
lines changed

s2/_generate/gen.go

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2743,16 +2743,48 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re
27432743
}
27442744
Label("avx2_continue_" + name)
27452745

2746+
Label("matchlen_loopback_16_" + name)
2747+
tmp2 := GP64()
2748+
CMPL(len.As32(), U8(16))
2749+
JB(LabelRef("matchlen_match8_" + name))
2750+
MOVQ(Mem{Base: a, Index: matched, Scale: 1}, tmp)
2751+
MOVQ(Mem{Base: a, Index: matched, Scale: 1, Disp: 8}, tmp2)
2752+
XORQ(Mem{Base: b, Index: matched, Scale: 1}, tmp)
2753+
JNZ(LabelRef("matchlen_bsf_8_" + name))
2754+
XORQ(Mem{Base: b, Index: matched, Scale: 1, Disp: 8}, tmp2)
2755+
JNZ(LabelRef("matchlen_bsf_16" + name))
2756+
// All 16 bytes matched, update and loop.
2757+
LEAL(Mem{Base: len, Disp: -16}, len.As32())
2758+
LEAL(Mem{Base: matched, Disp: 16}, matched)
2759+
JMP(LabelRef("matchlen_loopback_16_" + name))
2760+
2761+
Label("matchlen_bsf_16" + name)
2762+
// Not all match.
2763+
Comment("#ifdef GOAMD64_v3")
2764+
// 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c
2765+
// 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c
2766+
TZCNTQ(tmp2, tmp2)
2767+
Comment("#else")
2768+
BSFQ(tmp2, tmp2)
2769+
Comment("#endif")
2770+
2771+
SARQ(U8(3), tmp2)
2772+
LEAL(Mem{Base: matched, Index: tmp2, Scale: 1, Disp: 8}, matched)
2773+
JMP(end)
2774+
2775+
Label("matchlen_match8_" + name)
27462776
CMPL(len.As32(), U8(8))
27472777
JB(LabelRef("matchlen_match4_" + name))
2748-
2749-
Label("matchlen_loopback_" + name)
27502778
MOVQ(Mem{Base: a, Index: matched, Scale: 1}, tmp)
27512779
XORQ(Mem{Base: b, Index: matched, Scale: 1}, tmp)
2752-
TESTQ(tmp, tmp)
2753-
JZ(LabelRef("matchlen_loop_" + name))
2754-
// Not all match.
2780+
JNZ(LabelRef("matchlen_bsf_8_" + name))
2781+
// All 8 bytes matched, update and continue with the 4-byte check.
2782+
LEAL(Mem{Base: len, Disp: -8}, len.As32())
2783+
LEAL(Mem{Base: matched, Disp: 8}, matched)
2784+
JMP(LabelRef("matchlen_match4_" + name))
2785+
Label("matchlen_bsf_8_" + name)
27552786

2787+
// Not all match.
27562788
Comment("#ifdef GOAMD64_v3")
27572789
// 2016 BMI :TZCNT r64, r64 L: 0.57ns= 2.0c T: 0.29ns= 1.00c
27582790
// 315 AMD64 :BSF r64, r64 L: 0.88ns= 3.1c T: 0.86ns= 3.00c
@@ -2765,13 +2797,6 @@ func (o options) matchLen(name string, a, b, len reg.GPVirtual, end LabelRef) re
27652797
LEAL(Mem{Base: matched, Index: tmp, Scale: 1}, matched)
27662798
JMP(end)
27672799

2768-
// All 8 byte matched, update and loop.
2769-
Label("matchlen_loop_" + name)
2770-
LEAL(Mem{Base: len, Disp: -8}, len.As32())
2771-
LEAL(Mem{Base: matched, Disp: 8}, matched)
2772-
CMPL(len.As32(), U8(8))
2773-
JAE(LabelRef("matchlen_loopback_" + name))
2774-
27752800
// Less than 8 bytes left.
27762801
// Test 4 bytes...
27772802
Label("matchlen_match4_" + name)

0 commit comments

Comments
 (0)