Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

s2: Improve coding for long, close matches #613

Merged
merged 2 commits into from
Jun 2, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 46 additions & 10 deletions s2/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
JZ(LabelRef("repeat_as_copy_" + name))

// Emit as repeat...
o.emitRepeat("match_repeat_"+name, length, offsetVal, nil, dst, LabelRef("repeat_end_emit_"+name))
o.emitRepeat("match_repeat_"+name, length, offsetVal, nil, dst, LabelRef("repeat_end_emit_"+name), false)

// Emit as copy instead...
Label("repeat_as_copy_" + name)
Expand Down Expand Up @@ -1103,7 +1103,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, skipLog, lHash
JZ(LabelRef("repeat_as_copy_" + name))

// Emit as repeat...
o.emitRepeat("match_repeat_"+name, length, offsetVal, nil, dst, LabelRef("repeat_end_emit_"+name))
o.emitRepeat("match_repeat_"+name, length, offsetVal, nil, dst, LabelRef("repeat_end_emit_"+name), false)

// Emit as copy instead...
Label("repeat_as_copy_" + name)
Expand Down Expand Up @@ -1314,7 +1314,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, skipLog, lHash
// length += 4
ADDL(U8(4), length.As32())
MOVL(s, nextEmitL) // nextEmit = s
o.emitRepeat("match_nolit_repeat_"+name, length, offset, nil, dst, LabelRef("match_nolit_emitcopy_end_"+name))
o.emitRepeat("match_nolit_repeat_"+name, length, offset, nil, dst, LabelRef("match_nolit_emitcopy_end_"+name), false)
}
}
Label("match_nolit_emitcopy_end_" + name)
Expand Down Expand Up @@ -1774,7 +1774,7 @@ func (o options) genEmitRepeat() {
Load(Param("dst").Base(), dstBase)
Load(Param("offset"), offset)
Load(Param("length"), length)
o.emitRepeat("standalone", length, offset, retval, dstBase, LabelRef("gen_emit_repeat_end"))
o.emitRepeat("standalone", length, offset, retval, dstBase, LabelRef("gen_emit_repeat_end"), false)
Label("gen_emit_repeat_end")
Store(retval, ReturnIndex(0))
RET()
Expand All @@ -1786,13 +1786,18 @@ func (o options) genEmitRepeat() {
// retval can be nil.
// Will jump to end label when finished.
// Uses 1 GP register.
func (o options) emitRepeat(name string, length, offset, retval, dstBase reg.GPVirtual, end LabelRef) {
// longer indicates the caller knows the match will be > 12, so the initial length tests are skipped.
func (o options) emitRepeat(name string, length reg.GPVirtual, offset reg.GPVirtual, retval reg.GPVirtual, dstBase reg.GPVirtual, end LabelRef, longer bool) {
Comment("emitRepeat")
if longer {
// Skip initial length tests
LEAL(Mem{Base: length, Disp: -4}, length.As32()) // length -= 4
JMP(LabelRef("cant_repeat_two_offset_" + name))
}
Label("emit_repeat_again_" + name)
tmp := GP32()
MOVL(length.As32(), tmp) // Copy length
// length -= 4
LEAL(Mem{Base: length, Disp: -4}, length.As32())
MOVL(length.As32(), tmp) // Copy length
LEAL(Mem{Base: length, Disp: -4}, length.As32()) // length -= 4

// if length <= 4 (use copied value)
CMPL(tmp.As32(), U8(8))
Expand Down Expand Up @@ -2014,7 +2019,7 @@ func (o options) emitCopy(name string, length, offset, retval, dstBase reg.GPVir
// return 5 + emitRepeat(dst[5:], offset, length)
// Inline call to emitRepeat. Will jump to end
if !o.snappy {
o.emitRepeat(name+"_emit_copy", length, offset, retval, dstBase, end)
o.emitRepeat(name+"_emit_copy", length, offset, retval, dstBase, end, false)
}
JMP(LabelRef("four_bytes_loop_back_" + name))

Expand Down Expand Up @@ -2050,6 +2055,37 @@ func (o options) emitCopy(name string, length, offset, retval, dstBase reg.GPVir
//if length > 64 {
CMPL(length.As32(), U8(64))
JLE(LabelRef("two_byte_offset_short_" + name))

// if offset < 2048 {
if !o.snappy {
CMPL(offset.As32(), U32(2048))
JAE(LabelRef("long_offset_short_" + name))
// Emit a length 8 copy, encoded as 2 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
// dst[1] = uint8(offset)
// dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
tmp := GP64()
MOVL(U32(tagCopy1), tmp.As32())
// Use the LEA displacement to add the constant length bits ((8-4)<<2) to the tag.
LEAL(Mem{Base: tmp, Disp: (8 - 4) << 2}, tmp.As32())
MOVB(offset.As8(), Mem{Base: dstBase, Disp: 1}) // Store offset lower byte
tmp2 := GP64()
MOVL(offset.As32(), tmp2.As32())
SHRL(U8(8), tmp2.As32()) // Remove lower
SHLL(U8(5), tmp2.As32()) // Shift back up
ORL(tmp2.As32(), tmp.As32()) // OR result
MOVB(tmp.As8(), Mem{Base: dstBase, Disp: 0})
if retval != nil {
ADDQ(U8(2), retval) // i += 2
}
ADDQ(U8(2), dstBase) // dst += 2
SUBL(U8(8), length.As32()) // length -= 8
// emitRepeat(dst[2:], offset, length)
o.emitRepeat(name+"_emit_copy_short_2b", length, offset, retval, dstBase, end, true)

Label("long_offset_short_" + name)
}

// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
// dst[2] = uint8(offset >> 8)
Expand All @@ -2069,7 +2105,7 @@ func (o options) emitCopy(name string, length, offset, retval, dstBase reg.GPVir
}
// Inline call to emitRepeat. Will jump to end
if !o.snappy {
o.emitRepeat(name+"_emit_copy_short", length, offset, retval, dstBase, end)
o.emitRepeat(name+"_emit_copy_short", length, offset, retval, dstBase, end, false)
}
JMP(LabelRef("two_byte_offset_" + name))

Expand Down
4 changes: 4 additions & 0 deletions s2/encode_best.go
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,10 @@ func emitCopySize(offset, length int) int {

// Offset no more than 2 bytes.
if length > 64 {
if offset < 2048 {
// Emit a length 8 copy (encoded as 2 bytes), then the rest as repeats.
return 2 + emitRepeatSize(offset, length-8)
}
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + emitRepeatSize(offset, length-60)
}
Expand Down
23 changes: 16 additions & 7 deletions s2/encode_go.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,23 @@ func emitCopy(dst []byte, offset, length int) int {

// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
off := 3
if offset < 2048 {
// Emit a length 8 copy as tagCopy1 (encoded as 2 bytes), rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + emitRepeat(dst[3:], offset, length)
return off + emitRepeat(dst[off:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
Expand Down
Loading