Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

s2: Add LZ4 block converter #748

Merged
merged 14 commits into from
Feb 17, 2023
Prev Previous commit
Next Next commit
Add fuzz tests.
  • Loading branch information
klauspost committed Feb 16, 2023
commit ea57f35bf9c3af630642c625d8c253100b446d43
142 changes: 142 additions & 0 deletions internal/lz4ref/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -289,3 +289,145 @@ lastLiterals:
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}

// UncompressBlock decodes the LZ4 block in src into dst and returns the
// number of bytes written to dst.
//
// A negative value is returned if src is empty or malformed, or if the
// decoded data does not fit in dst. Out-of-range accesses triggered by
// corrupt input panic internally; the panic is recovered and reported the
// same way. dst may have been partially overwritten when an error is
// returned.
func UncompressBlock(dst, src []byte) (ret int) {
	// Restrict capacities so we don't read or write out of bounds.
	dst = dst[:len(dst):len(dst)]
	src = src[:len(src):len(src)]

	const debug = false

	const hasError = -2

	if len(src) == 0 {
		return hasError
	}

	// Any slice bounds violation below is turned into an error return.
	defer func() {
		if r := recover(); r != nil {
			if debug {
				fmt.Println("recover:", r)
			}
			ret = hasError
		}
	}()

	var si, di uint
	for {
		if si >= uint(len(src)) {
			return hasError
		}
		// Literals and match lengths (token).
		b := uint(src[si])
		si++

		// Literals.
		if lLen := b >> 4; lLen > 0 {
			switch {
			case lLen < 0xF && si+16 < uint(len(src)):
				// Shortcut 1
				// if we have enough room in src and dst, and the literals length
				// is small enough (0..14) then copy all 16 bytes, even if not all
				// are part of the literals.
				copy(dst[di:], src[si:si+16])
				si += lLen
				di += lLen
				if debug {
					fmt.Println("ll:", lLen)
				}
				if mLen := b & 0xF; mLen < 0xF {
					// Shortcut 2
					// if the match length (4..18) fits within the literals, then copy
					// all 18 bytes, even if not all are part of the literals.
					mLen += 4
					if offset := u16(src[si:]); mLen <= offset && offset < di {
						i := di - offset
						// The remaining buffer may not hold 18 bytes.
						// See https://github.com/pierrec/lz4/issues/51.
						//
						// The match itself must also fit in dst: copy silently
						// truncates, so without this check di could advance past
						// len(dst) and be returned as a valid length. When it does
						// not fit, the bounds-checked slow path below rejects it.
						if end := i + 18; end <= uint(len(dst)) && di+mLen <= uint(len(dst)) {
							copy(dst[di:], dst[i:end])
							si += 2
							di += mLen
							continue
						}
					}
				}
			case lLen == 0xF:
				// Extended literal length: add bytes until one is not 0xFF.
				for {
					x := uint(src[si])
					if lLen += x; int(lLen) < 0 {
						if debug {
							fmt.Println("int(lLen) < 0")
						}
						return hasError
					}
					si++
					if x != 0xFF {
						break
					}
				}
				fallthrough
			default:
				copy(dst[di:di+lLen], src[si:si+lLen])
				si += lLen
				di += lLen
				if debug {
					fmt.Println("ll:", lLen)
				}
			}
		}

		mLen := b & 0xF
		if si == uint(len(src)) && mLen == 0 {
			// The input ended right after the literals: done.
			break
		}
		// A match needs a 2 byte offset and must be followed by more input.
		// Written as an addition so a 1 byte src cannot wrap the unsigned
		// subtraction len(src)-2.
		if si+2 >= uint(len(src)) {
			return hasError
		}

		offset := u16(src[si:])
		if offset == 0 {
			return hasError
		}
		si += 2

		// Match.
		mLen += minMatch
		if mLen == minMatch+0xF {
			// Extended match length: add bytes until one is not 0xFF.
			for {
				x := uint(src[si])
				if mLen += x; int(mLen) < 0 {
					return hasError
				}
				si++
				if x != 0xFF {
					break
				}
			}
		}
		if debug {
			fmt.Println("ml:", mLen, "offset:", offset)
		}

		// Copy the match.
		if di < offset {
			// The match would start before the beginning of dst.
			return hasError
		}

		expanded := dst[di-offset:]
		if mLen > offset {
			// Efficiently copy the match dst[di-offset:di] into the dst slice.
			bytesToCopy := offset * (mLen / offset)
			for n := offset; n <= bytesToCopy+offset; n *= 2 {
				copy(expanded[n:], expanded[:n])
			}
			di += bytesToCopy
			mLen -= bytesToCopy
		}
		di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
	}

	return int(di)
}

// u16 reads the first two bytes of p as a little-endian 16-bit value and
// widens it to uint.
func u16(p []byte) uint {
	v := binary.LittleEndian.Uint16(p)
	return uint(v)
}
12 changes: 7 additions & 5 deletions s2/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func main() {
o.snappy = false
o.outputMargin = 0
o.maxLen = math.MaxUint32
o.maxOffset = math.MaxUint32 - 1
o.genEmitLiteral()
o.genEmitRepeat()
o.genEmitCopy()
Expand Down Expand Up @@ -136,6 +137,7 @@ type options struct {
bmi1 bool
bmi2 bool
maxLen int
maxOffset int
outputMargin int // Should be at least 5.
maxSkip int
}
Expand All @@ -148,6 +150,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
Pragma("noescape")

o.maxLen = maxLen
o.maxOffset = maxLen - 1
var literalMaxOverhead = maxLitOverheadFor(maxLen)

var tableSize = 4 * (1 << tableBits)
Expand Down Expand Up @@ -802,6 +805,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk

const sHashBytes = 4
o.maxLen = maxLen
o.maxOffset = maxLen - 1

var lTableSize = 4 * (1 << lTableBits)
var sTableSize = 4 * (1 << sTableBits)
Expand Down Expand Up @@ -1298,7 +1302,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
// NOT REPEAT
{
// Check if match is better..
if o.maxLen > 65535 {
if o.maxOffset > 65535 {
CMPL(length.As32(), U8(1))
JG(LabelRef("match_length_ok_" + name))
CMPL(offset32, U32(65535))
Expand Down Expand Up @@ -2050,7 +2054,7 @@ const (
func (o options) emitCopy(name string, length, offset, retval, dstBase reg.GPVirtual, end LabelRef) {
Comment("emitCopy")

if o.maxLen >= 65536 {
if o.maxOffset >= 65536 {
//if offset >= 65536 {
CMPL(offset.As32(), U32(65536))
JL(LabelRef("two_byte_offset_" + name))
Expand Down Expand Up @@ -2781,6 +2785,7 @@ func (o options) cvtLZ4BlockAsm() {
Doc("cvtLZ4BlockAsm converts an LZ4 block to S2", "")
Pragma("noescape")
o.outputMargin = 8
o.maxOffset = math.MaxUint16

const (
errCorrupt = -1
Expand Down Expand Up @@ -2930,8 +2935,6 @@ func (o options) cvtLZ4BlockAsm() {
CMPQ(offset, lastOffset)
JNE(LabelRef("lz4_s2_docopy"))
// Offsets can only be 16 bits
maxLength := o.maxLen
o.maxLen = 65535
{
// emitRepeat16(dst[d:], offset, ml)
o.emitRepeat("lz4_s2", ml, offset, nil, dst, LabelRef("lz4_s2_loop"), false)
Expand All @@ -2942,7 +2945,6 @@ func (o options) cvtLZ4BlockAsm() {
MOVQ(offset, lastOffset)
o.emitCopy("lz4_s2", ml, offset, nil, dst, LabelRef("lz4_s2_loop"))
}
o.maxLen = maxLength

Label("lz4_s2_done")
{
Expand Down
77 changes: 74 additions & 3 deletions s2/encodeblock_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -18038,6 +18038,7 @@ lz4_s2_ml_done:
JNE lz4_s2_docopy

// emitRepeat
emit_repeat_again_lz4_s2:
MOVL R10, R8
LEAL -4(R10), R10
CMPL R8, $0x08
Expand All @@ -18050,6 +18051,28 @@ lz4_s2_ml_done:
cant_repeat_two_offset_lz4_s2:
CMPL R10, $0x00000104
JLT repeat_three_lz4_s2
CMPL R10, $0x00010100
JLT repeat_four_lz4_s2
CMPL R10, $0x0100ffff
JLT repeat_five_lz4_s2
LEAL -16842747(R10), R10
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_lz4_s2

repeat_five_lz4_s2:
LEAL -65536(R10), R10
MOVL R10, R9
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, R9
MOVB R9, 4(AX)
ADDQ $0x05, AX
JMP lz4_s2_loop

repeat_four_lz4_s2:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
Expand Down Expand Up @@ -18092,16 +18115,19 @@ lz4_s2_docopy:
MOVL $0x00000001, R8
LEAL 16(R8), R8
MOVB R9, 1(AX)
SHRL $0x08, R9
SHLL $0x05, R9
ORL R9, R8
MOVL R9, R11
SHRL $0x08, R11
SHLL $0x05, R11
ORL R11, R8
MOVB R8, (AX)
ADDQ $0x02, AX
SUBL $0x08, R10

// emitRepeat
LEAL -4(R10), R10
JMP cant_repeat_two_offset_lz4_s2_emit_copy_short_2b

emit_repeat_again_lz4_s2_emit_copy_short_2b:
MOVL R10, R8
LEAL -4(R10), R10
CMPL R8, $0x08
Expand All @@ -18114,6 +18140,28 @@ lz4_s2_docopy:
cant_repeat_two_offset_lz4_s2_emit_copy_short_2b:
CMPL R10, $0x00000104
JLT repeat_three_lz4_s2_emit_copy_short_2b
CMPL R10, $0x00010100
JLT repeat_four_lz4_s2_emit_copy_short_2b
CMPL R10, $0x0100ffff
JLT repeat_five_lz4_s2_emit_copy_short_2b
LEAL -16842747(R10), R10
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_lz4_s2_emit_copy_short_2b

repeat_five_lz4_s2_emit_copy_short_2b:
LEAL -65536(R10), R10
MOVL R10, R9
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, R9
MOVB R9, 4(AX)
ADDQ $0x05, AX
JMP lz4_s2_loop

repeat_four_lz4_s2_emit_copy_short_2b:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
Expand Down Expand Up @@ -18152,6 +18200,7 @@ long_offset_short_lz4_s2:
ADDQ $0x03, AX

// emitRepeat
emit_repeat_again_lz4_s2_emit_copy_short:
MOVL R10, R8
LEAL -4(R10), R10
CMPL R8, $0x08
Expand All @@ -18164,6 +18213,28 @@ long_offset_short_lz4_s2:
cant_repeat_two_offset_lz4_s2_emit_copy_short:
CMPL R10, $0x00000104
JLT repeat_three_lz4_s2_emit_copy_short
CMPL R10, $0x00010100
JLT repeat_four_lz4_s2_emit_copy_short
CMPL R10, $0x0100ffff
JLT repeat_five_lz4_s2_emit_copy_short
LEAL -16842747(R10), R10
MOVW $0x001d, (AX)
MOVW $0xfffb, 2(AX)
MOVB $0xff, 4(AX)
ADDQ $0x05, AX
JMP emit_repeat_again_lz4_s2_emit_copy_short

repeat_five_lz4_s2_emit_copy_short:
LEAL -65536(R10), R10
MOVL R10, R9
MOVW $0x001d, (AX)
MOVW R10, 2(AX)
SARL $0x10, R9
MOVB R9, 4(AX)
ADDQ $0x05, AX
JMP lz4_s2_loop

repeat_four_lz4_s2_emit_copy_short:
LEAL -256(R10), R10
MOVW $0x0019, (AX)
MOVW R10, 2(AX)
Expand Down
3 changes: 3 additions & 0 deletions s2/lz4convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return dst[:d], 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
Expand Down
Loading