From 20d0f941eef7356895b4f9d4abe3c36a81c6309c Mon Sep 17 00:00:00 2001
From: Klaus Post
Date: Sat, 7 May 2022 04:18:05 -0700
Subject: [PATCH] inflate: Keep dict on stack (#581)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

```
λ benchcmp before.txt after.txt
benchmark                              old ns/op     new ns/op     delta
BenchmarkDecodeDigitsSpeed1e4-32       45478         44642         -1.84%
BenchmarkDecodeDigitsSpeed1e5-32       520045        506007        -2.70%
BenchmarkDecodeDigitsSpeed1e6-32       5213200       5067185       -2.80%
BenchmarkDecodeDigitsDefault1e4-32     49476         48936         -1.09%
BenchmarkDecodeDigitsDefault1e5-32     496733        490872        -1.18%
BenchmarkDecodeDigitsDefault1e6-32     4869294       4851319       -0.37%
BenchmarkDecodeDigitsCompress1e4-32    44262         44419         +0.35%
BenchmarkDecodeDigitsCompress1e5-32    470795        473693        +0.62%
BenchmarkDecodeDigitsCompress1e6-32    4690131       4689175       -0.02%
BenchmarkDecodeTwainSpeed1e4-32        49316         49218         -0.20%
BenchmarkDecodeTwainSpeed1e5-32        531644        527865        -0.71%
BenchmarkDecodeTwainSpeed1e6-32        5316889       5281027       -0.67%
BenchmarkDecodeTwainDefault1e4-32      49570         49543         -0.05%
BenchmarkDecodeTwainDefault1e5-32      492474        488411        -0.83%
BenchmarkDecodeTwainDefault1e6-32      4902696       4844194       -1.19%
BenchmarkDecodeTwainCompress1e4-32     47788         47235         -1.16%
BenchmarkDecodeTwainCompress1e5-32     465616        454616        -2.36%
BenchmarkDecodeTwainCompress1e6-32     4606437       4513280       -2.02%
BenchmarkDecodeRandomSpeed1e4-32       299           296           -0.94%
BenchmarkDecodeRandomSpeed1e5-32       1930          1942          +0.62%
BenchmarkDecodeRandomSpeed1e6-32       19980         19990         +0.05%
```
---
 flate/_gen/gen_inflate.go |  20 ++++----
 flate/inflate_gen.go      | 100 +++++++++++++++++++-------------
 2 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/flate/_gen/gen_inflate.go b/flate/_gen/gen_inflate.go
index ff43f79dd4..64ff1aad4c 100644
--- a/flate/_gen/gen_inflate.go
+++ b/flate/_gen/gen_inflate.go
@@ -48,7 +48,7 @@ func (f *decompressor) $FUNCNAME$() {
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
-	fnb, fb := f.nb, f.b
+	fnb, fb, dict := f.nb, f.b, &f.dict
 
 	switch f.stepState {
 	case stateInit:
@@ -106,9 +106,9 @@ readLiteral:
 		var length int
 		switch {
 		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
 				f.step = (*decompressor).$FUNCNAME$
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
@@ -251,10 +251,10 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > uint32(f.dict.histSize()) {
+		if dist > uint32(dict.histSize()) {
 			f.b, f.nb = fb, fnb
 			if debugDecode {
-				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
 			}
 			f.err = CorruptInputError(f.roffset)
 			return
@@ -267,14 +267,14 @@ readLiteral:
 copyHistory:
 	// Perform a backwards copy according to RFC section 3.2.3.
 	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
 		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
 		}
 		f.copyLen -= cnt
 
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
 			f.step = (*decompressor).$FUNCNAME$ // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
diff --git a/flate/inflate_gen.go b/flate/inflate_gen.go
index 8d632cea0f..61342b6b88 100644
--- a/flate/inflate_gen.go
+++ b/flate/inflate_gen.go
@@ -24,7 +24,7 @@ func (f *decompressor) huffmanBytesBuffer() {
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
-	fnb, fb := f.nb, f.b
+	fnb, fb, dict := f.nb, f.b, &f.dict
 
 	switch f.stepState {
 	case stateInit:
@@ -82,9 +82,9 @@ readLiteral:
 		var length int
 		switch {
 		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
 				f.step = (*decompressor).huffmanBytesBuffer
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
@@ -227,10 +227,10 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > uint32(f.dict.histSize()) {
+		if dist > uint32(dict.histSize()) {
 			f.b, f.nb = fb, fnb
 			if debugDecode {
-				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
 			}
 			f.err = CorruptInputError(f.roffset)
 			return
@@ -243,14 +243,14 @@ readLiteral:
 copyHistory:
 	// Perform a backwards copy according to RFC section 3.2.3.
 	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
 		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
 		}
 		f.copyLen -= cnt
 
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
 			f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
@@ -275,7 +275,7 @@ func (f *decompressor) huffmanBytesReader() {
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
-	fnb, fb := f.nb, f.b
+	fnb, fb, dict := f.nb, f.b, &f.dict
 
 	switch f.stepState {
 	case stateInit:
@@ -333,9 +333,9 @@ readLiteral:
 		var length int
 		switch {
 		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
 				f.step = (*decompressor).huffmanBytesReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
@@ -478,10 +478,10 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > uint32(f.dict.histSize()) {
+		if dist > uint32(dict.histSize()) {
 			f.b, f.nb = fb, fnb
 			if debugDecode {
-				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
 			}
 			f.err = CorruptInputError(f.roffset)
 			return
@@ -494,14 +494,14 @@ readLiteral:
 copyHistory:
 	// Perform a backwards copy according to RFC section 3.2.3.
 	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
 		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
 		}
 		f.copyLen -= cnt
 
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
 			f.step = (*decompressor).huffmanBytesReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
@@ -526,7 +526,7 @@ func (f *decompressor) huffmanBufioReader() {
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
-	fnb, fb := f.nb, f.b
+	fnb, fb, dict := f.nb, f.b, &f.dict
 
 	switch f.stepState {
 	case stateInit:
@@ -584,9 +584,9 @@ readLiteral:
 		var length int
 		switch {
 		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
 				f.step = (*decompressor).huffmanBufioReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
@@ -729,10 +729,10 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > uint32(f.dict.histSize()) {
+		if dist > uint32(dict.histSize()) {
 			f.b, f.nb = fb, fnb
 			if debugDecode {
-				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
 			}
 			f.err = CorruptInputError(f.roffset)
 			return
@@ -745,14 +745,14 @@ readLiteral:
 copyHistory:
 	// Perform a backwards copy according to RFC section 3.2.3.
 	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
 		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
 		}
 		f.copyLen -= cnt
 
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
 			f.step = (*decompressor).huffmanBufioReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
@@ -777,7 +777,7 @@ func (f *decompressor) huffmanStringsReader() {
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
-	fnb, fb := f.nb, f.b
+	fnb, fb, dict := f.nb, f.b, &f.dict
 
 	switch f.stepState {
 	case stateInit:
@@ -835,9 +835,9 @@ readLiteral:
 		var length int
 		switch {
 		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
 				f.step = (*decompressor).huffmanStringsReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
@@ -980,10 +980,10 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > uint32(f.dict.histSize()) {
+		if dist > uint32(dict.histSize()) {
 			f.b, f.nb = fb, fnb
 			if debugDecode {
-				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
 			}
 			f.err = CorruptInputError(f.roffset)
 			return
@@ -996,14 +996,14 @@ readLiteral:
 copyHistory:
 	// Perform a backwards copy according to RFC section 3.2.3.
 	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
 		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
 		}
 		f.copyLen -= cnt
 
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
 			f.step = (*decompressor).huffmanStringsReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
@@ -1028,7 +1028,7 @@ func (f *decompressor) huffmanGenericReader() {
 	// Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers,
 	// but is smart enough to keep local variables in registers, so use nb and b,
 	// inline call to moreBits and reassign b,nb back to f on return.
-	fnb, fb := f.nb, f.b
+	fnb, fb, dict := f.nb, f.b, &f.dict
 
 	switch f.stepState {
 	case stateInit:
@@ -1086,9 +1086,9 @@ readLiteral:
 		var length int
 		switch {
 		case v < 256:
-			f.dict.writeByte(byte(v))
-			if f.dict.availWrite() == 0 {
-				f.toRead = f.dict.readFlush()
+			dict.writeByte(byte(v))
+			if dict.availWrite() == 0 {
+				f.toRead = dict.readFlush()
 				f.step = (*decompressor).huffmanGenericReader
 				f.stepState = stateInit
 				f.b, f.nb = fb, fnb
@@ -1231,10 +1231,10 @@ readLiteral:
 		}
 
 		// No check on length; encoding can be prescient.
-		if dist > uint32(f.dict.histSize()) {
+		if dist > uint32(dict.histSize()) {
 			f.b, f.nb = fb, fnb
 			if debugDecode {
-				fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize())
+				fmt.Println("dist > dict.histSize():", dist, dict.histSize())
 			}
 			f.err = CorruptInputError(f.roffset)
 			return
@@ -1247,14 +1247,14 @@ readLiteral:
 copyHistory:
 	// Perform a backwards copy according to RFC section 3.2.3.
 	{
-		cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen)
+		cnt := dict.tryWriteCopy(f.copyDist, f.copyLen)
 		if cnt == 0 {
-			cnt = f.dict.writeCopy(f.copyDist, f.copyLen)
+			cnt = dict.writeCopy(f.copyDist, f.copyLen)
 		}
 		f.copyLen -= cnt
 
-		if f.dict.availWrite() == 0 || f.copyLen > 0 {
-			f.toRead = f.dict.readFlush()
+		if dict.availWrite() == 0 || f.copyLen > 0 {
+			f.toRead = dict.readFlush()
 			f.step = (*decompressor).huffmanGenericReader // We need to continue this work
 			f.stepState = stateDict
 			f.b, f.nb = fb, fnb
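
The pattern the patch relies on, shown as a standalone sketch: copy hot fields (`f.b`, `f.nb`, and a `&f.dict` pointer in the patch) into locals at the top of the hot path so the compiler can keep them in registers, then reassign them to the struct before every return. The `bitReader`/`readBits` names below are made up for illustration and are not part of this package.

```go
// Minimal sketch of the "keep hot fields in locals" pattern used by the patch.
// bitReader and readBits are illustrative names, not the real decompressor.
package main

import "fmt"

type bitReader struct {
	b   uint64 // bit buffer (stand-in for f.b)
	nb  uint   // valid bits in b (stand-in for f.nb)
	src []byte
	off int
}

// readBits works on local copies fb/fnb instead of reloading f.b/f.nb through
// the pointer on every use, mirroring `fnb, fb, dict := f.nb, f.b, &f.dict`.
func (f *bitReader) readBits(n uint) uint64 {
	fb, fnb := f.b, f.nb
	for fnb < n && f.off < len(f.src) {
		fb |= uint64(f.src[f.off]) << fnb
		f.off++
		fnb += 8
	}
	v := fb & (1<<n - 1)
	fb >>= n
	fnb -= n            // assumes enough input bits were available
	f.b, f.nb = fb, fnb // write the locals back before returning
	return v
}

func main() {
	r := &bitReader{src: []byte{0xAB, 0xCD}}
	fmt.Printf("%#x\n", r.readBits(12)) // prints 0xdab
}
```

In the patch itself the same write-back (`f.b, f.nb = fb, fnb`) happens at every early return, and `dict` is only a pointer to `f.dict`, so no history data is copied; only the pointer is kept in a local where it can be register-allocated.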