Skip to content

Commit 0dcdbd0

Browse files
committed
Update comments
1 parent dd617a4 commit 0dcdbd0

File tree

2 files changed

+94
-53
lines changed

2 files changed

+94
-53
lines changed

zstd/seqdec_amd64.s

Lines changed: 44 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,34 +25,48 @@ TEXT ·sequenceDecs_decode_amd64(SB), NOSPLIT, $8
2525
/*
2626
This procedure implements the following sequence:
2727
28-
// s.next()
29-
br.fill()
30-
mo, moB := ofState.final()
31-
mo += br.getBits(moB)
32-
33-
br.fill()
34-
ml, mlB := mlState.final()
35-
ml += br.getBits(mlB)
36-
37-
ll, llB := llState.final()
38-
ll += br.getBits(llB)
39-
40-
br.fill()
41-
if i != 0 {
42-
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
43-
bits := br.get32BitsFast(nBits)
44-
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
45-
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
46-
47-
lowBits = uint16(bits >> (ofState.nbBits() & 31))
48-
lowBits &= bitMask[mlState.nbBits()&15]
49-
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
50-
51-
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
52-
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
28+
for ctx.iteration >= 0 {
29+
// s.next()
30+
br.fill()
31+
mo, moB := ofState.final()
32+
mo += br.getBits(moB)
33+
34+
br.fill()
35+
ml, mlB := mlState.final()
36+
ml += br.getBits(mlB)
37+
38+
ll, llB := llState.final()
39+
ll += br.getBits(llB)
40+
41+
br.fill()
42+
if ctx.iteration != 0 {
43+
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
44+
bits := br.get32BitsFast(nBits)
45+
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
46+
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
47+
48+
lowBits = uint16(bits >> (ofState.nbBits() & 31))
49+
lowBits &= bitMask[mlState.nbBits()&15]
50+
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
51+
52+
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
53+
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
54+
}
55+
56+
mo = s.adjustOffset(mo, ll, moB)
57+
58+
if ml > maxMatchLen {
59+
return errorMatchLenTooBig
60+
}
61+
if mo == 0 && ml > 0 {
62+
return errorMatchLenOfsMismatch
63+
}
64+
65+
ctx.iteration -= 1
5366
}
5467
55-
mo = s.adjustOffset(mo, ll, moB)
68+
return 0
69+
5670
*/
5771
#define br_value R8 // br.value
5872
#define br_bits_read R9 // br.bitsRead
@@ -288,7 +302,7 @@ br_fill_byte_by_byte_3:
288302
br_fill_end_3:
289303
// bitreader_fill end
290304

291-
// if i != 0 {
305+
// if ctx.iteration != 0 {
292306
// nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
293307
// bits := br.get32BitsFast(nBits)
294308
// lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
@@ -545,15 +559,16 @@ check_triple:
545559
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
546560
}
547561
*/
562+
XORQ AX, AX
548563
TESTQ BX, BX
549564
SETEQ DL
550565
CMPQ CX, $0
551566
SETHI AL
552567
ANDQ DX, AX
553-
TESTB AL, AL
568+
TESTQ AX, AX
554569
JNZ error_match_len_ofs_mismatch
555570

556-
ADDQ $24, seqs
571+
ADDQ $24, seqs // sizeof(seqVals) == 3*8
557572

558573
DECQ decodeAsmContext_iteration(DI)
559574
JNS main_loop

zstd/seqdec_amd64.s.in

Lines changed: 50 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,16 @@ br_fill_end{{.}}:
185185
#endif
186186
{{end}}
187187

188+
{{/*
189+
Input:
190+
AX - number of bits
191+
192+
Output:
193+
BX - value
194+
195+
Clobbers:
196+
AX, BX, CX
197+
*/}}
188198
{{define "get_bits"}}
189199
#ifdef GOAMD64_v3
190200
LEAQ (br_bits_read)(AX*1), CX
@@ -218,34 +228,48 @@ TEXT ·sequenceDecs_decode_amd64(SB), NOSPLIT, $8
218228
/*
219229
This procedure implements the following sequence:
220230

221-
// s.next()
222-
br.fill()
223-
mo, moB := ofState.final()
224-
mo += br.getBits(moB)
231+
for ctx.iteration >= 0 {
232+
// s.next()
233+
br.fill()
234+
mo, moB := ofState.final()
235+
mo += br.getBits(moB)
236+
237+
br.fill()
238+
ml, mlB := mlState.final()
239+
ml += br.getBits(mlB)
225240

226-
br.fill()
227-
ml, mlB := mlState.final()
228-
ml += br.getBits(mlB)
241+
ll, llB := llState.final()
242+
ll += br.getBits(llB)
229243

230-
ll, llB := llState.final()
231-
ll += br.getBits(llB)
244+
br.fill()
245+
if ctx.iteration != 0 {
246+
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
247+
bits := br.get32BitsFast(nBits)
248+
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
249+
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
232250

233-
br.fill()
234-
if i != 0 {
235-
nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
236-
bits := br.get32BitsFast(nBits)
237-
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
238-
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
251+
lowBits = uint16(bits >> (ofState.nbBits() & 31))
252+
lowBits &= bitMask[mlState.nbBits()&15]
253+
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
239254

240-
lowBits = uint16(bits >> (ofState.nbBits() & 31))
241-
lowBits &= bitMask[mlState.nbBits()&15]
242-
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
255+
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
256+
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
257+
}
243258

244-
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
245-
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
259+
mo = s.adjustOffset(mo, ll, moB)
260+
261+
if ml > maxMatchLen {
262+
return errorMatchLenTooBig
263+
}
264+
if mo == 0 && ml > 0 {
265+
return errorMatchLenOfsMismatch
266+
}
267+
268+
ctx.iteration -= 1
246269
}
247270

248-
mo = s.adjustOffset(mo, ll, moB)
271+
return 0
272+
249273
*/
250274
#define br_value R8 // br.value
251275
#define br_bits_read R9 // br.bitsRead
@@ -316,7 +340,7 @@ main_loop:
316340
{{template "bitreader_fill" .}}
317341
{{end}}
318342

319-
// if i != 0 {
343+
// if ctx.iteration != 0 {
320344
// nBits := ctx.llState.nbBits() + ctx.mlState.nbBits() + ctx.ofState.nbBits()
321345
// bits := br.get32BitsFast(nBits)
322346
// lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
@@ -420,20 +444,22 @@ check_triple:
420444
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
421445
}
422446
*/
447+
XORQ AX, AX
423448
TESTQ BX, BX
424449
SETEQ DL
425450
CMPQ CX, $0
426451
SETHI AL
427452
ANDQ DX, AX
428-
TESTB AL, AL
453+
TESTQ AX, AX
429454
JNZ error_match_len_ofs_mismatch
430455

431-
ADDQ $24, seqs
456+
ADDQ $24, seqs // sizeof(seqVals) == 3*8
432457

433458
DECQ decodeAsmContext_iteration(DI)
434459
JNS main_loop
435460

436461
XORQ AX, AX
462+
437463
end:
438464
MOVQ 0(SP), BP
439465
MOVQ AX, ret+24(FP)

0 commit comments

Comments
 (0)