33import com .github .myibu .algorithm .data .Bits ;
44import com .github .myibu .algorithm .endode .GolombEncoder ;
55
6+ import java .math .BigDecimal ;
7+ import java .math .RoundingMode ;
68import java .util .*;
79import java .util .stream .Collectors ;
810
@@ -74,41 +76,40 @@ public int compress(byte[] in_data, int in_len, byte[] out_data) {
7476 }
7577 int llStart = sEnd - 1 , rrStart = 0 , llEnd = 0 , rrEnd = (lp = lEnd );
7678 int minMatched = 1 , minIndex = 0 ;
77- while ( llStart >= 0 ) {
78- int matched = 0 , left = llStart , right = rrStart ;
79- while (left >= 0 && right < rrEnd && sBuf [left --] == lWindow [right ++]) {
79+ for ( int i = llStart ; i >= 0 ; i -- ) {
80+ int matched = 0 , left = i , right = rrStart ;
81+ while (left >= llEnd && right < rrEnd && sBuf [left --] == lWindow [right ++]) {
8082 matched ++;
8183 }
8284 if (matched >= minMatched ) {
83- minIndex = llStart ;
85+ minIndex = i ;
8486 minMatched = matched ;
8587 }
86- llStart --;
8788 }
89+ int lWindowLen = lWindow .length ;
8890 // only one byte in window, set tuple to (0, 0, lWindow[0])
89- if (lWindow . length == 1 ) {
91+ if (lWindowLen == 1 ) {
9092 minIndex = 0 ;
9193 }
9294 // matched
9395 if (minIndex > 0 ) {
94- // 匹配到5个怎么编码
95- tuples .add (Arrays .asList ( minIndex + 1 , minMatched , (int )lWindow [minMatched ]));
96- sp += (minMatched + 1 );
97- ip += (minMatched + 1 );
96+ tuples .add (Arrays .asList ( minIndex + 1 , minMatched , (minMatched == lWindowLen ) ? null : (int )lWindow [minMatched ]));
97+ sp += ((minMatched == lWindowLen ) ? minMatched : (minMatched + 1 ));
98+ ip += ((minMatched == lWindowLen ) ? minMatched : (minMatched + 1 ));
9899 } else {
99100 sp ++;
100101 ip ++;
101102 tuples .add (Arrays .asList (0 , 0 , (int )lWindow [0 ]));
102103 }
103104 if (isDebug ) {
104- System .out .println ("Txt=" + new String () + new String ( in_data ) + " , SearchBuffer="
105+ System .out .println (", SearchBuffer="
105106 + new StringBuilder (new String (sBuf )).reverse ().toString () + ", LookaheadWindow=" + new String (lWindow )
106107 + " | " + tuples .get (tuples .size ()-1 )/* + " | " + (char)(tuples.get(tuples.size()-1).get(2).intValue())*/ );
107108 }
108109 }
109110 int compressedLen = doEncode (tuples , out_data );
110111 if (isDebug ) {
111- System .out .println ("after encode: compressed rate=" + (compressedLen * 1 .0 / in_len ));
112+ System .out .println ("after encode: compressed rate=" + new BigDecimal (compressedLen * 100 .0 / in_len ). setScale ( 2 , RoundingMode . HALF_UP ) + "%" );
112113 }
113114 return compressedLen ;
114115 }
@@ -122,8 +123,11 @@ private int doEncode(List<List<Integer>> tuples, byte[] out_data) {
122123 bits .append (bits1 );
123124 Bits bits2 = encoder .encode (tuple .get (1 ), l );
124125 bits .append (bits2 );
125- Bits bits3 = Bits .ofByte ((byte )tuple .get (2 ).intValue ());
126- bits .append (bits3 );
126+ Bits bits3 = new Bits ();
127+ if (tuple .get (2 ) != null ) {
128+ bits3 = Bits .ofByte ((byte ) tuple .get (2 ).intValue ());
129+ bits .append (bits3 );
130+ }
127131 if (isDebug ) {
128132 System .out .println (tuple + " encoded result: " + "(" + bits1 + ", " + bits2 + ", " + bits3 + ")" );
129133 }
@@ -181,12 +185,16 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
181185 }
182186 }
183187 }
184- if (length == -1 || ip + 8 > bits . length () ) {
188+ if (length == -1 ) {
185189 break ;
186190 }
187- int symbol = (int )bits .subBits (ip , ip +8 ).toByte ();
188- tuples .add (Arrays .asList (offset , length , symbol ));
189- ip += 8 ;
191+ if (length != l && ip + 8 <= bits .length ()) {
192+ int symbol = (int ) bits .subBits (ip , ip + 8 ).toByte ();
193+ tuples .add (Arrays .asList (offset , length , symbol ));
194+ ip += 8 ;
195+ } else {
196+ tuples .add (Arrays .asList (offset , length , null ));
197+ }
190198 }
191199 if (isDebug ) {
192200 System .out .println ("decode tuples=" + tuples );
@@ -197,17 +205,35 @@ public int decompress(byte[] in_data, int in_len, byte[] out_data) {
197205 private int doDecode (List <List <Integer >> tuples , byte [] out_data ) {
198206 Bits seq = new Bits ();
199207 for (List <Integer > tuple : tuples ) {
200- int offset = tuple .get (0 ), length = tuple .get (1 ), symbol = tuple .get (2 );
201- Bits sb = Bits .ofByte ((byte ) symbol );
202- if (offset == 0 ) {
203- seq .append (sb );
208+ int offset = tuple .get (0 ), length = tuple .get (1 );
209+ if (tuple .get (2 ) != null ) {
210+ int symbol = tuple .get (2 );
211+ Bits sb = Bits .ofByte ((byte ) symbol );
212+ if (offset == 0 ) {
213+ seq .append (sb );
214+ if (isDebug ) {
215+ System .out .println (tuple + ", seq=" + new String (seq .toByteArray ()));
216+ }
217+ } else {
218+ int start = seq .byteLength () < s ? seq .byteLength () - offset : s - offset ;
219+ int used = seq .byteLength () < s ? 0 : seq .byteLength () - s ;
220+ seq .append (seq .subBits ((used + start ) * 8 , (used + start + length ) * 8 )).append (sb );
221+ if (isDebug ) {
222+ System .out .println (tuple + ", seq=" + new String (seq .toByteArray ()));
223+ }
224+ }
204225 } else {
205226 int start = seq .byteLength () < s ? seq .byteLength () - offset : s - offset ;
206227 int used = seq .byteLength () < s ? 0 : seq .byteLength () - s ;
207- seq .append (seq .subBits ((used + start ) * 8 , (used + start + length ) * 8 )).append (sb );
208- // System.out.println("start=" + start + ", used=" + used + ", length=" + length + ", seq=" + seq);
228+ seq .append (seq .subBits ((used + start ) * 8 , (used + start + length ) * 8 ));
229+ if (isDebug ) {
230+ System .out .println (tuple + ", seq=" + new String (seq .toByteArray ()));
231+ }
209232 }
210233 }
234+ if (isDebug ) {
235+ System .out .println ("after decode, bits=" + seq );
236+ }
211237 int len = seq .byteLength ();
212238 for (int i = 0 ; i < len ; i ++) {
213239 out_data [i ] = seq .getByte (i ).toByte ();
@@ -222,4 +248,9 @@ private int doDecode(List<List<Integer>> tuples, byte[] out_data) {
222248 public void setDebug (boolean isDebug ) {
223249 this .isDebug = isDebug ;
224250 }
251+
252+ public void setSL (int s , int l ) {
253+ this .s = s ;
254+ this .l = l ;
255+ }
225256}
0 commit comments