From f2ad3b09b89bf57bfd3278e4763fadbb678ad03e Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 20 Nov 2015 17:59:05 +0100 Subject: [PATCH 01/22] dev branch --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 309594a..a21b019 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ # Directories +_lz5bench/ _codelite/ cmake_unofficial/ contrib/ From ec454840c7dd9a3c14bf899d5526e7bb9b3770f8 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 20 Nov 2015 18:50:59 +0100 Subject: [PATCH 02/22] hashTable3 --- lib/lz5.c | 9 ++++++--- lib/lz5hc.c | 43 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index faf6cba..546bb37 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -208,6 +208,8 @@ static void LZ5_write16(void* memPtr, U16 value) #endif // LZ5_FORCE_MEMORY_ACCESS +#define LZ5_read24(ptr) (uint32_t)(LZ5_read32(ptr)<<8) +//#define LZ5_read24(ptr) LZ5_read32(ptr) static U16 LZ5_readLE16(const void* memPtr) { @@ -292,7 +294,8 @@ static void LZ5_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) /************************************** * Common Constants **************************************/ -#define MINMATCH 4 +#define MINMATCH 3 +#define MINMATCH4 4 #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 @@ -463,9 +466,9 @@ int LZ5_sizeofState() { return LZ5_STREAMSIZE; } static U32 LZ5_hashSequence(U32 sequence, tableType_t const tableType) { if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ5_HASHLOG+1))); + return (((sequence) * 2654435761U) >> ((MINMATCH4*8)-(LZ5_HASHLOG+1))); else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ5_HASHLOG)); + return (((sequence) * 2654435761U) >> ((MINMATCH4*8)-LZ5_HASHLOG)); } static const U64 prime5bytes = 889523592379ULL; diff --git a/lib/lz5hc.c b/lib/lz5hc.c index ae1a7c8..ed3f379 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -81,8 +81,9 @@ static const int LZ5HC_compressionLevel_default = 9; #define MAXD_MASK (MAXD - 1) #define HASH_LOG (DICTIONARY_LOGSIZE-1) +#define HASH_LOG3 13 #define HASHTABLESIZE (1 << HASH_LOG) -#define HASH_MASK (HASHTABLESIZE - 1) +#define HASHTABLESIZE3 (1 << HASH_LOG3) #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) @@ -95,6 +96,7 @@ static const int g_maxCompressionLevel = 16; struct LZ5HC_Data_s { U32* hashTable; + U32* hashTable3; U32* chainTable; const BYTE* end; /* next block here to continue on current prefix */ const BYTE* base; /* All index relative to this position */ @@ -106,16 +108,17 @@ struct LZ5HC_Data_s U32 compressionLevel; }; - /************************************** * Local Macros **************************************/ -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-HASH_LOG)) +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH4*8)-HASH_LOG)) +#define HASH_FUNCTION3(i) (((i) * 506832829U) >> ((MINMATCH4*8)-HASH_LOG3)) //#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ #define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */ #define DELTANEXTU32(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ static U32 LZ5HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ5_read32(ptr)); } +static U32 LZ5HC_hashPtr3(const void* ptr) { return HASH_FUNCTION3(LZ5_read32(ptr)); } #define LZ5HC_LIMIT (1<hashTable, 0, sizeof(U32)*HASHTABLESIZE); + MEM_INIT((void*)hc4->hashTable3, 0, sizeof(U32)*HASHTABLESIZE3); MEM_INIT(hc4->chainTable, 0xFF, sizeof(U32)*MAXD); hc4->nextToUpdate = LZ5HC_LIMIT; hc4->base = start - LZ5HC_LIMIT; @@ -187,7 +191,7 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I { match = base + matchIndex; if (*(match+ml) == *(ip+ml) - && (LZ5_read32(match) == LZ5_read32(ip))) + && (LZ5_read24(match) == LZ5_read24(ip))) { size_t mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; if (mlt > ml) { ml = mlt; *matchpos = match; } @@ -196,7 +200,7 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I else { match = dictBase + matchIndex; - if (LZ5_read32(match) == LZ5_read32(ip)) + if (LZ5_read24(match) == LZ5_read24(ip)) { size_t mlt; const BYTE* vLimit = ip + (dictLimit - matchIndex); @@ -211,6 +215,26 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I matchIndex -= DELTANEXTU32(matchIndex); } + U32* const hashTable3 = hc4->hashTable3; + size_t h = LZ5HC_hashPtr3(ip); + + if (!ml) + { + size_t offset = ip - base - hashTable3[h]; + + if (offset > 0 && offset < MAX_DISTANCE) + { + match = ip - offset; + if (match > base && LZ5_read24(ip) == LZ5_read24(match)) + { + ml = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + } + } + } + + hashTable3[h] = ip - base; + return (int)ml; } @@ -248,7 +272,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( { const BYTE* matchPtr = base + matchIndex; if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) - if (LZ5_read32(matchPtr) == LZ5_read32(ip)) + if (LZ5_read24(matchPtr) == LZ5_read24(ip)) { int mlt = MINMATCH + LZ5_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); int back = 0; @@ -271,7 +295,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( else { const BYTE* matchPtr = dictBase + matchIndex; - if (LZ5_read32(matchPtr) == LZ5_read32(ip)) + if (LZ5_read24(matchPtr) == LZ5_read24(ip)) { size_t mlt; int back=0; @@ -289,6 +313,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( matchIndex -= DELTANEXTU32(matchIndex); } + return longest; } @@ -590,6 +615,10 @@ int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* statePtr) if (!statePtr->hashTable) return 0; + statePtr->hashTable3 = ALLOCATOR(1, sizeof(U32)*HASHTABLESIZE3); + if (!statePtr->hashTable3) + return 0; + statePtr->chainTable = ALLOCATOR(1, sizeof(U32)*MAXD); if (!statePtr->chainTable) { From d4fb629e53982b54eab5bb99a30ba0449863e8b0 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 25 Nov 2015 12:39:57 +0100 Subject: [PATCH 03/22] support for last_off --- README.md | 3 ++- lib/lz5.c | 66 ++++++++++++++++++++++++++++++++++++----------------- lib/lz5hc.c | 37 +++++++++++++++++++----------- 3 files changed, 71 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index f4e69c9..8b2f1b1 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ LZ5 uses different output codewords and is not compatible with LZ4. LZ4 output c LZ5 uses 3 types of codewords from 2 to 4 bytes long: - 1_OO_LL_MMM OOOOOOOO - 10-bit offset, 3-bit match length, 2-bit literal length - 00_LLL_MMM OOOOOOOO OOOOOOOO - 16-bit offset, 3-bit match length, 3-bit literal length -- 01_LLL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 3-bit literal length +- 010_LL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 2-bit literal length +- 011_LL_MMM - last offset, 3-bit match length, 2-bit literal length [LZ4]: https://github.com/Cyan4973/lz4 diff --git a/lib/lz5.c b/lib/lz5.c index 546bb37..357eb38 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -546,7 +546,7 @@ FORCE_INLINE int LZ5_compress_generic( BYTE* op = (BYTE*) dest; BYTE* const olimit = op + maxOutputSize; - U32 forwardH; + U32 forwardH, last_off=1; size_t refDelta=0; /* Init conditions */ @@ -625,12 +625,12 @@ FORCE_INLINE int LZ5_compress_generic( if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) return 0; /* Check output limit */ - if (ip-match < (1<<10)) + if (ip-match >= (1<<10) && ip-match < (1<<16) && ip-match != last_off) { - if (litLength>=RUN_MASK2) + if (litLength>=RUN_MASK) { - int len = (int)litLength-RUN_MASK2; - *token=(RUN_MASK2<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -638,10 +638,10 @@ FORCE_INLINE int LZ5_compress_generic( } else { - if (litLength>=RUN_MASK) + if (litLength>=RUN_MASK2) { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -655,6 +655,12 @@ FORCE_INLINE int LZ5_compress_generic( _next_match: /* Encode Offset */ + if (ip-match == last_off) + { + *token+=(3<>8))<= (1<<10) && ip-match < (1<<16) && ip-match != last_off) { - if (litLength>=RUN_MASK2) + if (litLength>=RUN_MASK) { - int len = (int)litLength-RUN_MASK2; - *token=(RUN_MASK2<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -924,10 +932,10 @@ static int LZ5_compress_destSize_generic( } else { - if (litLength>=RUN_MASK) + if (litLength>=RUN_MASK2) { - int len = (int)litLength-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; *op++ = (BYTE)len; } @@ -941,6 +949,11 @@ static int LZ5_compress_destSize_generic( _next_match: /* Encode Offset */ + if (ip-match == last_off) + { + *token+=(3<>8))< oend-MFLIMIT)) oexit = oend-MFLIMIT; /* targetOutputSize too high => decode everything */ @@ -1298,7 +1313,7 @@ FORCE_INLINE int LZ5_decompress_generic( /* get literal length */ token = *ip++; - if (token>>7) + if (token>>6) { if ((length=(token>>ML_BITS)&RUN_MASK2) == RUN_MASK2) { @@ -1362,10 +1377,19 @@ FORCE_INLINE int LZ5_decompress_generic( { offset = LZ5_readLE16(ip); ip+=2; } - else // length == 1 + else + if ((token>>ML_RUN_BITS2) == 2) { offset = LZ5_readLE24(ip); ip+=3; } + else // (token>>ML_RUN_BITS2) == 3 + { + offset = last_off; +// printf("2last_off=%d\n", offset); + } + + last_off = offset; + // printf("1last_off=%d\n", last_off); match = op - offset; if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside buffers */ diff --git a/lib/lz5hc.c b/lib/lz5hc.c index ed3f379..c86130c 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -84,6 +84,7 @@ static const int LZ5HC_compressionLevel_default = 9; #define HASH_LOG3 13 #define HASHTABLESIZE (1 << HASH_LOG) #define HASHTABLESIZE3 (1 << HASH_LOG3) +#define SHORT_OFFSET_DISTANCE (1<<10) #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) @@ -106,6 +107,7 @@ struct LZ5HC_Data_s U32 lowLimit; /* below that point, no more dict */ U32 nextToUpdate; /* index from which to continue dictionary update */ U32 compressionLevel; + U32 last_off; }; /************************************** @@ -137,6 +139,7 @@ static void LZ5HC_init (LZ5HC_Data_Structure* hc4, const BYTE* start) hc4->dictBase = start - LZ5HC_LIMIT; hc4->dictLimit = LZ5HC_LIMIT; hc4->lowLimit = LZ5HC_LIMIT; + hc4->last_off = 1; } @@ -215,14 +218,14 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I matchIndex -= DELTANEXTU32(matchIndex); } - U32* const hashTable3 = hc4->hashTable3; +/* U32* const hashTable3 = hc4->hashTable3; size_t h = LZ5HC_hashPtr3(ip); if (!ml) { size_t offset = ip - base - hashTable3[h]; - if (offset > 0 && offset < MAX_DISTANCE) + if (offset > 0 && offset < SHORT_OFFSET_DISTANCE) { match = ip - offset; if (match > base && LZ5_read24(ip) == LZ5_read24(match)) @@ -234,7 +237,7 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I } hashTable3[h] = ip - base; - + */ return (int)ml; } @@ -326,6 +329,7 @@ static unsigned debug = 0; #endif FORCE_INLINE int LZ5HC_encodeSequence ( + LZ5HC_Data_Structure* ctx, const BYTE** ip, BYTE** op, const BYTE** anchor, @@ -346,16 +350,16 @@ FORCE_INLINE int LZ5HC_encodeSequence ( token = (*op)++; if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ - if (*ip-match < (1<<10)) + if (*ip-match >= (1<<10) && *ip-match < (1<<16) && *ip-match != ctx->last_off) { - if (length>=(int)RUN_MASK2) { int len; *token=(RUN_MASK2< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } + if (length>=(int)RUN_MASK2) { int len; *token=(RUN_MASK2< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<last_off) + { + *token+=(3<>8))<last_off = *ip-match; /* Encode MatchLength */ length = (int)(matchLength-MINMATCH); @@ -450,7 +461,7 @@ static int LZ5HC_compress_generic ( if (ml2 == ml) /* No better match */ { - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; continue; } @@ -504,9 +515,9 @@ static int LZ5HC_compress_generic ( /* ip & ref are known; Now for ml */ if (start2 < ip+ml) ml = (int)(start2 - ip); /* Now, encode 2 sequences */ - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; ip = start2; - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; continue; } @@ -528,7 +539,7 @@ static int LZ5HC_compress_generic ( } } - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; ip = start3; ref = ref3; ml = ml3; @@ -569,7 +580,7 @@ static int LZ5HC_compress_generic ( ml = (int)(start2 - ip); } } - if (LZ5HC_encodeSequence(&ip, &op, &anchor, ml, ref, limit, oend)) return 0; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; ip = start2; ref = ref2; From db289ba1ce9b3240d3ea5f2cd48f25aec42d2e45 Mon Sep 17 00:00:00 2001 From: inikep Date: Wed, 25 Nov 2015 14:15:30 +0100 Subject: [PATCH 04/22] gain function --- lib/lz5hc.c | 137 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 110 insertions(+), 27 deletions(-) diff --git a/lib/lz5hc.c b/lib/lz5hc.c index c86130c..1c10d74 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -84,7 +84,8 @@ static const int LZ5HC_compressionLevel_default = 9; #define HASH_LOG3 13 #define HASHTABLESIZE (1 << HASH_LOG) #define HASHTABLESIZE3 (1 << HASH_LOG3) -#define SHORT_OFFSET_DISTANCE (1<<10) +#define LZ5_SHORT_OFFSET_BITS 10 +#define LZ5_SHORT_OFFSET_DISTANCE (1<<10) #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) @@ -167,6 +168,22 @@ FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* hc4, const BYTE* ip) } +#define LZ5_NUM_REPS 1 +#define LZ5_NORMAL_MATCH_COST(mlen,offset) (LZ5_MATCH_COST(mlen,offset)) +#define LZ5_NORMAL_LIT_COST(len) (len) + +#define LZ5_LIT_COST(len,offset) ((len)+((offset<(1 << LZ5_SHORT_OFFSET_BITS)) ? LZ5_SHORT_LITLEN_COST(len) : LZ5_LEN_COST(len))) +#define LZ5_MATCH_COST(mlen,offset) (LZ5_LEN_COST(mlen) + ((offset == 1) ? 1 : (offset<(1 << LZ5_SHORT_OFFSET_BITS) ? 2 : (offset<(1 << 16) ? 3 : 4)))) +#define LZ5_CODEWORD_COST(litlen,offset,mlen) (LZ5_MATCH_COST(mlen,offset) + LZ5_LIT_COST(litlen,offset)) +#define LZ5_LIT_ONLY_COST(len) ((len)+(LZ5_LEN_COST(len))+1) + +#define LZ5_SHORT_LITERALS ((1<last_off; + if (LZ5_read24(match) == LZ5_read24(ip)) + { + ml = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + // return (int)ml; + } + + U32* const hashTable3 = hc4->hashTable3; + size_t h = LZ5HC_hashPtr3(ip); + size_t offset = ip - base - hashTable3[h]; + + if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) + { + match = ip - offset; + if (match > base && LZ5_read24(ip) == LZ5_read24(match)) + { + mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (mlt > ml) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + { ml = mlt; *matchpos = match; } + } + } + + hashTable3[h] = ip - base; + /* HC4 match finder */ LZ5HC_Insert(hc4, ip); @@ -196,8 +240,10 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I if (*(match+ml) == *(ip+ml) && (LZ5_read24(match) == LZ5_read24(ip))) { - size_t mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (mlt > ml) { ml = mlt; *matchpos = match; } + mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (mlt > ml) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + { ml = mlt; *matchpos = match; } } } else @@ -205,39 +251,19 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I match = dictBase + matchIndex; if (LZ5_read24(match) == LZ5_read24(ip)) { - size_t mlt; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iLimit) vLimit = iLimit; mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) mlt += LZ5_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + if (mlt > ml) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } -// matchIndex -= DELTANEXTU16(matchIndex); matchIndex -= DELTANEXTU32(matchIndex); } -/* U32* const hashTable3 = hc4->hashTable3; - size_t h = LZ5HC_hashPtr3(ip); - - if (!ml) - { - size_t offset = ip - base - hashTable3[h]; - - if (offset > 0 && offset < SHORT_OFFSET_DISTANCE) - { - match = ip - offset; - if (match > base && LZ5_read24(ip) == LZ5_read24(match)) - { - ml = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - *matchpos = match; - } - } - } - - hashTable3[h] = ip - base; - */ return (int)ml; } @@ -268,6 +294,56 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( LZ5HC_Insert(hc4, ip); matchIndex = HashTable[LZ5HC_hashPtr(ip)]; + const BYTE* match = ip - hc4->last_off; + if (LZ5_read24(match) == LZ5_read24(ip)) + { + int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + + int back = 0; + while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; + mlt -= back; + + if (mlt > longest) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + LZ5_NORMAL_LIT_COST(mlt - longest)) + { + *matchpos = match+back; + *startpos = ip+back; + longest = (int)mlt; + // return (int)mlt; + } + } + + + + U32* const hashTable3 = hc4->hashTable3; + size_t h = LZ5HC_hashPtr3(ip); + + size_t offset = ip - base - hashTable3[h]; + + if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) + { + match = ip - offset; + if (match > base && LZ5_read24(ip) == LZ5_read24(match)) + { + int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + + int back = 0; + while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; + mlt -= back; + + if (mlt > longest) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + LZ5_NORMAL_LIT_COST(mlt - longest)) + { + *matchpos = match+back; + *startpos = ip+back; + longest = (int)mlt; + } + } + } + + hashTable3[h] = ip - base; + + while ((matchIndex>=lowLimit) && (nbAttempts)) { nbAttempts--; @@ -288,6 +364,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( mlt -= back; if (mlt > longest) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == hc4->last_off) ? 1 : (ip - matchPtr + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - longest))) { longest = (int)mlt; *matchpos = matchPtr+back; @@ -449,6 +526,12 @@ static int LZ5HC_compress_generic ( ml = LZ5HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); if (!ml) { ip++; continue; } + if (ip-ref == ctx->last_off) /* last offset */ + { + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; + continue; + } + /* saved, in case we would skip too much */ start0 = ip; ref0 = ref; From f2934ab8ca79b580c8edbba026588893a801c921 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 27 Nov 2015 17:45:17 +0100 Subject: [PATCH 05/22] new parser --- lib/lz5.c | 8 +- lib/lz5hc.c | 445 ++++++++++++++++++++++++---------------------------- 2 files changed, 211 insertions(+), 242 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index 357eb38..881a415 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -208,9 +208,6 @@ static void LZ5_write16(void* memPtr, U16 value) #endif // LZ5_FORCE_MEMORY_ACCESS -#define LZ5_read24(ptr) (uint32_t)(LZ5_read32(ptr)<<8) -//#define LZ5_read24(ptr) LZ5_read32(ptr) - static U16 LZ5_readLE16(const void* memPtr) { if (LZ5_isLittleEndian()) @@ -295,7 +292,6 @@ static void LZ5_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) * Common Constants **************************************/ #define MINMATCH 3 -#define MINMATCH4 4 #define WILDCOPYLENGTH 8 #define LASTLITERALS 5 @@ -466,9 +462,9 @@ int LZ5_sizeofState() { return LZ5_STREAMSIZE; } static U32 LZ5_hashSequence(U32 sequence, tableType_t const tableType) { if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH4*8)-(LZ5_HASHLOG+1))); + return (((sequence) * 2654435761U) >> ((32)-(LZ5_HASHLOG+1))); else - return (((sequence) * 2654435761U) >> ((MINMATCH4*8)-LZ5_HASHLOG)); + return (((sequence) * 2654435761U) >> ((32)-LZ5_HASHLOG)); } static const U64 prime5bytes = 889523592379ULL; diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 1c10d74..1256981 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -33,11 +33,32 @@ */ +/* ************************************* +* Includes +***************************************/ +#include "lz5hc.h" +#include + + +/* ************************************* +* Common LZ5 definition +***************************************/ +#define LZ5_COMMONDEFS_ONLY +#include "lz5.c" + /* ************************************* * Tuning Parameter ***************************************/ static const int LZ5HC_compressionLevel_default = 9; +#define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) + +#if MINMATCH == 3 + #define LZ5_read24(ptr) (uint32_t)(LZ5_read32(ptr)<<8) +#else + #define LZ5_read24(ptr) (uint32_t)(LZ5_read32(ptr)) +#endif + /*! * HEAPMODE : @@ -48,12 +69,6 @@ static const int LZ5HC_compressionLevel_default = 9; #define LZ5HC_HEAPMODE 0 -/* ************************************* -* Includes -***************************************/ -#include "lz5hc.h" - - /* ************************************* * Local Compiler Options ***************************************/ @@ -66,13 +81,6 @@ static const int LZ5HC_compressionLevel_default = 9; #endif -/* ************************************* -* Common LZ5 definition -***************************************/ -#define LZ5_COMMONDEFS_ONLY -#include "lz5.c" - - /* ************************************* * Local Constants ***************************************/ @@ -80,14 +88,14 @@ static const int LZ5HC_compressionLevel_default = 9; #define MAXD (1<(b))?(a):(b) static const int g_maxCompressionLevel = 16; @@ -104,6 +112,7 @@ struct LZ5HC_Data_s const BYTE* base; /* All index relative to this position */ const BYTE* dictBase; /* alternate base for extDict */ BYTE* inputBuffer; /* deprecated */ + BYTE* outputBuffer; /* deprecated */ U32 dictLimit; /* below that point, need extDict */ U32 lowLimit; /* below that point, no more dict */ U32 nextToUpdate; /* index from which to continue dictionary update */ @@ -114,68 +123,68 @@ struct LZ5HC_Data_s /************************************** * Local Macros **************************************/ -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH4*8)-HASH_LOG)) -#define HASH_FUNCTION3(i) (((i) * 506832829U) >> ((MINMATCH4*8)-HASH_LOG3)) -//#define DELTANEXTU16(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ -#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */ -#define DELTANEXTU32(p) chainTable[(p) & MAXD_MASK] /* flexible, MAXD dependent */ +#define HASH_FUNCTION(i) (((i) * 506832829U) >> ((32)-HASH_LOG)) +//#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((32)-HASH_LOG)) +#define HASH_FUNCTION3(i) (((i) * 506832829U) >> ((32)-HASH_LOG3)) static U32 LZ5HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ5_read32(ptr)); } -static U32 LZ5HC_hashPtr3(const void* ptr) { return HASH_FUNCTION3(LZ5_read32(ptr)); } +static U32 LZ5HC_hashPtr3(const void* ptr) { return HASH_FUNCTION3(LZ5_read24(ptr)); } -#define LZ5HC_LIMIT (1<hashTable, 0, sizeof(U32)*HASHTABLESIZE); - MEM_INIT((void*)hc4->hashTable3, 0, sizeof(U32)*HASHTABLESIZE3); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(U32)*MAXD); - hc4->nextToUpdate = LZ5HC_LIMIT; - hc4->base = start - LZ5HC_LIMIT; - hc4->end = start; - hc4->dictBase = start - LZ5HC_LIMIT; - hc4->dictLimit = LZ5HC_LIMIT; - hc4->lowLimit = LZ5HC_LIMIT; - hc4->last_off = 1; + MEM_INIT((void*)ctx->hashTable, 0, sizeof(U32)*HASHTABLESIZE); + MEM_INIT((void*)ctx->hashTable3, 0, sizeof(U32)*HASHTABLESIZE3); + MEM_INIT(ctx->chainTable, 0xFF, sizeof(U32)*MAXD); + ctx->nextToUpdate = LZ5HC_LIMIT; + ctx->base = start - LZ5HC_LIMIT; + ctx->end = start; + ctx->dictBase = start - LZ5HC_LIMIT; + ctx->dictLimit = LZ5HC_LIMIT; + ctx->lowLimit = LZ5HC_LIMIT; + ctx->last_off = 1; } /* Update chains up to ip (excluded) */ -FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* hc4, const BYTE* ip) +FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* ctx, const BYTE* ip) { - U32* chainTable = hc4->chainTable; - U32* HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; + U32* chainTable = ctx->chainTable; + U32* HashTable = ctx->hashTable; +#if MINMATCH == 3 + U32* HashTable3 = ctx->hashTable3; +#endif + const BYTE* const base = ctx->base; const U32 target = (U32)(ip - base); - U32 idx = hc4->nextToUpdate; + U32 idx = ctx->nextToUpdate; while(idx < target) { U32 h = LZ5HC_hashPtr(base+idx); - size_t delta = idx - HashTable[h]; - if (delta>MAX_DISTANCE) delta = MAX_DISTANCE; -// DELTANEXTU16(idx) = (U16)delta; - DELTANEXTU32(idx) = (U32)delta; + chainTable[idx & MAXD_MASK] = (U32)(idx - HashTable[h]); HashTable[h] = idx; - idx++; +#if MINMATCH == 3 + HashTable3[LZ5HC_hashPtr3(base+idx)] = idx; +#endif + idx++; } - hc4->nextToUpdate = target; + ctx->nextToUpdate = target; } -#define LZ5_NUM_REPS 1 #define LZ5_NORMAL_MATCH_COST(mlen,offset) (LZ5_MATCH_COST(mlen,offset)) #define LZ5_NORMAL_LIT_COST(len) (len) -#define LZ5_LIT_COST(len,offset) ((len)+((offset<(1 << LZ5_SHORT_OFFSET_BITS)) ? LZ5_SHORT_LITLEN_COST(len) : LZ5_LEN_COST(len))) -#define LZ5_MATCH_COST(mlen,offset) (LZ5_LEN_COST(mlen) + ((offset == 1) ? 1 : (offset<(1 << LZ5_SHORT_OFFSET_BITS) ? 2 : (offset<(1 << 16) ? 3 : 4)))) +#define LZ5_LIT_COST(len,offset) ((len)+((/*((offset) != 1) &&*/ ((offset)chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const BYTE* const dictBase = hc4->dictBase; - const U32 dictLimit = hc4->dictLimit; - const U32 lowLimit = (hc4->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); + U32* const chainTable = ctx->chainTable; + U32* const HashTable = ctx->hashTable; + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 lowLimit = (ctx->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; size_t ml=0, mlt; - match = ip - hc4->last_off; + /* HC4 match finder */ + LZ5HC_Insert(ctx, ip); + matchIndex = HashTable[LZ5HC_hashPtr(ip)]; + + match = ip - ctx->last_off; if (LZ5_read24(match) == LZ5_read24(ip)) { ml = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; *matchpos = match; - // return (int)ml; + return (int)ml; } - U32* const hashTable3 = hc4->hashTable3; - size_t h = LZ5HC_hashPtr3(ip); - size_t offset = ip - base - hashTable3[h]; - +#if MINMATCH == 3 + size_t offset = ip - base - ctx->hashTable3[LZ5HC_hashPtr3(ip)]; if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) { match = ip - offset; if (match > base && LZ5_read24(ip) == LZ5_read24(match)) { - mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (mlt > ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) - { ml = mlt; *matchpos = match; } + ml = 3;//LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; } } - - hashTable3[h] = ip - base; - - - /* HC4 match finder */ - LZ5HC_Insert(hc4, ip); - matchIndex = HashTable[LZ5HC_hashPtr(ip)]; +#endif while ((matchIndex>=lowLimit) && (nbAttempts)) { @@ -237,19 +240,18 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I if (matchIndex >= dictLimit) { match = base + matchIndex; - if (*(match+ml) == *(ip+ml) - && (LZ5_read24(match) == LZ5_read24(ip))) + if (match < ip && *(match+ml) == *(ip+ml) && (LZ5_read32(match) == LZ5_read32(ip))) { mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; if (mlt > ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 1 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) { ml = mlt; *matchpos = match; } } } else { match = dictBase + matchIndex; - if (LZ5_read24(match) == LZ5_read24(ip)) + if (LZ5_read32(match) == LZ5_read32(ip)) { const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iLimit) vLimit = iLimit; @@ -257,19 +259,32 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* hc4, /* I if ((ip+mlt == vLimit) && (vLimit < iLimit)) mlt += LZ5_count(ip+mlt, base+dictLimit, iLimit); if (mlt > ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 1 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } - matchIndex -= DELTANEXTU32(matchIndex); + matchIndex -= chainTable[matchIndex & MAXD_MASK]; } return (int)ml; } +FORCE_INLINE int LZ5_MORE_PROFITABLE(uint32_t best_off, uint32_t best_common, uint32_t off, uint32_t common, int literals, uint32_t last_off) +{ + int sum; + + if (literals > 0) + sum = MAX(common + literals, best_common); + else + sum = MAX(common, best_common - literals); + +// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 1 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 1 : (best_off), best_common - MINMATCH); + return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 1 : (off)) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 1 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); +} + FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( - LZ5HC_Data_Structure* hc4, + LZ5HC_Data_Structure* ctx, const BYTE* const ip, const BYTE* const iLowLimit, const BYTE* const iHighLimit, @@ -278,23 +293,23 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( const BYTE** startpos, const int maxNbAttempts) { - U32* const chainTable = hc4->chainTable; - U32* const HashTable = hc4->hashTable; - const BYTE* const base = hc4->base; - const U32 dictLimit = hc4->dictLimit; + U32* const chainTable = ctx->chainTable; + U32* const HashTable = ctx->hashTable; + const BYTE* const base = ctx->base; + const U32 dictLimit = ctx->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; - const U32 lowLimit = (hc4->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? hc4->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); - const BYTE* const dictBase = hc4->dictBase; + const U32 lowLimit = (ctx->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); + const BYTE* const dictBase = ctx->dictBase; + const BYTE* match; U32 matchIndex; int nbAttempts = maxNbAttempts; - int delta = (int)(ip-iLowLimit); /* First Match */ - LZ5HC_Insert(hc4, ip); + LZ5HC_Insert(ctx, ip); matchIndex = HashTable[LZ5HC_hashPtr(ip)]; - const BYTE* match = ip - hc4->last_off; + match = ip - ctx->last_off; if (LZ5_read24(match) == LZ5_read24(ip)) { int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; @@ -304,35 +319,29 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( mlt -= back; if (mlt > longest) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + LZ5_NORMAL_LIT_COST(mlt - longest)) { *matchpos = match+back; *startpos = ip+back; longest = (int)mlt; - // return (int)mlt; } } - - U32* const hashTable3 = hc4->hashTable3; - size_t h = LZ5HC_hashPtr3(ip); - - size_t offset = ip - base - hashTable3[h]; - +#if MINMATCH == 3 + size_t offset = ip - base - ctx->hashTable3[LZ5HC_hashPtr3(ip)]; if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) { match = ip - offset; if (match > base && LZ5_read24(ip) == LZ5_read24(match)) { - int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + int mlt = 3;//LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; int back = 0; while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; mlt -= back; if (mlt > longest) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == hc4->last_off) ? 1 : (ip - match + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + LZ5_NORMAL_LIT_COST(mlt - longest)) + if (!longest || LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 1 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest)) { *matchpos = match+back; *startpos = ip+back; @@ -340,9 +349,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( } } } - - hashTable3[h] = ip - base; - +#endif while ((matchIndex>=lowLimit) && (nbAttempts)) { @@ -350,8 +357,9 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( if (matchIndex >= dictLimit) { const BYTE* matchPtr = base + matchIndex; - if (*(iLowLimit + longest) == *(matchPtr - delta + longest)) - if (LZ5_read24(matchPtr) == LZ5_read24(ip)) + // if (*(ip + longest) == *(matchPtr + longest)) + + if (matchPtr < ip && LZ5_read32(matchPtr) == LZ5_read32(ip)) { int mlt = MINMATCH + LZ5_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); int back = 0; @@ -363,8 +371,12 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( mlt -= back; +// if ((ip - ctx->inputBuffer) >= 32867) +// printf("mlt=%d back=%d off=%d longest=%d long_off=%d\n", mlt, back, (U32)(ip-matchPtr), longest, (U32)(*matchpos-*startpos)); + + if (mlt > longest) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == hc4->last_off) ? 1 : (ip - matchPtr + LZ5_NUM_REPS)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == hc4->last_off) ? 1 : (ip - *matchpos + LZ5_NUM_REPS)) + (LZ5_NORMAL_LIT_COST(mlt - longest))) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == ctx->last_off) ? 1 : (ip - matchPtr)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - longest) )) { longest = (int)mlt; *matchpos = matchPtr+back; @@ -375,7 +387,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( else { const BYTE* matchPtr = dictBase + matchIndex; - if (LZ5_read24(matchPtr) == LZ5_read24(ip)) + if (LZ5_read32(matchPtr) == LZ5_read32(ip)) { size_t mlt; int back=0; @@ -389,8 +401,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } } } -// matchIndex -= DELTANEXTU16(matchIndex); - matchIndex -= DELTANEXTU32(matchIndex); + matchIndex -= chainTable[matchIndex & MAXD_MASK]; } @@ -400,10 +411,13 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; -#define LZ5HC_DEBUG 0 -#if LZ5HC_DEBUG -static unsigned debug = 0; -#endif +/* +LZ5 uses 3 types of codewords from 2 to 4 bytes long: +- 1_OO_LL_MMM OOOOOOOO - 10-bit offset, 3-bit match length, 2-bit literal length +- 00_LLL_MMM OOOOOOOO OOOOOOOO - 16-bit offset, 3-bit match length, 3-bit literal length +- 010_LL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 2-bit literal length +- 011_LL_MMM - last offset, 3-bit match length, 2-bit literal length +*/ FORCE_INLINE int LZ5HC_encodeSequence ( LZ5HC_Data_Structure* ctx, @@ -418,10 +432,6 @@ FORCE_INLINE int LZ5HC_encodeSequence ( int length; BYTE* token; -#if LZ5HC_DEBUG - if (debug) printf("literal : %u -- match : %u -- offset : %u\n", (U32)(*ip - *anchor), (U32)matchLength, (U32)(*ip-match)); -#endif - /* Encode Literal length */ length = (int)(*ip - *anchor); token = (*op)++; @@ -473,6 +483,8 @@ FORCE_INLINE int LZ5HC_encodeSequence ( if (length>=(int)ML_MASK) { *token+=ML_MASK; length-=ML_MASK; for(; length > 509 ; length-=510) { *(*op)++ = 255; *(*op)++ = 255; } if (length > 254) { length-=255; *(*op)++ = 255; } *(*op)++ = (BYTE)length; } else *token += (BYTE)(length); + LZ5HC_DEBUG("%u: ENCODE literals=%u off=%u mlen=%u out=%u\n", (U32)(*ip - ctx->inputBuffer), (U32)(*ip - *anchor), (U32)(*ip-match), (U32)matchLength, 2+(U32)(*op - ctx->outputBuffer)); + /* Prepare next loop */ *ip += matchLength; *anchor = *ip; @@ -492,6 +504,8 @@ static int LZ5HC_compress_generic ( ) { LZ5HC_Data_Structure* ctx = (LZ5HC_Data_Structure*) ctxvoid; + ctx->inputBuffer = (BYTE*) source; + ctx->outputBuffer = (BYTE*) dest; const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; const BYTE* const iend = ip + inputSize; @@ -510,7 +524,7 @@ static int LZ5HC_compress_generic ( const BYTE* ref3=NULL; const BYTE* start0; const BYTE* ref0; - + const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; /* init */ if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel; @@ -520,160 +534,118 @@ static int LZ5HC_compress_generic ( ip++; + printf("maxNbAttempts=%d\n", maxNbAttempts); + int swapped = 0; + /* Main Loop */ while (ip < mflimit) { ml = LZ5HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); if (!ml) { ip++; continue; } - if (ip-ref == ctx->last_off) /* last offset */ - { - if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; - continue; - } + int back = 0; + while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; + ml -= back; + ip += back; + ref += back; /* saved, in case we would skip too much */ start0 = ip; ref0 = ref; ml0 = ml; -_Search2: - if (ip+ml < mflimit) - ml2 = LZ5HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, ip + 1, matchlimit, ml, &ref2, &start2, maxNbAttempts); - else ml2 = ml; +_Search: + if (ip+ml >= mflimit) goto _Encode; - if (ml2 == ml) /* No better match */ + ml2 = LZ5HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2, maxNbAttempts); + if (ml2 == 0) goto _Encode; + + +#if 0 + if (start2 < ip) { - if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; - continue; + start3 = start2; ref3 = ref2; ml3 = ml2; + start2 = ip; ref2 = ref; ml2 = ml; + ip = start3; ref = ref3; ml = ml3; + swapped = 1; } +#endif - if (start0 < ip) + int price, best_price, off0=0, off1=0; + uint8_t *pos, *best_pos; + + // find the lowest price for encoding ml bytes + best_pos = (uint8_t*)ip; + best_price = 1<<30; + off0 = (uint8_t*)ip - ref; + off1 = start2 - ref2; + + for (pos = (uint8_t*)ip + ml; pos >= start2; pos--) { - if (start2 < ip + ml0) /* empirical */ + int common0 = pos - ip; + if (common0 >= MINMATCH) { - ip = start0; - ref = ref0; - ml = ml0; + price = LZ5_CODEWORD_COST(ip - anchor, (off0 == ctx->last_off) ? 1 : off0, common0 - MINMATCH); + + int common1 = start2 + ml2 - pos; + if (common1 >= MINMATCH) + price += LZ5_CODEWORD_COST(0, (off1 == off0) ? 1 : (off1), common1 - MINMATCH); + else + price += LZ5_LIT_ONLY_COST(common1); + +/* if ((U32)(ip - ctx->inputBuffer) == 73786) + { + printf("1b common0=%d common1=%d price=%d best_price=%d\n", common0, common1, price, best_price); + }*/ } - } + else + { + price = LZ5_CODEWORD_COST(start2 - anchor, (off1 == ctx->last_off) ? 1 : off1, ml2 - MINMATCH); - /* Here, start0==ip */ - if ((start2 - ip) < 3) /* First Match too small : removed */ - { - ml = ml2; - ip = start2; - ref =ref2; - goto _Search2; - } + if ((U32)(ip - ctx->inputBuffer) == 73786) + { + printf("2 common0=%d price=%d best_price=%d\n", common0, price, best_price); + } + } -_Search3: - /* - * Currently we have : - * ml2 > ml1, and - * ip1+3 <= ip2 (usually < ip1+ml1) - */ - if ((start2 - ip) < OPTIMAL_ML) - { - int correction; - int new_ml = ml; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) + if (price < best_price) { - start2 += correction; - ref2 += correction; - ml2 -= correction; + best_price = price; + best_pos = pos; } } - /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ - if (start2 + ml2 < mflimit) - ml3 = LZ5HC_InsertAndGetWiderMatch(ctx, start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, maxNbAttempts); - else ml3 = ml2; - - if (ml3 == ml2) /* No better match : 2 sequences to encode */ + // LZ5HC_DEBUG("%u: TRY last_off=%d literals=%u off=%u mlen=%u literals2=%u off2=%u mlen2=%u best=%d\n", (U32)(ip - ctx->inputBuffer), ctx->last_off, (U32)(ip - anchor), off0, (U32)ml, (U32)(start2 - anchor), off1, ml2, (U32)(best_pos - ip)); + + ml = best_pos - ip; + if (ml < MINMATCH) { - /* ip & ref are known; Now for ml */ - if (start2 < ip+ml) ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; ip = start2; - if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml2, ref2, limit, oend)) return 0; - continue; - } - - if (start3 < ip+ml+3) /* Not enough space for match 2 : remove it */ - { - if (start3 >= (ip+ml)) /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + ref = ref2; + ml = ml2; + // LZ5HC_DEBUG("%u: (ml < MINMATCH : %u -- match : %u -- offset : %u\n", (U32)(ip - ctx->inputBuffer), (U32)(ip - anchor), (U32)ml, (U32)(ip-ref)); + if (swapped) { - if (start2 < ip+ml) - { - int correction = (int)(ip+ml - start2); - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) - { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; + swapped = 0; + goto _Encode; } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; + goto _Search; } + +_Encode: - /* - * OK, now we have 3 ascending matches; let's write at least the first one - * ip & ref are known; Now for ml - */ - if (start2 < ip+ml) + if (start0 < ip) { - if ((start2 - ip) < (int)ML_MASK) + // LZ5HC_DEBUG("%u: 1literal : %u -- match : %u -- offset : %u\n", (U32)(ip - ctx->inputBuffer), (U32)(ip - anchor), (U32)ml, (U32)(ip-ref)); + if (LZ5_MORE_PROFITABLE(ip - ref, ml, start0 - ref0, ml0, ref0 - ref, ctx->last_off)) { - int correction; - if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) - { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - else - { - ml = (int)(start2 - ip); + ip = start0; + ref = ref0; + ml = ml0; } } - if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; - - ip = start2; - ref = ref2; - ml = ml2; - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; } /* Encode Last Literals */ @@ -691,6 +663,7 @@ static int LZ5HC_compress_generic ( } + int LZ5_sizeofStateHC(void) { return sizeof(LZ5HC_Data_Structure); } int LZ5_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) From a2089a2688c0624cc83fab6b4be2ca282f5f1bf6 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 27 Nov 2015 18:38:35 +0100 Subject: [PATCH 06/22] improved compression --- Makefile | 2 +- NEWS | 7 +++- lib/Makefile | 2 +- lib/lz5.c | 17 +++++++-- lib/lz5.h | 2 +- lib/lz5hc.c | 100 +++++++++++++++++++-------------------------------- 6 files changed, 61 insertions(+), 69 deletions(-) diff --git a/Makefile b/Makefile index 0d69b15..d58710f 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ # ################################################################ # Version number -export VERSION=131 +export VERSION=132 export RELEASE=r$(VERSION) DESTDIR?= diff --git a/NEWS b/NEWS index 22db047..32676b3 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,8 @@ +r132 +- added: a new parser +- added: a gain function +- added: a special codeword for the last occured offset +- added: support for 3-byte long matches + r131 The first release based on LZ4 r132 dev - diff --git a/lib/Makefile b/lib/Makefile index e463931..51aa67a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,7 +31,7 @@ # ################################################################ # Version numbers -VERSION?= 131 +VERSION?= 132 LIBVER_MAJOR:=`sed -n '/define LZ5_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz5.h` LIBVER_MINOR:=`sed -n '/define LZ5_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz5.h` LIBVER_PATCH:=`sed -n '/define LZ5_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < lz5.h` diff --git a/lib/lz5.c b/lib/lz5.c index 881a415..b0e40c7 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -1,6 +1,7 @@ /* LZ5 - Fast LZ compression algorithm Copyright (C) 2011-2015, Yann Collet. + Copyright (C) 2015, Przemyslaw Skibinski BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -1364,6 +1365,19 @@ FORCE_INLINE int LZ5_decompress_generic( ip += length; op = cpy; /* get offset */ +#if 0 + switch (token>>6) + { + default: offset = *ip + (((token>>ML_RUN_BITS2)&3)<<8); ip++; break; + case 0: offset = LZ5_readLE16(ip); ip+=2; break; + case 1: + if ((token>>5) == 3) + offset = last_off; + else // (token>>ML_RUN_BITS2) == 2 + { offset = LZ5_readLE24(ip); ip+=3; } + break; + } +#else if (token>>7) { offset = *ip + (((token>>ML_RUN_BITS2)&3)<<8); ip++; @@ -1381,11 +1395,10 @@ FORCE_INLINE int LZ5_decompress_generic( else // (token>>ML_RUN_BITS2) == 3 { offset = last_off; -// printf("2last_off=%d\n", offset); } +#endif last_off = offset; - // printf("1last_off=%d\n", last_off); match = op - offset; if ((checkOffset) && (unlikely(match < lowLimit))) goto _output_error; /* Error : offset outside buffers */ diff --git a/lib/lz5.h b/lib/lz5.h index b794811..8ff1f55 100644 --- a/lib/lz5.h +++ b/lib/lz5.h @@ -48,7 +48,7 @@ extern "C" { * Version **************************************/ #define LZ5_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ5_VERSION_MINOR 7 /* for new (non-breaking) interface capabilities */ +#define LZ5_VERSION_MINOR 3 /* for new (non-breaking) interface capabilities */ #define LZ5_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ #define LZ5_VERSION_NUMBER (LZ5_VERSION_MAJOR *100*100 + LZ5_VERSION_MINOR *100 + LZ5_VERSION_RELEASE) int LZ5_versionNumber (void); diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 1256981..139ef47 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -1,6 +1,7 @@ /* LZ5 HC - High Compression Mode of LZ5 Copyright (C) 2011-2015, Yann Collet. + Copyright (C) 2015, Przemyslaw Skibinski BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) @@ -97,7 +98,23 @@ static const int LZ5HC_compressionLevel_default = 9; #define MAX(a,b) ((a)>(b))?(a):(b) -static const int g_maxCompressionLevel = 16; +#define LZ5_SHORT_LITERALS ((1< (1<<16)) || ((offset)> ((32)-HASH_LOG)) -//#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((32)-HASH_LOG)) +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((32)-HASH_LOG)) #define HASH_FUNCTION3(i) (((i) * 506832829U) >> ((32)-HASH_LOG3)) static U32 LZ5HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ5_read32(ptr)); } @@ -178,20 +194,6 @@ FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* ctx, const BYTE* ip) } -#define LZ5_NORMAL_MATCH_COST(mlen,offset) (LZ5_MATCH_COST(mlen,offset)) -#define LZ5_NORMAL_LIT_COST(len) (len) - -#define LZ5_LIT_COST(len,offset) ((len)+((/*((offset) != 1) &&*/ ((offset) ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 1 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) { ml = mlt; *matchpos = match; } } } @@ -259,7 +261,7 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I if ((ip+mlt == vLimit) && (vLimit < iLimit)) mlt += LZ5_count(ip+mlt, base+dictLimit, iLimit); if (mlt > ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 1 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } @@ -278,8 +280,8 @@ FORCE_INLINE int LZ5_MORE_PROFITABLE(uint32_t best_off, uint32_t best_common, ui else sum = MAX(common, best_common - literals); -// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 1 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 1 : (best_off), best_common - MINMATCH); - return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 1 : (off)) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 1 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); +// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 0 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 0 : (best_off), best_common - MINMATCH); + return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 0 : (off)) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 0 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); } @@ -341,7 +343,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( mlt -= back; if (mlt > longest) - if (!longest || LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 1 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest)) + if (!longest || LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest)) { *matchpos = match+back; *startpos = ip+back; @@ -371,12 +373,8 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( mlt -= back; -// if ((ip - ctx->inputBuffer) >= 32867) -// printf("mlt=%d back=%d off=%d longest=%d long_off=%d\n", mlt, back, (U32)(ip-matchPtr), longest, (U32)(*matchpos-*startpos)); - - if (mlt > longest) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == ctx->last_off) ? 1 : (ip - matchPtr)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 1 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - longest) )) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == ctx->last_off) ? 0 : (ip - matchPtr)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - longest) )) { longest = (int)mlt; *matchpos = matchPtr+back; @@ -516,12 +514,10 @@ static int LZ5HC_compress_generic ( BYTE* const oend = op + maxOutputSize; unsigned maxNbAttempts; - int ml, ml2, ml3, ml0; + int ml, ml2, ml0; const BYTE* ref=NULL; const BYTE* start2=NULL; const BYTE* ref2=NULL; - const BYTE* start3=NULL; - const BYTE* ref3=NULL; const BYTE* start0; const BYTE* ref0; const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; @@ -534,9 +530,6 @@ static int LZ5HC_compress_generic ( ip++; - printf("maxNbAttempts=%d\n", maxNbAttempts); - int swapped = 0; - /* Main Loop */ while (ip < mflimit) { @@ -561,16 +554,6 @@ static int LZ5HC_compress_generic ( if (ml2 == 0) goto _Encode; -#if 0 - if (start2 < ip) - { - start3 = start2; ref3 = ref2; ml3 = ml2; - start2 = ip; ref2 = ref; ml2 = ml; - ip = start3; ref = ref3; ml = ml3; - swapped = 1; - } -#endif - int price, best_price, off0=0, off1=0; uint8_t *pos, *best_pos; @@ -585,33 +568,31 @@ static int LZ5HC_compress_generic ( int common0 = pos - ip; if (common0 >= MINMATCH) { - price = LZ5_CODEWORD_COST(ip - anchor, (off0 == ctx->last_off) ? 1 : off0, common0 - MINMATCH); + price = LZ5_CODEWORD_COST(ip - anchor, (off0 == ctx->last_off) ? 0 : off0, common0 - MINMATCH); int common1 = start2 + ml2 - pos; if (common1 >= MINMATCH) - price += LZ5_CODEWORD_COST(0, (off1 == off0) ? 1 : (off1), common1 - MINMATCH); + price += LZ5_CODEWORD_COST(0, (off1 == off0) ? 0 : (off1), common1 - MINMATCH); else price += LZ5_LIT_ONLY_COST(common1); -/* if ((U32)(ip - ctx->inputBuffer) == 73786) + if (price < best_price) { - printf("1b common0=%d common1=%d price=%d best_price=%d\n", common0, common1, price, best_price); - }*/ + best_price = price; + best_pos = pos; + } } else { - price = LZ5_CODEWORD_COST(start2 - anchor, (off1 == ctx->last_off) ? 1 : off1, ml2 - MINMATCH); + price = LZ5_CODEWORD_COST(start2 - anchor, (off1 == ctx->last_off) ? 0 : off1, ml2 - MINMATCH); - if ((U32)(ip - ctx->inputBuffer) == 73786) + if (price < best_price) { - printf("2 common0=%d price=%d best_price=%d\n", common0, price, best_price); + best_price = price; + best_pos = pos; } - } - if (price < best_price) - { - best_price = price; - best_pos = pos; + break; } } @@ -623,12 +604,6 @@ static int LZ5HC_compress_generic ( ip = start2; ref = ref2; ml = ml2; - // LZ5HC_DEBUG("%u: (ml < MINMATCH : %u -- match : %u -- offset : %u\n", (U32)(ip - ctx->inputBuffer), (U32)(ip - anchor), (U32)ml, (U32)(ip-ref)); - if (swapped) - { - swapped = 0; - goto _Encode; - } goto _Search; } @@ -636,7 +611,6 @@ static int LZ5HC_compress_generic ( if (start0 < ip) { - // LZ5HC_DEBUG("%u: 1literal : %u -- match : %u -- offset : %u\n", (U32)(ip - ctx->inputBuffer), (U32)(ip - anchor), (U32)ml, (U32)(ip-ref)); if (LZ5_MORE_PROFITABLE(ip - ref, ml, start0 - ref0, ml0, ref0 - ref, ctx->last_off)) { ip = start0; From 0c4abe161534ed0798e1f54eedee3d8d66d99122 Mon Sep 17 00:00:00 2001 From: inikep Date: Fri, 27 Nov 2015 20:32:06 +0100 Subject: [PATCH 07/22] small improvements --- lib/lz5hc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 139ef47..ef8281c 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -336,14 +336,14 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( match = ip - offset; if (match > base && LZ5_read24(ip) == LZ5_read24(match)) { - int mlt = 3;//LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; int back = 0; while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; mlt -= back; if (mlt > longest) - if (!longest || LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest)) + if (!longest || LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip+back - *matchpos == ctx->last_off) ? 0 : (ip+back - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest)) { *matchpos = match+back; *startpos = ip+back; @@ -374,7 +374,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( mlt -= back; if (mlt > longest) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == ctx->last_off) ? 0 : (ip - matchPtr)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - longest) )) + if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == ctx->last_off) ? 0 : (ip - matchPtr)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip+back - *matchpos == ctx->last_off) ? 0 : (ip+back - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - longest) )) { longest = (int)mlt; *matchpos = matchPtr+back; From 65ac1512a42fea3a512495260ad1ee270b273d9b Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 08:39:05 +0100 Subject: [PATCH 08/22] memory leak fix --- lib/lz5.c | 16 +++++--- lib/lz5hc.c | 112 ++++++++++++++++++++++++++-------------------------- 2 files changed, 67 insertions(+), 61 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index b0e40c7..4c4b41f 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -316,6 +316,10 @@ static const int LZ5_minLength = (MFLIMIT+1); #define ML_RUN_BITS (ML_BITS + RUN_BITS) #define ML_RUN_BITS2 (ML_BITS + RUN_BITS2) +#define LZ5_SHORT_OFFSET_BITS 10 +#define LZ5_SHORT_OFFSET_DISTANCE (1< olimit))) return 0; /* Check output limit */ - if (ip-match >= (1<<10) && ip-match < (1<<16) && ip-match != last_off) + if (ip-match >= LZ5_SHORT_OFFSET_DISTANCE && ip-match < LZ5_MID_OFFSET_DISTANCE && ip-match != last_off) { if (litLength>=RUN_MASK) { @@ -658,13 +662,13 @@ FORCE_INLINE int LZ5_compress_generic( // printf("2last_off=%d *token=%d\n", last_off, *token); } else - if (ip-match < (1<<10)) + if (ip-match < LZ5_SHORT_OFFSET_DISTANCE) { *token+=((4+((ip-match)>>8))<= (1<<10) && ip-match < (1<<16) && ip-match != last_off) + if (ip-match >= LZ5_SHORT_OFFSET_DISTANCE && ip-match < LZ5_MID_OFFSET_DISTANCE && ip-match != last_off) { if (litLength>=RUN_MASK) { @@ -951,13 +955,13 @@ static int LZ5_compress_destSize_generic( *token+=(3<>8))< - - -/* ************************************* -* Common LZ5 definition -***************************************/ -#define LZ5_COMMONDEFS_ONLY -#include "lz5.c" /* ************************************* * Tuning Parameter ***************************************/ static const int LZ5HC_compressionLevel_default = 9; -#define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) - -#if MINMATCH == 3 - #define LZ5_read24(ptr) (uint32_t)(LZ5_read32(ptr)<<8) -#else - #define LZ5_read24(ptr) (uint32_t)(LZ5_read32(ptr)) -#endif - /*! * HEAPMODE : @@ -70,6 +50,13 @@ static const int LZ5HC_compressionLevel_default = 9; #define LZ5HC_HEAPMODE 0 +/* ************************************* +* Includes +***************************************/ +#include "lz5hc.h" +#include + + /* ************************************* * Local Compiler Options ***************************************/ @@ -82,6 +69,14 @@ static const int LZ5HC_compressionLevel_default = 9; #endif + +/* ************************************* +* Common LZ5 definition +***************************************/ +#define LZ5_COMMONDEFS_ONLY +#include "lz5.c" + + /* ************************************* * Local Constants ***************************************/ @@ -93,26 +88,10 @@ static const int LZ5HC_compressionLevel_default = 9; #define HASH_LOG3 16 #define HASHTABLESIZE (1 << HASH_LOG) #define HASHTABLESIZE3 (1 << HASH_LOG3) -#define LZ5_SHORT_OFFSET_BITS 10 -#define LZ5_SHORT_OFFSET_DISTANCE (1<(b))?(a):(b) #define LZ5_SHORT_LITERALS ((1< (1<<16)) || ((offset)(b))?(a):(b) #define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((32)-HASH_LOG)) #define HASH_FUNCTION3(i) (((i) * 506832829U) >> ((32)-HASH_LOG3)) @@ -149,9 +135,39 @@ static U32 LZ5HC_hashPtr3(const void* ptr) { return HASH_FUNCTION3(LZ5_read24(pt #define LZ5HC_LIMIT (1<<(DICTIONARY_LOGSIZE)) +#define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) + +#define LZ5_SHORT_LITLEN_COST(len) (len LZ5_MID_OFFSET_DISTANCE) || ((offset) 0) + sum = MAX(common + literals, best_common); + else + sum = MAX(common, best_common - literals); + +// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 0 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 0 : (best_off), best_common - MINMATCH); + return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 0 : (off)) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 0 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); +} + + static void LZ5HC_init (LZ5HC_Data_Structure* ctx, const BYTE* start) { MEM_INIT((void*)ctx->hashTable, 0, sizeof(U32)*HASHTABLESIZE); @@ -271,18 +287,6 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I return (int)ml; } -FORCE_INLINE int LZ5_MORE_PROFITABLE(uint32_t best_off, uint32_t best_common, uint32_t off, uint32_t common, int literals, uint32_t last_off) -{ - int sum; - - if (literals > 0) - sum = MAX(common + literals, best_common); - else - sum = MAX(common, best_common - literals); - -// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 0 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 0 : (best_off), best_common - MINMATCH); - return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 0 : (off)) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 0 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); -} FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( @@ -435,7 +439,7 @@ FORCE_INLINE int LZ5HC_encodeSequence ( token = (*op)++; if ((limitedOutputBuffer) && ((*op + (length>>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ - if (*ip-match >= (1<<10) && *ip-match < (1<<16) && *ip-match != ctx->last_off) + if (*ip-match >= LZ5_SHORT_OFFSET_DISTANCE && *ip-match < LZ5_MID_OFFSET_DISTANCE && *ip-match != ctx->last_off) { if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<>8))<hashTable = ALLOCATOR(1, sizeof(U32)*HASHTABLESIZE); + statePtr->hashTable = ALLOCATOR(1, sizeof(U32)*(HASHTABLESIZE3+HASHTABLESIZE)); if (!statePtr->hashTable) return 0; - statePtr->hashTable3 = ALLOCATOR(1, sizeof(U32)*HASHTABLESIZE3); - if (!statePtr->hashTable3) - return 0; + statePtr->hashTable3 = statePtr->hashTable + HASHTABLESIZE; statePtr->chainTable = ALLOCATOR(1, sizeof(U32)*MAXD); if (!statePtr->chainTable) From d7723ff39350c305d62188c856bb857d641f7c01 Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 10:25:28 +0100 Subject: [PATCH 09/22] mem.h and lz5common.h --- .gitignore | 3 + lib/lz5.c | 444 +++----------------------------------------- lib/lz5common.h | 135 ++++++++++++++ lib/lz5hc.c | 124 +++++++------ lib/mem.h | 481 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 721 insertions(+), 466 deletions(-) create mode 100644 lib/lz5common.h create mode 100644 lib/mem.h diff --git a/.gitignore b/.gitignore index a21b019..1ccbb8e 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,9 @@ examples/ versionsTest/ visual/ +# Archives +*.zip + # Object files *.o *.ko diff --git a/lib/lz5.c b/lib/lz5.c index 4c4b41f..7e0e776 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -33,391 +33,10 @@ - LZ5 public forum : https://groups.google.com/forum/#!forum/lz5c */ - -/************************************** -* Tuning parameters -**************************************/ -/* - * HEAPMODE : - * Select how default compression functions will allocate memory for their hash table, - * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). - */ -#define HEAPMODE 0 - -/* - * ACCELERATION_DEFAULT : - * Select "acceleration" for LZ5_compress_fast() when parameter value <= 0 - */ -#define ACCELERATION_DEFAULT 1 - - -/************************************** -* CPU Feature Detection -**************************************/ -/* LZ5_FORCE_MEMORY_ACCESS - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method is portable but violate C standard. - * It can generate buggy code on targets which generate assembly depending on alignment. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. - * Prefer these methods in priority order (0 > 1 > 2) - */ -#ifndef LZ5_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define LZ5_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) -# define LZ5_FORCE_MEMORY_ACCESS 1 -# endif -#endif - -/* - * LZ5_FORCE_SW_BITCOUNT - * Define this parameter if your target system or compiler does not support hardware bit count - */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for Windows CE does not support Hardware bit count */ -# define LZ5_FORCE_SW_BITCOUNT -#endif +#include "mem.h" +#include "lz5common.h" -/************************************** -* Includes -**************************************/ -#include "lz5.h" - - -/************************************** -* Compiler Options -**************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# define FORCE_INLINE static __forceinline -# include -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ -#else -# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# if defined(__GNUC__) || defined(__clang__) -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -# else -# define FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -#endif /* _MSC_VER */ - -/* LZ5_GCC_VERSION is defined into lz5.h */ -#if (LZ5_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -/************************************** -* Memory routines -**************************************/ -#include /* malloc, calloc, free */ -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include /* memset, memcpy */ -#define MEM_INIT memset - - -/************************************** -* Basic Types -**************************************/ -#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - - -/************************************** -* Reading and writing into memory -**************************************/ -#define STEPSIZE sizeof(size_t) - -static unsigned LZ5_64bits(void) { return sizeof(void*)==8; } - -static unsigned LZ5_isLittleEndian(void) -{ - const union { U32 i; BYTE c[4]; } one = { 1 }; // don't use static : performance detrimental - return one.c[0]; -} - - -#if defined(LZ5_FORCE_MEMORY_ACCESS) && (LZ5_FORCE_MEMORY_ACCESS==2) - -static U16 LZ5_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 LZ5_read32(const void* memPtr) { return *(const U32*) memPtr; } -static size_t LZ5_read_ARCH(const void* memPtr) { return *(const size_t*) memPtr; } - -static void LZ5_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } - -#elif defined(LZ5_FORCE_MEMORY_ACCESS) && (LZ5_FORCE_MEMORY_ACCESS==1) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U16 u16; U32 u32; size_t uArch; } __attribute__((packed)) unalign; - -static U16 LZ5_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } -static U32 LZ5_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static size_t LZ5_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } - -static void LZ5_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } - -#else - -static U16 LZ5_read16(const void* memPtr) -{ - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static U32 LZ5_read32(const void* memPtr) -{ - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static size_t LZ5_read_ARCH(const void* memPtr) -{ - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; -} - -static void LZ5_write16(void* memPtr, U16 value) -{ - memcpy(memPtr, &value, sizeof(value)); -} - -#endif // LZ5_FORCE_MEMORY_ACCESS - -static U16 LZ5_readLE16(const void* memPtr) -{ - if (LZ5_isLittleEndian()) - { - return LZ5_read16(memPtr); - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] + (p[1]<<8)); - } -} - -static U32 LZ5_readLE24(const void* memPtr) -{ - if (LZ5_isLittleEndian()) - { - U32 val32 = 0; - memcpy(&val32, memPtr, 3); - return val32; - } - else - { - const BYTE* p = (const BYTE*)memPtr; - return (U32)(p[0] + (p[1]<<8) + (p[2]<<16)); - } -} - -static void LZ5_writeLE16(void* memPtr, U16 value) -{ - if (LZ5_isLittleEndian()) - { - LZ5_write16(memPtr, value); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } -} - -static void LZ5_writeLE24(void* memPtr, U32 value) -{ - if (LZ5_isLittleEndian()) - { - memcpy(memPtr, &value, 3); - } - else - { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - p[2] = (BYTE)(value>>16); - } -} - - -static void LZ5_copy8(void* dst, const void* src) -{ - memcpy(dst,src,8); -} - -/* customized variant of memcpy, which can overwrite up to 7 bytes beyond dstEnd */ -static void LZ5_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) -{ - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* const e = (BYTE*)dstEnd; - -#if 0 - const size_t l2 = 8 - (((size_t)d) & (sizeof(void*)-1)); - LZ5_copy8(d,s); if (d>e-9) return; - d+=l2; s+=l2; -#endif /* join to align */ - - do { LZ5_copy8(d,s); d+=8; s+=8; } while (d>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_ctzll((U64)val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ5_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_ctz((U32)val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif - } - } - else /* Big Endian CPU */ - { - if (LZ5_64bits()) - { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ5_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_clzll((U64)val) >> 3); -# else - unsigned r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif - } - else /* 32 bits */ - { -# if defined(_MSC_VER) && !defined(LZ5_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (LZ5_GCC_VERSION >= 304)) && !defined(LZ5_FORCE_SW_BITCOUNT) - return (__builtin_clz((U32)val) >> 3); -# else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif - } - } -} - -static unsigned LZ5_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) -{ - const BYTE* const pStart = pIn; - - while (likely(pIn matchlimit) limit = matchlimit; - matchLength = LZ5_count(ip+MINMATCH, match+MINMATCH, limit); + matchLength = MEM_count(ip+MINMATCH, match+MINMATCH, limit); ip += MINMATCH + matchLength; if (ip==limit) { - unsigned more = LZ5_count(ip, (const BYTE*)source, matchlimit); + unsigned more = MEM_count(ip, (const BYTE*)source, matchlimit); matchLength += more; ip += more; } } else { - matchLength = LZ5_count(ip+MINMATCH, match+MINMATCH, matchlimit); + matchLength = MEM_count(ip+MINMATCH, match+MINMATCH, matchlimit); ip += MINMATCH + matchLength; } @@ -744,7 +363,7 @@ FORCE_INLINE int LZ5_compress_generic( LZ5_putPosition(ip, ctx, tableType, base); if ( ((dictIssue==dictSmall) ? (match>=lowRefLimit) : 1) && (match+MAX_DISTANCE>=ip) - && (LZ5_read32(match+refDelta)==LZ5_read32(ip)) ) + && (MEM_read32(match+refDelta)==MEM_read32(ip)) ) { token=op++; *token=0; goto _next_match; } /* Prepare next loop */ @@ -787,14 +406,14 @@ int LZ5_compress_fast_extState(void* state, const char* source, char* dest, int if (inputSize < LZ5_64Klimit) return LZ5_compress_generic(state, source, dest, inputSize, 0, notLimited, byU16, noDict, noDictIssue, acceleration); else - return LZ5_compress_generic(state, source, dest, inputSize, 0, notLimited, LZ5_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); + return LZ5_compress_generic(state, source, dest, inputSize, 0, notLimited, MEM_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); } else { if (inputSize < LZ5_64Klimit) return LZ5_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); else - return LZ5_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, LZ5_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); + return LZ5_compress_generic(state, source, dest, inputSize, maxOutputSize, limitedOutput, MEM_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); } } @@ -834,7 +453,7 @@ int LZ5_compress_fast_force(const char* source, char* dest, int inputSize, int m if (inputSize < LZ5_64Klimit) return LZ5_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); else - return LZ5_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, LZ5_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); + return LZ5_compress_generic(&ctx, source, dest, inputSize, maxOutputSize, limitedOutput, MEM_64bits() ? byU32 : byPtr, noDict, noDictIssue, acceleration); } @@ -903,7 +522,7 @@ static int LZ5_compress_destSize_generic( LZ5_putPositionOnHash(ip, h, ctx, tableType, base); } while ( ((tableType==byU16) ? 0 : (match + MAX_DISTANCE < ip)) - || (LZ5_read32(match) != LZ5_read32(ip)) ); + || (MEM_read32(match) != MEM_read32(ip)) ); } /* Catch up */ @@ -944,7 +563,7 @@ static int LZ5_compress_destSize_generic( } /* Copy Literals */ - LZ5_wildCopy(op, anchor, op+litLength); + MEM_wildCopy(op, anchor, op+litLength); op += litLength; } @@ -963,12 +582,12 @@ static int LZ5_compress_destSize_generic( else if (ip-match < LZ5_MID_OFFSET_DISTANCE) { - LZ5_writeLE16(op, (U16)(ip-match)); op+=2; + MEM_writeLE16(op, (U16)(ip-match)); op+=2; } else { *token+=(2< oMaxMatch) { @@ -1008,7 +627,7 @@ static int LZ5_compress_destSize_generic( match = LZ5_getPosition(ip, ctx, tableType, base); LZ5_putPosition(ip, ctx, tableType, base); if ( (match+MAX_DISTANCE>=ip) - && (LZ5_read32(match)==LZ5_read32(ip)) ) + && (MEM_read32(match)==MEM_read32(ip)) ) { token=op++; *token=0; goto _next_match; } /* Prepare next loop */ @@ -1061,7 +680,7 @@ static int LZ5_compress_destSize_extState (void* state, const char* src, char* d if (*srcSizePtr < LZ5_64Klimit) return LZ5_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, byU16); else - return LZ5_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, LZ5_64bits() ? byU32 : byPtr); + return LZ5_compress_destSize_generic(state, src, dst, srcSizePtr, targetDstSize, MEM_64bits() ? byU32 : byPtr); } } @@ -1365,7 +984,7 @@ FORCE_INLINE int LZ5_decompress_generic( op += length; break; /* Necessarily EOF, due to parsing restrictions */ } - LZ5_wildCopy(op, ip, cpy); + MEM_wildCopy(op, ip, cpy); ip += length; op = cpy; /* get offset */ @@ -1373,12 +992,12 @@ FORCE_INLINE int LZ5_decompress_generic( switch (token>>6) { default: offset = *ip + (((token>>ML_RUN_BITS2)&3)<<8); ip++; break; - case 0: offset = LZ5_readLE16(ip); ip+=2; break; + case 0: offset = MEM_readLE16(ip); ip+=2; break; case 1: if ((token>>5) == 3) offset = last_off; else // (token>>ML_RUN_BITS2) == 2 - { offset = LZ5_readLE24(ip); ip+=3; } + { offset = MEM_readLE24(ip); ip+=3; } break; } #else @@ -1389,12 +1008,12 @@ FORCE_INLINE int LZ5_decompress_generic( else if ((token>>ML_RUN_BITS) == 0) { - offset = LZ5_readLE16(ip); ip+=2; + offset = MEM_readLE16(ip); ip+=2; } else if ((token>>ML_RUN_BITS2) == 2) { - offset = LZ5_readLE24(ip); ip+=3; + offset = MEM_readLE24(ip); ip+=3; } else // (token>>ML_RUN_BITS2) == 3 { @@ -1466,7 +1085,7 @@ FORCE_INLINE int LZ5_decompress_generic( match += dec32table[offset]; memcpy(op+4, match, 4); match -= dec64; - } else { LZ5_copy8(op, match); match+=8; } + } else { MEM_copy8(op, match); match+=8; } op += 8; if (unlikely(cpy>oend-12)) @@ -1475,14 +1094,14 @@ FORCE_INLINE int LZ5_decompress_generic( if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ if (op < oCopyLimit) { - LZ5_wildCopy(op, match, oCopyLimit); + MEM_wildCopy(op, match, oCopyLimit); match += oCopyLimit - op; op = oCopyLimit; } while (op +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ +#else +# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */ +# if defined(__GNUC__) || defined(__clang__) +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +# else +# define FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +#endif /* _MSC_VER */ + +/* LZ5_GCC_VERSION is defined into lz5.h */ +#if (LZ5_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +/************************************** +* Memory routines +**************************************/ +#include /* malloc, calloc, free */ +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include /* memset, memcpy */ +#define MEM_INIT memset + + +/************************************** +* Common Constants +**************************************/ +#define MINMATCH 3 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT (WILDCOPYLENGTH+MINMATCH) +static const int LZ5_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define MAXD_LOG 22 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) +#define LZ5_DICT_SIZE (1 << MAXD_LOG) + +#define ML_BITS 3 +#define ML_MASK ((1U< -/* ************************************* -* Local Compiler Options -***************************************/ -#if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -#endif - - /* ************************************* * Common LZ5 definition ***************************************/ -#define LZ5_COMMONDEFS_ONLY -#include "lz5.c" +#include "mem.h" +#include "lz5common.h" /* ************************************* @@ -92,6 +80,8 @@ static const int LZ5HC_compressionLevel_default = 9; #define LZ5_SHORT_LITERALS ((1<(b))?(a):(b) -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((32)-HASH_LOG)) -#define HASH_FUNCTION3(i) (((i) * 506832829U) >> ((32)-HASH_LOG3)) +static const U32 prime3bytes = 506832829U; +static U32 LZ5HC_hash3(U32 u, U32 h) { return (u * prime3bytes) << (32-24) >> (32-h) ; } +static size_t LZ5HC_hash3Ptr(const void* ptr, U32 h) { return LZ5HC_hash3(MEM_read32(ptr), h); } + +static const U32 prime4bytes = 2654435761U; +static U32 LZ5HC_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t LZ5HC_hash4Ptr(const void* ptr, U32 h) { return LZ5HC_hash4(MEM_read32(ptr), h); } -static U32 LZ5HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ5_read32(ptr)); } -static U32 LZ5HC_hashPtr3(const void* ptr) { return HASH_FUNCTION3(LZ5_read24(ptr)); } +static const U64 prime5bytes = 889523592379ULL; +static size_t LZ5HC_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } +static size_t LZ5HC_hash5Ptr(const void* p, U32 h) { return LZ5HC_hash5(MEM_read64(p), h); } -#define LZ5HC_LIMIT (1<<(DICTIONARY_LOGSIZE)) +static const U64 prime6bytes = 227718039650203ULL; +static size_t LZ5HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } +static size_t LZ5HC_hash6Ptr(const void* p, U32 h) { return LZ5HC_hash6(MEM_read64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t LZ5HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } +static size_t LZ5HC_hash7Ptr(const void* p, U32 h) { return LZ5HC_hash7(MEM_read64(p), h); } + +static size_t LZ5HC_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return LZ5HC_hash4Ptr(p, hBits); + case 5: return LZ5HC_hash5Ptr(p, hBits); + case 6: return LZ5HC_hash6Ptr(p, hBits); + case 7: return LZ5HC_hash7Ptr(p, hBits); + } +} +/************************************** +* Local Macros +**************************************/ #define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) +#define MAX(a,b) ((a)>(b))?(a):(b) #define LZ5_SHORT_LITLEN_COST(len) (lenparams.hashLog, ctx->params.searchLength); chainTable[idx & MAXD_MASK] = (U32)(idx - HashTable[h]); HashTable[h] = idx; #if MINMATCH == 3 - HashTable3[LZ5HC_hashPtr3(base+idx)] = idx; + HashTable3[LZ5HC_hash3Ptr(base+idx, ctx->params.hashLog3)] = idx; #endif idx++; } @@ -229,24 +248,24 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I /* HC4 match finder */ LZ5HC_Insert(ctx, ip); - matchIndex = HashTable[LZ5HC_hashPtr(ip)]; + matchIndex = HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; match = ip - ctx->last_off; - if (LZ5_read24(match) == LZ5_read24(ip)) + if (MEM_read24(match) == MEM_read24(ip)) { - ml = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + ml = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; *matchpos = match; return (int)ml; } #if MINMATCH == 3 - size_t offset = ip - base - ctx->hashTable3[LZ5HC_hashPtr3(ip)]; + size_t offset = ip - base - ctx->hashTable3[LZ5HC_hash3Ptr(ip, ctx->params.hashLog3)]; if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) { match = ip - offset; - if (match > base && LZ5_read24(ip) == LZ5_read24(match)) + if (match > base && MEM_read24(ip) == MEM_read24(match)) { - ml = 3;//LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + ml = 3;//MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; *matchpos = match; } } @@ -258,9 +277,9 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I if (matchIndex >= dictLimit) { match = base + matchIndex; - if (match < ip && *(match+ml) == *(ip+ml) && (LZ5_read32(match) == LZ5_read32(ip))) + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) { - mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; if (mlt > ml) if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) { ml = mlt; *matchpos = match; } @@ -269,13 +288,13 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I else { match = dictBase + matchIndex; - if (LZ5_read32(match) == LZ5_read32(ip)) + if (MEM_read32(match) == MEM_read32(ip)) { const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iLimit) vLimit = iLimit; - mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) - mlt += LZ5_count(ip+mlt, base+dictLimit, iLimit); + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); if (mlt > ml) if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ @@ -313,12 +332,12 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( /* First Match */ LZ5HC_Insert(ctx, ip); - matchIndex = HashTable[LZ5HC_hashPtr(ip)]; + matchIndex = HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; match = ip - ctx->last_off; - if (LZ5_read24(match) == LZ5_read24(ip)) + if (MEM_read24(match) == MEM_read24(ip)) { - int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + int mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; int back = 0; while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; @@ -334,13 +353,13 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( #if MINMATCH == 3 - size_t offset = ip - base - ctx->hashTable3[LZ5HC_hashPtr3(ip)]; + size_t offset = ip - base - ctx->hashTable3[LZ5HC_hash3Ptr(ip, ctx->params.hashLog3)]; if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) { match = ip - offset; - if (match > base && LZ5_read24(ip) == LZ5_read24(match)) + if (match > base && MEM_read24(ip) == MEM_read24(match)) { - int mlt = LZ5_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; + int mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iHighLimit) + MINMATCH; int back = 0; while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; @@ -365,9 +384,9 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( const BYTE* matchPtr = base + matchIndex; // if (*(ip + longest) == *(matchPtr + longest)) - if (matchPtr < ip && LZ5_read32(matchPtr) == LZ5_read32(ip)) + if (matchPtr < ip && MEM_read32(matchPtr) == MEM_read32(ip)) { - int mlt = MINMATCH + LZ5_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + int mlt = MINMATCH + MEM_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); int back = 0; while ((ip+back>iLowLimit) @@ -389,15 +408,15 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( else { const BYTE* matchPtr = dictBase + matchIndex; - if (LZ5_read32(matchPtr) == LZ5_read32(ip)) + if (MEM_read32(matchPtr) == MEM_read32(ip)) { size_t mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = LZ5_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ5_count(ip+mlt, base+dictLimit, iHighLimit); + mlt += MEM_count(ip+mlt, base+dictLimit, iHighLimit); while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; mlt -= back; if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } @@ -452,7 +471,7 @@ FORCE_INLINE int LZ5HC_encodeSequence ( } /* Copy Literals */ - LZ5_wildCopy(*op, *anchor, (*op) + length); + MEM_wildCopy(*op, *anchor, (*op) + length); *op += length; /* Encode Offset */ @@ -470,12 +489,12 @@ FORCE_INLINE int LZ5HC_encodeSequence ( else if (*ip-match < LZ5_MID_OFFSET_DISTANCE) { - LZ5_writeLE16(*op, (U16)(*ip-match)); *op+=2; + MEM_writeLE16(*op, (U16)(*ip-match)); *op+=2; } else { *token+=(2<last_off = *ip-match; @@ -506,6 +525,7 @@ static int LZ5HC_compress_generic ( ) { LZ5HC_Data_Structure* ctx = (LZ5HC_Data_Structure*) ctxvoid; + ctx->params = LZ5HC_defaultParameters[compressionLevel]; ctx->inputBuffer = (BYTE*) source; ctx->outputBuffer = (BYTE*) dest; const BYTE* ip = (const BYTE*) source; diff --git a/lib/mem.h b/lib/mem.h new file mode 100644 index 0000000..922fdbb --- /dev/null +++ b/lib/mem.h @@ -0,0 +1,481 @@ +/* ****************************************************************** + mem.h + low-level memory access routines + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/****************************************** +* Includes +******************************************/ +#include /* size_t, ptrdiff_t */ +#include /* memcpy */ + + + +/* ************************************* +* Local Compiler Options +***************************************/ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#endif + + + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#elif defined(__GNUC__) +# define MEM_STATIC static __attribute__((unused)) +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/**************************************************************** +* Basic Types +*****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/**************************************************************** +* Memory I/O +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets generating assembly depending on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define MEM_FORCE_MEMORY_ACCESS 2 +# elif defined(__INTEL_COMPILER) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; U64 u64; } __attribute__((packed)) unalign; + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; } + +#else + +/* default method, safe and standard. + can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + memcpy(memPtr, &value, sizeof(value)); +} + +#endif // MEM_FORCE_MEMORY_ACCESS + + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) + { + MEM_write16(memPtr, val); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + if (MEM_isLittleEndian()) + { + U32 val32 = 0; + memcpy(&val32, memPtr, 3); + return val32; + } + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)(p[0] + (p[1]<<8) + (p[2]<<16)); + } +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 value) +{ + if (MEM_isLittleEndian()) + { + memcpy(memPtr, &value, 3); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + p[2] = (BYTE)(value>>16); + } +} + + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24)); + } +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + { + MEM_write32(memPtr, val32); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val32; + p[1] = (BYTE)(val32>>8); + p[2] = (BYTE)(val32>>16); + p[3] = (BYTE)(val32>>24); + } +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + { + const BYTE* p = (const BYTE*)memPtr; + return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24) + + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56)); + } +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + { + MEM_write64(memPtr, val64); + } + else + { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val64; + p[1] = (BYTE)(val64>>8); + p[2] = (BYTE)(val64>>16); + p[3] = (BYTE)(val64>>24); + p[4] = (BYTE)(val64>>32); + p[5] = (BYTE)(val64>>40); + p[6] = (BYTE)(val64>>48); + p[7] = (BYTE)(val64>>56); + } +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + + + +#define MEM_read24(ptr) (uint32_t)(MEM_read32(ptr)<<8) + +/* ************************************** +* Function body to include for inlining +****************************************/ +static size_t MEM_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; } + +#define MIN(a,b) ((a)<(b) ? (a) : (b)) + +static unsigned MEM_highbit(U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + _BitScanReverse(&r, val); + return (unsigned)r; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return 31 - __builtin_clz(val); +# else /* Software version */ + static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + int r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + +MEM_STATIC unsigned MEM_NbCommonBytes (register size_t val) +{ + if (MEM_isLittleEndian()) + { + if (MEM_64bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanForward64( &r, (U64)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) + unsigned long r=0; + _BitScanForward( &r, (U32)val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } + else /* Big Endian CPU */ + { + if (MEM_32bits()) + { +# if defined(_MSC_VER) && defined(_WIN64) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } + else /* 32 bits */ + { +# if defined(_MSC_VER) + unsigned long r = 0; + _BitScanReverse( &r, (unsigned long)val ); + return (unsigned)(r>>3); +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } + } +} + + +MEM_STATIC size_t MEM_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + while ((pIn Date: Sat, 28 Nov 2015 11:36:18 +0100 Subject: [PATCH 10/22] introduced LZ5HC_parameters --- lib/lz5.c | 12 ++- lib/lz5common.h | 134 ++++++++++++++++++++++++--- lib/lz5frame.c | 4 +- lib/lz5hc.c | 214 ++++++++++--------------------------------- lib/lz5hc.h | 8 +- programs/fullbench.c | 2 +- 6 files changed, 185 insertions(+), 189 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index 7e0e776..fd5ae2d 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -33,15 +33,20 @@ - LZ5 public forum : https://groups.google.com/forum/#!forum/lz5c */ + + +/************************************** +* Includes +**************************************/ #include "mem.h" #include "lz5common.h" +#include "lz5.h" /************************************** * Local Constants **************************************/ #define LZ5_HASHLOG (LZ5_MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << LZ5_MEMORY_USAGE) #define HASH_SIZE_U32 (1 << LZ5_HASHLOG) /* required as macro for static allocation */ static const int LZ5_64Klimit = ((64 KB) + (MFLIMIT-1)); @@ -86,12 +91,11 @@ int LZ5_sizeofState() { return LZ5_STREAMSIZE; } static U32 LZ5_hashSequence(U32 sequence, tableType_t const tableType) { if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((32)-(LZ5_HASHLOG+1))); + return (((sequence) * prime4bytes) >> ((32)-(LZ5_HASHLOG+1))); else - return (((sequence) * 2654435761U) >> ((32)-LZ5_HASHLOG)); + return (((sequence) * prime4bytes) >> ((32)-LZ5_HASHLOG)); } -static const U64 prime5bytes = 889523592379ULL; static U32 LZ5_hashSequence64(size_t sequence, tableType_t const tableType) { const U32 hashLog = (tableType == byU16) ? LZ5_HASHLOG+1 : LZ5_HASHLOG; diff --git a/lib/lz5common.h b/lib/lz5common.h index b400cc9..33c683d 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -1,3 +1,10 @@ +#ifndef LZ5COMMON_H +#define LZ5COMMON_H + +#if defined (__cplusplus) +extern "C" { +#endif + /************************************** * Tuning parameters @@ -8,6 +15,8 @@ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). */ #define HEAPMODE 0 +#define LZ5HC_HEAPMODE 0 + /* * ACCELERATION_DEFAULT : @@ -16,10 +25,6 @@ #define ACCELERATION_DEFAULT 1 -/************************************** -* Includes -**************************************/ -#include "lz5.h" /************************************** @@ -103,8 +108,85 @@ static const int LZ5_minLength = (MFLIMIT+1); + + /* ************************************* -* Types +* HC Constants +***************************************/ +#define DICTIONARY_LOGSIZE 22 +#define MAXD (1<> (32-h) ; } +static size_t LZ5HC_hash3Ptr(const void* ptr, U32 h) { return LZ5HC_hash3(MEM_read32(ptr), h); } + +static const U32 prime4bytes = 2654435761U; +static U32 LZ5HC_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t LZ5HC_hash4Ptr(const void* ptr, U32 h) { return LZ5HC_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t LZ5HC_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } +static size_t LZ5HC_hash5Ptr(const void* p, U32 h) { return LZ5HC_hash5(MEM_read64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t LZ5HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } +static size_t LZ5HC_hash6Ptr(const void* p, U32 h) { return LZ5HC_hash6(MEM_read64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t LZ5HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } +static size_t LZ5HC_hash7Ptr(const void* p, U32 h) { return LZ5HC_hash7(MEM_read64(p), h); } + +static size_t LZ5HC_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return LZ5HC_hash4Ptr(p, hBits); + case 5: return LZ5HC_hash5Ptr(p, hBits); + case 6: return LZ5HC_hash6Ptr(p, hBits); + case 7: return LZ5HC_hash7Ptr(p, hBits); + } +} + + +/************************************** +* HC Local Macros +**************************************/ +#define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) +#define MAX(a,b) ((a)>(b))?(a):(b) + +#define LZ5_SHORT_LITLEN_COST(len) (len LZ5_MID_OFFSET_DISTANCE) || ((offset)prefs.compressionLevel < minHClevel) cctxPtr->lz5CtxPtr = (void*)LZ5_createStream(); else - cctxPtr->lz5CtxPtr = (void*)LZ5_createStreamHC(); + cctxPtr->lz5CtxPtr = (void*)LZ5_createStreamHC(cctxPtr->prefs.compressionLevel); cctxPtr->lz5CtxLevel = tableID; } } @@ -439,7 +439,7 @@ size_t LZ5F_compressBegin(LZ5F_compressionContext_t compressionContext, void* ds if (cctxPtr->prefs.compressionLevel < minHClevel) LZ5_resetStream((LZ5_stream_t*)(cctxPtr->lz5CtxPtr)); else - LZ5_resetStreamHC((LZ5_streamHC_t*)(cctxPtr->lz5CtxPtr), cctxPtr->prefs.compressionLevel); + LZ5_resetStreamHC((LZ5_streamHC_t*)(cctxPtr->lz5CtxPtr)); /* Magic Number */ LZ5F_writeLE32(dstPtr, LZ5F_MAGICNUMBER); diff --git a/lib/lz5hc.c b/lib/lz5hc.c index f96c9f0..c67666b 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -36,144 +36,21 @@ -/* ************************************* -* Tuning Parameter -***************************************/ -static const int LZ5HC_compressionLevel_default = 9; - -/*! - * HEAPMODE : - * Select how default compression function will allocate workplace memory, - * in stack (0:fastest), or in heap (1:requires malloc()). - * Since workplace is rather large, heap mode is recommended. - */ -#define LZ5HC_HEAPMODE 0 - - /* ************************************* * Includes ***************************************/ #include "lz5hc.h" -#include - - - -/* ************************************* -* Common LZ5 definition -***************************************/ #include "mem.h" #include "lz5common.h" - - -/* ************************************* -* Local Constants -***************************************/ -#define DICTIONARY_LOGSIZE 22 -#define MAXD (1<> (32-h) ; } -static size_t LZ5HC_hash3Ptr(const void* ptr, U32 h) { return LZ5HC_hash3(MEM_read32(ptr), h); } - -static const U32 prime4bytes = 2654435761U; -static U32 LZ5HC_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } -static size_t LZ5HC_hash4Ptr(const void* ptr, U32 h) { return LZ5HC_hash4(MEM_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t LZ5HC_hash5(U64 u, U32 h) { return (size_t)((u * prime5bytes) << (64-40) >> (64-h)) ; } -static size_t LZ5HC_hash5Ptr(const void* p, U32 h) { return LZ5HC_hash5(MEM_read64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t LZ5HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } -static size_t LZ5HC_hash6Ptr(const void* p, U32 h) { return LZ5HC_hash6(MEM_read64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t LZ5HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } -static size_t LZ5HC_hash7Ptr(const void* p, U32 h) { return LZ5HC_hash7(MEM_read64(p), h); } - -static size_t LZ5HC_hashPtr(const void* p, U32 hBits, U32 mls) -{ - switch(mls) - { - default: - case 4: return LZ5HC_hash4Ptr(p, hBits); - case 5: return LZ5HC_hash5Ptr(p, hBits); - case 6: return LZ5HC_hash6Ptr(p, hBits); - case 7: return LZ5HC_hash7Ptr(p, hBits); - } -} - - -/************************************** -* Local Macros -**************************************/ -#define LZ5HC_DEBUG(fmt, args...) ; //printf(fmt, ##args) -#define MAX(a,b) ((a)>(b))?(a):(b) - -#define LZ5_SHORT_LITLEN_COST(len) (len LZ5_MID_OFFSET_DISTANCE) || ((offset) /************************************** * HC Compression **************************************/ -FORCE_INLINE int LZ5_MORE_PROFITABLE(uint32_t best_off, uint32_t best_common, uint32_t off, uint32_t common, int literals, uint32_t last_off) +FORCE_INLINE int LZ5HC_more_profitable(uint32_t best_off, uint32_t best_common, uint32_t off, uint32_t common, int literals, uint32_t last_off) { int sum; @@ -187,11 +64,40 @@ FORCE_INLINE int LZ5_MORE_PROFITABLE(uint32_t best_off, uint32_t best_common, ui } +int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* ctx, int compressionLevel) +{ + ctx->compressionLevel = compressionLevel; + ctx->params = LZ5HC_defaultParameters[1]; + + ctx->hashTable = ALLOCATOR(1, sizeof(U32)*((1 << ctx->params.hashLog3)+(1 << ctx->params.hashLog))); + if (!ctx->hashTable) + return 0; + + ctx->hashTable3 = ctx->hashTable + (1 << ctx->params.hashLog); + + ctx->chainTable = ALLOCATOR(1, sizeof(U32)*MAXD); + if (!ctx->chainTable) + { + FREEMEM(ctx->hashTable); + ctx->hashTable = NULL; + return 0; + } + + return 1; +} + +void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr) +{ + if (statePtr->chainTable) FREEMEM(statePtr->chainTable); + if (statePtr->hashTable) FREEMEM(statePtr->hashTable); +} + + static void LZ5HC_init (LZ5HC_Data_Structure* ctx, const BYTE* start) { - MEM_INIT((void*)ctx->hashTable, 0, sizeof(U32)*HASHTABLESIZE); - MEM_INIT((void*)ctx->hashTable3, 0, sizeof(U32)*HASHTABLESIZE3); + MEM_INIT((void*)ctx->hashTable, 0, sizeof(U32)*((1 << ctx->params.hashLog) + (1 << ctx->params.hashLog3))); MEM_INIT(ctx->chainTable, 0xFF, sizeof(U32)*MAXD); + ctx->nextToUpdate = LZ5HC_LIMIT; ctx->base = start - LZ5HC_LIMIT; ctx->end = start; @@ -520,12 +426,10 @@ static int LZ5HC_compress_generic ( char* dest, int inputSize, int maxOutputSize, - int compressionLevel, limitedOutput_directive limit ) { LZ5HC_Data_Structure* ctx = (LZ5HC_Data_Structure*) ctxvoid; - ctx->params = LZ5HC_defaultParameters[compressionLevel]; ctx->inputBuffer = (BYTE*) source; ctx->outputBuffer = (BYTE*) dest; const BYTE* ip = (const BYTE*) source; @@ -547,6 +451,7 @@ static int LZ5HC_compress_generic ( const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; /* init */ + int compressionLevel = ctx->compressionLevel; if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel; if (compressionLevel < 1) compressionLevel = LZ5HC_compressionLevel_default; maxNbAttempts = 1 << (compressionLevel-1); @@ -635,7 +540,7 @@ static int LZ5HC_compress_generic ( if (start0 < ip) { - if (LZ5_MORE_PROFITABLE(ip - ref, ml, start0 - ref0, ml0, ref0 - ref, ctx->last_off)) + if (LZ5HC_more_profitable(ip - ref, ml, start0 - ref0, ml0, ref0 - ref, ctx->last_off)) { ip = start0; ref = ref0; @@ -664,40 +569,16 @@ static int LZ5HC_compress_generic ( int LZ5_sizeofStateHC(void) { return sizeof(LZ5HC_Data_Structure); } -int LZ5_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) +int LZ5_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ LZ5HC_init ((LZ5HC_Data_Structure*)state, (const BYTE*)src); if (maxDstSize < LZ5_compressBound(srcSize)) - return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, limitedOutput); + return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, limitedOutput); else - return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, compressionLevel, noLimit); + return LZ5HC_compress_generic (state, src, dst, srcSize, maxDstSize, noLimit); } -int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* statePtr) -{ - statePtr->hashTable = ALLOCATOR(1, sizeof(U32)*(HASHTABLESIZE3+HASHTABLESIZE)); - if (!statePtr->hashTable) - return 0; - - statePtr->hashTable3 = statePtr->hashTable + HASHTABLESIZE; - - statePtr->chainTable = ALLOCATOR(1, sizeof(U32)*MAXD); - if (!statePtr->chainTable) - { - FREEMEM(statePtr->hashTable); - statePtr->hashTable = NULL; - return 0; - } - - return 1; -} - -void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr) -{ - if (statePtr->chainTable) FREEMEM(statePtr->chainTable); - if (statePtr->hashTable) FREEMEM(statePtr->hashTable); -} int LZ5_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel) { @@ -710,10 +591,10 @@ int LZ5_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int int cSize = 0; - if (!LZ5_alloc_mem_HC(statePtr)) + if (!LZ5_alloc_mem_HC(statePtr, compressionLevel)) return 0; - cSize = LZ5_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize, compressionLevel); + cSize = LZ5_compress_HC_extStateHC(statePtr, src, dst, srcSize, maxDstSize); LZ5_free_mem_HC(statePtr); @@ -729,13 +610,13 @@ int LZ5_compress_HC(const char* src, char* dst, int srcSize, int maxDstSize, int * Streaming Functions **************************************/ /* allocation */ -LZ5_streamHC_t* LZ5_createStreamHC(void) +LZ5_streamHC_t* LZ5_createStreamHC(int compressionLevel) { LZ5HC_Data_Structure* statePtr = (LZ5HC_Data_Structure*)malloc(sizeof(LZ5_streamHC_t)); if (!statePtr) return NULL; - if (!LZ5_alloc_mem_HC(statePtr)) + if (!LZ5_alloc_mem_HC(statePtr, compressionLevel)) { FREEMEM(statePtr); return NULL; @@ -754,11 +635,10 @@ int LZ5_freeStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr) /* initialization */ -void LZ5_resetStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr, int compressionLevel) +void LZ5_resetStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr) { LZ5_STATIC_ASSERT(sizeof(LZ5HC_Data_Structure) <= sizeof(LZ5_streamHC_t)); /* if compilation fails here, LZ5_STREAMHCSIZE must be increased */ ((LZ5HC_Data_Structure*)LZ5_streamHCPtr)->base = NULL; - ((LZ5HC_Data_Structure*)LZ5_streamHCPtr)->compressionLevel = (unsigned)compressionLevel; } int LZ5_loadDictHC (LZ5_streamHC_t* LZ5_streamHCPtr, const char* dictionary, int dictSize) @@ -825,7 +705,7 @@ static int LZ5_compressHC_continue_generic (LZ5HC_Data_Structure* ctxPtr, } } - return LZ5HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, ctxPtr->compressionLevel, limit); + return LZ5HC_compress_generic (ctxPtr, source, dest, inputSize, maxOutputSize, limit); } int LZ5_compress_HC_continue (LZ5_streamHC_t* LZ5_streamHCPtr, const char* source, char* dest, int inputSize, int maxOutputSize) @@ -867,5 +747,5 @@ int LZ5_compressHC(const char* src, char* dst, int srcSize) { return LZ5_compres int LZ5_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC(src, dst, srcSize, maxDstSize, 0); } int LZ5_compressHC_continue (LZ5_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_continue (ctx, src, dst, srcSize, LZ5_compressBound(srcSize)); } int LZ5_compressHC_limitedOutput_continue (LZ5_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } -int LZ5_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, LZ5_compressBound(srcSize), 0); } -int LZ5_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ5_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, LZ5_compressBound(srcSize)); } +int LZ5_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize); } diff --git a/lib/lz5hc.h b/lib/lz5hc.h index 23d37fc..b4ddbc5 100644 --- a/lib/lz5hc.h +++ b/lib/lz5hc.h @@ -68,11 +68,11 @@ LZ5_compress_HC : typedef struct LZ5HC_Data_s LZ5HC_Data_Structure; -int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* statePtr); +int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* statePtr, int compressionLevel); void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr); int LZ5_sizeofStateHC(void); -int LZ5_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); +int LZ5_compress_HC_extStateHC(void* state, const char* src, char* dst, int srcSize, int maxDstSize); /* LZ5_compress_HC_extStateHC() : Use this function if you prefer to manually allocate memory for compression tables. @@ -103,7 +103,7 @@ typedef struct { size_t table[LZ5_STREAMHCSIZE_SIZET]; } LZ5_streamHC_t; */ -LZ5_streamHC_t* LZ5_createStreamHC(void); +LZ5_streamHC_t* LZ5_createStreamHC(int compressionLevel); int LZ5_freeStreamHC (LZ5_streamHC_t* streamHCPtr); /* These functions create and release memory for LZ5 HC streaming state. @@ -113,7 +113,7 @@ int LZ5_freeStreamHC (LZ5_streamHC_t* streamHCPtr); to avoid size mismatch between different versions. */ -void LZ5_resetStreamHC (LZ5_streamHC_t* streamHCPtr, int compressionLevel); +void LZ5_resetStreamHC (LZ5_streamHC_t* streamHCPtr); int LZ5_loadDictHC (LZ5_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); int LZ5_compress_HC_continue (LZ5_streamHC_t* streamHCPtr, const char* src, char* dst, int srcSize, int maxDstSize); diff --git a/programs/fullbench.c b/programs/fullbench.c index 8b71335..d09679e 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -471,7 +471,7 @@ static int local_LZ5_compress_forceDict(const char* in, char* out, int inSize) LZ5_streamHC_t LZ5_streamHC; static void local_LZ5_resetStreamHC(void) { - LZ5_resetStreamHC(&LZ5_streamHC, 0); + LZ5_resetStreamHC(&LZ5_streamHC); } static int local_LZ5_saveDictHC(const char* in, char* out, int inSize) From a6931e4c8a2ddd8b861db90a92b5a53de8cb5c2b Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 12:02:27 +0100 Subject: [PATCH 11/22] working with LZ5HC_parameters --- lib/lz5common.h | 22 ++++------------------ lib/lz5hc.c | 31 ++++++++++++++++++------------- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/lib/lz5common.h b/lib/lz5common.h index 33c683d..14ea412 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -108,22 +108,6 @@ static const int LZ5_minLength = (MFLIMIT+1); - - -/* ************************************* -* HC Constants -***************************************/ -#define DICTIONARY_LOGSIZE 22 -#define MAXD (1<(b))?(a):(b) +#define LZ5_SHORT_LITERALS ((1<hashTable3 = ctx->hashTable + (1 << ctx->params.hashLog); - ctx->chainTable = ALLOCATOR(1, sizeof(U32)*MAXD); + ctx->chainTable = ALLOCATOR(1, sizeof(U32)*(1 << ctx->params.contentLog)); if (!ctx->chainTable) { FREEMEM(ctx->hashTable); ctx->hashTable = NULL; return 0; } - + return 1; } @@ -96,14 +96,14 @@ void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr) static void LZ5HC_init (LZ5HC_Data_Structure* ctx, const BYTE* start) { MEM_INIT((void*)ctx->hashTable, 0, sizeof(U32)*((1 << ctx->params.hashLog) + (1 << ctx->params.hashLog3))); - MEM_INIT(ctx->chainTable, 0xFF, sizeof(U32)*MAXD); + MEM_INIT(ctx->chainTable, 0xFF, sizeof(U32)*(1 << ctx->params.contentLog)); - ctx->nextToUpdate = LZ5HC_LIMIT; - ctx->base = start - LZ5HC_LIMIT; + ctx->nextToUpdate = (1 << ctx->params.windowLog); + ctx->base = start - (1 << ctx->params.windowLog); ctx->end = start; - ctx->dictBase = start - LZ5HC_LIMIT; - ctx->dictLimit = LZ5HC_LIMIT; - ctx->lowLimit = LZ5HC_LIMIT; + ctx->dictBase = start - (1 << ctx->params.windowLog); + ctx->dictLimit = (1 << ctx->params.windowLog); + ctx->lowLimit = (1 << ctx->params.windowLog); ctx->last_off = 1; } @@ -118,12 +118,13 @@ FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* ctx, const BYTE* ip) #endif const BYTE* const base = ctx->base; const U32 target = (U32)(ip - base); + const U32 contentMask = (1 << ctx->params.contentLog) - 1; U32 idx = ctx->nextToUpdate; while(idx < target) { U32 h = LZ5HC_hashPtr(base+idx, ctx->params.hashLog, ctx->params.searchLength); - chainTable[idx & MAXD_MASK] = (U32)(idx - HashTable[h]); + chainTable[idx & contentMask] = (U32)(idx - HashTable[h]); HashTable[h] = idx; #if MINMATCH == 3 HashTable3[LZ5HC_hash3Ptr(base+idx, ctx->params.hashLog3)] = idx; @@ -146,7 +147,9 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I const BYTE* const base = ctx->base; const BYTE* const dictBase = ctx->dictBase; const U32 dictLimit = ctx->dictLimit; - const U32 lowLimit = (ctx->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const U32 contentMask = (1 << ctx->params.contentLog) - 1; U32 matchIndex; const BYTE* match; int nbAttempts=maxNbAttempts; @@ -206,7 +209,7 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } - matchIndex -= chainTable[matchIndex & MAXD_MASK]; + matchIndex -= chainTable[matchIndex & contentMask]; } return (int)ml; @@ -229,7 +232,9 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( const BYTE* const base = ctx->base; const U32 dictLimit = ctx->dictLimit; const BYTE* const lowPrefixPtr = base + dictLimit; - const U32 lowLimit = (ctx->lowLimit + LZ5HC_LIMIT > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (LZ5HC_LIMIT - 1); + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const U32 contentMask = (1 << ctx->params.contentLog) - 1; const BYTE* const dictBase = ctx->dictBase; const BYTE* match; U32 matchIndex; @@ -328,7 +333,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } } } - matchIndex -= chainTable[matchIndex & MAXD_MASK]; + matchIndex -= chainTable[matchIndex & contentMask]; } From 6773089a159029df79fb14febeab558d271c8bab Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 12:18:14 +0100 Subject: [PATCH 12/22] compr level tunning --- lib/lz5common.h | 17 +++++++++++------ lib/lz5hc.c | 26 +++++++++++--------------- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/lz5common.h b/lib/lz5common.h index 14ea412..8b3c1a3 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -212,17 +212,22 @@ struct LZ5HC_Data_s /* ************************************* * HC Pre-defined compression levels ***************************************/ -static const int g_maxCompressionLevel = 12; -static const int LZ5HC_compressionLevel_default = 9; +#define LZ5HC_MAX_CLEVEL 8 + +static const int g_maxCompressionLevel = LZ5HC_MAX_CLEVEL-1; +static const int LZ5HC_compressionLevel_default = 5; -#define LZ5HC_MAX_CLEVEL 4 static const LZ5HC_parameters LZ5HC_defaultParameters[LZ5HC_MAX_CLEVEL] = { /* W, C, H, H3, S, L, strat */ { 0, 0, 0, 0, 0, 0, LZ5HC_fast }, /* level 0 - never used */ - { 22, 22, 23, 16, 1, 4, LZ5HC_fast }, /* level 1 */ - { 17, 15, 16, 13, 1, 4, LZ5HC_fast }, /* level 2 */ - { 17, 16, 17, 16, 1, 4, LZ5HC_fast } /* level 3 */ + { 22, 22, 15, 13, 1, 4, LZ5HC_fast }, /* level 1 */ + { 22, 22, 17, 13, 1, 4, LZ5HC_fast }, /* level 2 */ + { 22, 22, 19, 16, 1, 4, LZ5HC_fast }, /* level 3 */ + { 22, 22, 23, 16, 3, 4, LZ5HC_fast }, /* level 4 */ + { 22, 22, 23, 16, 8, 4, LZ5HC_fast }, /* level 5 */ + { 22, 22, 23, 16, 16, 4, LZ5HC_fast }, /* level 6 */ + { 22, 22, 23, 16, 32, 4, LZ5HC_fast }, /* level 7 */ }; diff --git a/lib/lz5hc.c b/lib/lz5hc.c index efbb8ec..a4cb934 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -66,8 +66,11 @@ FORCE_INLINE int LZ5HC_more_profitable(uint32_t best_off, uint32_t best_common, int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* ctx, int compressionLevel) { - ctx->compressionLevel = compressionLevel; - ctx->params = LZ5HC_defaultParameters[1]; + ctx->compressionLevel = compressionLevel; + if (compressionLevel > g_maxCompressionLevel) ctx->compressionLevel = g_maxCompressionLevel; + if (compressionLevel < 1) ctx->compressionLevel = LZ5HC_compressionLevel_default; + + ctx->params = LZ5HC_defaultParameters[ctx->compressionLevel]; ctx->hashTable = ALLOCATOR(1, sizeof(U32)*((1 << ctx->params.hashLog3)+(1 << ctx->params.hashLog))); if (!ctx->hashTable) @@ -139,8 +142,7 @@ FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* ctx, const BYTE* ip) FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, - const BYTE** matchpos, - const int maxNbAttempts) + const BYTE** matchpos) { U32* const chainTable = ctx->chainTable; U32* const HashTable = ctx->hashTable; @@ -152,7 +154,7 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I const U32 contentMask = (1 << ctx->params.contentLog) - 1; U32 matchIndex; const BYTE* match; - int nbAttempts=maxNbAttempts; + int nbAttempts=ctx->params.searchNum; size_t ml=0, mlt; /* HC4 match finder */ @@ -224,8 +226,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( const BYTE* const iHighLimit, int longest, const BYTE** matchpos, - const BYTE** startpos, - const int maxNbAttempts) + const BYTE** startpos) { U32* const chainTable = ctx->chainTable; U32* const HashTable = ctx->hashTable; @@ -238,7 +239,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( const BYTE* const dictBase = ctx->dictBase; const BYTE* match; U32 matchIndex; - int nbAttempts = maxNbAttempts; + int nbAttempts = ctx->params.searchNum; /* First Match */ @@ -446,7 +447,6 @@ static int LZ5HC_compress_generic ( BYTE* op = (BYTE*) dest; BYTE* const oend = op + maxOutputSize; - unsigned maxNbAttempts; int ml, ml2, ml0; const BYTE* ref=NULL; const BYTE* start2=NULL; @@ -456,10 +456,6 @@ static int LZ5HC_compress_generic ( const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; /* init */ - int compressionLevel = ctx->compressionLevel; - if (compressionLevel > g_maxCompressionLevel) compressionLevel = g_maxCompressionLevel; - if (compressionLevel < 1) compressionLevel = LZ5HC_compressionLevel_default; - maxNbAttempts = 1 << (compressionLevel-1); ctx->end += inputSize; ip++; @@ -467,7 +463,7 @@ static int LZ5HC_compress_generic ( /* Main Loop */ while (ip < mflimit) { - ml = LZ5HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref), maxNbAttempts); + ml = LZ5HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); if (!ml) { ip++; continue; } int back = 0; @@ -484,7 +480,7 @@ static int LZ5HC_compress_generic ( _Search: if (ip+ml >= mflimit) goto _Encode; - ml2 = LZ5HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2, maxNbAttempts); + ml2 = LZ5HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2); if (ml2 == 0) goto _Encode; From f8d41361a94d6e04e4691bffacc63cba95134202 Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 16:10:47 +0100 Subject: [PATCH 13/22] LZ5HC_compress_lowest_fast --- lib/lz5common.h | 29 ++--- lib/lz5hc.c | 278 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 282 insertions(+), 25 deletions(-) diff --git a/lib/lz5common.h b/lib/lz5common.h index 8b3c1a3..703347f 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -133,7 +133,7 @@ static const U64 prime6bytes = 227718039650203ULL; static size_t LZ5HC_hash6(U64 u, U32 h) { return (size_t)((u * prime6bytes) << (64-48) >> (64-h)) ; } static size_t LZ5HC_hash6Ptr(const void* p, U32 h) { return LZ5HC_hash6(MEM_read64(p), h); } -static const U64 prime7bytes = 58295818150454627ULL; +static const U64 prime7bytes = 58295818150454627ULL; static size_t LZ5HC_hash7(U64 u, U32 h) { return (size_t)((u * prime7bytes) << (64-56) >> (64-h)) ; } static size_t LZ5HC_hash7Ptr(const void* p, U32 h) { return LZ5HC_hash7(MEM_read64(p), h); } @@ -176,7 +176,7 @@ static size_t LZ5HC_hashPtr(const void* p, U32 hBits, U32 mls) * HC Types ***************************************/ /** from faster to stronger */ -typedef enum { LZ5HC_fast, LZ5HC_greedy, LZ5HC_lazy, LZ5HC_lazy2, LZ5HC_lazymax, LZ5HC_btlazy2 } LZ5HC_strategy; +typedef enum { LZ5HC_lowest_fast, LZ5HC_lowest_price } LZ5HC_strategy; typedef struct { @@ -212,25 +212,26 @@ struct LZ5HC_Data_s /* ************************************* * HC Pre-defined compression levels ***************************************/ -#define LZ5HC_MAX_CLEVEL 8 +#define LZ5HC_MAX_CLEVEL 9 -static const int g_maxCompressionLevel = LZ5HC_MAX_CLEVEL-1; +static const int g_maxCompressionLevel = LZ5HC_MAX_CLEVEL; static const int LZ5HC_compressionLevel_default = 5; -static const LZ5HC_parameters LZ5HC_defaultParameters[LZ5HC_MAX_CLEVEL] = +static const LZ5HC_parameters LZ5HC_defaultParameters[LZ5HC_MAX_CLEVEL+1] = { /* W, C, H, H3, S, L, strat */ - { 0, 0, 0, 0, 0, 0, LZ5HC_fast }, /* level 0 - never used */ - { 22, 22, 15, 13, 1, 4, LZ5HC_fast }, /* level 1 */ - { 22, 22, 17, 13, 1, 4, LZ5HC_fast }, /* level 2 */ - { 22, 22, 19, 16, 1, 4, LZ5HC_fast }, /* level 3 */ - { 22, 22, 23, 16, 3, 4, LZ5HC_fast }, /* level 4 */ - { 22, 22, 23, 16, 8, 4, LZ5HC_fast }, /* level 5 */ - { 22, 22, 23, 16, 16, 4, LZ5HC_fast }, /* level 6 */ - { 22, 22, 23, 16, 32, 4, LZ5HC_fast }, /* level 7 */ + { 0, 0, 0, 0, 0, 0, 0 }, /* level 0 - never used */ + { 22, 22, 13, 13, 1, 4, LZ5HC_lowest_price }, /* level 1 */ + { 22, 22, 15, 13, 1, 4, LZ5HC_lowest_price }, /* level 2 */ + { 22, 22, 17, 13, 1, 4, LZ5HC_lowest_price }, /* level 3 */ + { 22, 22, 13, 13, 1, 4, LZ5HC_lowest_fast }, /* level 4 */ + { 22, 22, 17, 13, 1, 4, LZ5HC_lowest_fast }, /* level 5 */ + { 22, 22, 19, 16, 1, 4, LZ5HC_lowest_price }, /* level 6 */ + { 22, 22, 23, 16, 3, 4, LZ5HC_lowest_price }, /* level 7 */ + { 22, 22, 23, 16, 8, 4, LZ5HC_lowest_price }, /* level 8 */ + { 22, 22, 23, 16, 32, 4, LZ5HC_lowest_price }, /* level 9 */ }; - #if defined (__cplusplus) } #endif diff --git a/lib/lz5hc.c b/lib/lz5hc.c index a4cb934..7c52b7b 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -138,9 +138,8 @@ FORCE_INLINE void LZ5HC_Insert (LZ5HC_Data_Structure* ctx, const BYTE* ip) ctx->nextToUpdate = target; } - -FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* Index table will be updated */ +FORCE_INLINE int LZ5HC_FindBestMatch (LZ5HC_Data_Structure* ctx, /* Index table will be updated */ const BYTE* ip, const BYTE* const iLimit, const BYTE** matchpos) { @@ -157,8 +156,6 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I int nbAttempts=ctx->params.searchNum; size_t ml=0, mlt; - /* HC4 match finder */ - LZ5HC_Insert(ctx, ip); matchIndex = HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; match = ip - ctx->last_off; @@ -218,8 +215,122 @@ FORCE_INLINE int LZ5HC_InsertAndFindBestMatch (LZ5HC_Data_Structure* ctx, /* I } +FORCE_INLINE int LZ5HC_FindBestMatchFast (LZ5HC_Data_Structure* ctx, U32 matchIndex, U32 matchIndex3, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + const BYTE** matchpos) +{ + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const BYTE* match; + size_t ml=0, mlt; + + match = ip - ctx->last_off; + if (MEM_read24(match) == MEM_read24(ip)) + { + ml = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + return (int)ml; + } + +#if MINMATCH == 3 + size_t offset = ip - base - matchIndex3; + if (offset > 0 && offset < LZ5_SHORT_OFFSET_DISTANCE) + { + match = ip - offset; + if (match > base && MEM_read24(ip) == MEM_read24(match)) + { + ml = 3;//MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + } + } +#endif + + if (matchIndex>=lowLimit) + { + if (matchIndex >= dictLimit) + { + match = base + matchIndex; + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) + { + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + { ml = mlt; *matchpos = match; } + } + } + else + { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) + { + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); + if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + } + + return (int)ml; +} + + +FORCE_INLINE int LZ5HC_FindBestMatchFaster (LZ5HC_Data_Structure* ctx, U32 matchIndex, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + const BYTE** matchpos) +{ + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const BYTE* match; + size_t ml=0, mlt; + + match = ip - ctx->last_off; + if (MEM_read24(match) == MEM_read24(ip)) + { + ml = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + *matchpos = match; + return (int)ml; + } + + if (matchIndex>=lowLimit) + { + if (matchIndex >= dictLimit) + { + match = base + matchIndex; + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) + { + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = match; } + } + } + else + { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) + { + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); + if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + } + + return (int)ml; +} + -FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( +FORCE_INLINE int LZ5HC_GetWiderMatch ( LZ5HC_Data_Structure* ctx, const BYTE* const ip, const BYTE* const iLowLimit, @@ -243,7 +354,6 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( /* First Match */ - LZ5HC_Insert(ctx, ip); matchIndex = HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; match = ip - ctx->last_off; @@ -342,6 +452,7 @@ FORCE_INLINE int LZ5HC_InsertAndGetWiderMatch ( } + typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; /* @@ -426,8 +537,8 @@ FORCE_INLINE int LZ5HC_encodeSequence ( } -static int LZ5HC_compress_generic ( - void* ctxvoid, +static int LZ5HC_compress_lowest_price ( + LZ5HC_Data_Structure* ctx, const char* source, char* dest, int inputSize, @@ -435,7 +546,6 @@ static int LZ5HC_compress_generic ( limitedOutput_directive limit ) { - LZ5HC_Data_Structure* ctx = (LZ5HC_Data_Structure*) ctxvoid; ctx->inputBuffer = (BYTE*) source; ctx->outputBuffer = (BYTE*) dest; const BYTE* ip = (const BYTE*) source; @@ -463,7 +573,8 @@ static int LZ5HC_compress_generic ( /* Main Loop */ while (ip < mflimit) { - ml = LZ5HC_InsertAndFindBestMatch (ctx, ip, matchlimit, (&ref)); + LZ5HC_Insert(ctx, ip); + ml = LZ5HC_FindBestMatch (ctx, ip, matchlimit, (&ref)); if (!ml) { ip++; continue; } int back = 0; @@ -480,7 +591,8 @@ static int LZ5HC_compress_generic ( _Search: if (ip+ml >= mflimit) goto _Encode; - ml2 = LZ5HC_InsertAndGetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2); + LZ5HC_Insert(ctx, ip); + ml2 = LZ5HC_GetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2); if (ml2 == 0) goto _Encode; @@ -568,6 +680,150 @@ static int LZ5HC_compress_generic ( +static int LZ5HC_compress_lowest_fast ( + LZ5HC_Data_Structure* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + limitedOutput_directive limit + ) +{ + ctx->inputBuffer = (BYTE*) source; + ctx->outputBuffer = (BYTE*) dest; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int ml, ml2=0; + const BYTE* ref=NULL; + const BYTE* start2=NULL; + const BYTE* ref2=NULL; + const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; + U32* HashTable = ctx->hashTable; +#if MINMATCH == 3 + U32* HashTable3 = ctx->hashTable3; +#endif + const BYTE* const base = ctx->base; + U32* HashPos, *HashPos3; + + /* init */ + ctx->end += inputSize; + + ip++; + + /* Main Loop */ + while (ip < mflimit) + { + HashPos = &HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; + HashPos3 = &HashTable3[LZ5HC_hash3Ptr(ip, ctx->params.hashLog3)]; + ml = LZ5HC_FindBestMatchFast (ctx, *HashPos, *HashPos3, ip, matchlimit, (&ref)); + *HashPos = (U32)(ip - base); +#if MINMATCH == 3 + *HashPos3 = (U32)(ip - base); +#endif + if (!ml) { ip++; continue; } + + if (ip - ref == ctx->last_off) { ml2=0; goto _Encode; } + + int back = 0; + while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; + ml -= back; + ip += back; + ref += back; + +_Search: + if (ip+ml >= mflimit) goto _Encode; + + start2 = ip + ml - 2; + HashPos = &HashTable[LZ5HC_hashPtr(start2, ctx->params.hashLog, ctx->params.searchLength)]; + ml2 = LZ5HC_FindBestMatchFaster(ctx, *HashPos, start2, matchlimit, (&ref2)); + *HashPos = (U32)(start2 - base); + if (!ml2) goto _Encode; + + back = 0; + while ((start2+back>ip) && (ref2+back > lowPrefixPtr) && (start2[back-1] == ref2[back-1])) back--; + ml2 -= back; + start2 += back; + ref2 += back; + + // LZ5HC_DEBUG("%u: TRY last_off=%d literals=%u off=%u mlen=%u literals2=%u off2=%u mlen2=%u best=%d\n", (U32)(ip - ctx->inputBuffer), ctx->last_off, (U32)(ip - anchor), off0, (U32)ml, (U32)(start2 - anchor), off1, ml2, (U32)(best_pos - ip)); + + if (ml2 <= ml) { ml2 = 0; goto _Encode; } + + if (start2 <= ip) + { + ip = start2; ref = ref2; ml = ml2; + ml2 = 0; + goto _Encode; + } + + if (start2 - ip < 3) + { + ip = start2; ref = ref2; ml = ml2; + ml2 = 0; + goto _Search; + } + + + if (start2 < ip + ml) + { + int correction = ml - (int)(start2 - ip); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < 3) { ml2 = 0; } + } + +_Encode: + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; + + if (ml2) + { + ip = start2; ref = ref2; ml = ml2; + ml2 = 0; + goto _Search; + } + } + + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<params.strategy) + { + default: + case LZ5HC_lowest_fast: + return LZ5HC_compress_lowest_fast(ctx, source, dest, inputSize, maxOutputSize, limit); + case LZ5HC_lowest_price: + return LZ5HC_compress_lowest_price(ctx, source, dest, inputSize, maxOutputSize, limit); + } + + return 0; +} + + int LZ5_sizeofStateHC(void) { return sizeof(LZ5HC_Data_Structure); } int LZ5_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) From 73d4e5764afc60f4f630965a2cd4d7e9964d7ecb Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 18:54:43 +0100 Subject: [PATCH 14/22] slightly faster --- lib/lz5common.h | 26 ++++++++++++++-- lib/lz5hc.c | 80 +++++++++++++++++++++---------------------------- 2 files changed, 58 insertions(+), 48 deletions(-) diff --git a/lib/lz5common.h b/lib/lz5common.h index 703347f..38b0f68 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -162,8 +162,9 @@ static size_t LZ5HC_hashPtr(const void* p, U32 hBits, U32 mls) #define LZ5_SHORT_LITLEN_COST(len) (len LZ5_MID_OFFSET_DISTANCE) || ((offset) LZ5_MID_OFFSET_DISTANCE) || (offset 0) + sum = MAX(common + literals, best_common); + else + sum = MAX(common, best_common - literals); + +// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 0 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 0 : (best_off), best_common - MINMATCH); + return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 0 : off) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 0 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); +} + + + /* ************************************* * HC Types ***************************************/ diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 7c52b7b..9d2e246 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -50,19 +50,6 @@ * HC Compression **************************************/ -FORCE_INLINE int LZ5HC_more_profitable(uint32_t best_off, uint32_t best_common, uint32_t off, uint32_t common, int literals, uint32_t last_off) -{ - int sum; - - if (literals > 0) - sum = MAX(common + literals, best_common); - else - sum = MAX(common, best_common - literals); - -// return LZ5_CODEWORD_COST(sum - common, (off == last_off) ? 0 : (off), common - MINMATCH) <= LZ5_CODEWORD_COST(sum - best_common, (best_off == last_off) ? 0 : (best_off), best_common - MINMATCH); - return LZ5_NORMAL_MATCH_COST(common - MINMATCH, (off == last_off) ? 0 : (off)) + LZ5_NORMAL_LIT_COST(sum - common) <= LZ5_NORMAL_MATCH_COST(best_common - MINMATCH, (best_off == last_off) ? 0 : (best_off)) + LZ5_NORMAL_LIT_COST(sum - best_common); -} - int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* ctx, int compressionLevel) { @@ -188,8 +175,8 @@ FORCE_INLINE int LZ5HC_FindBestMatch (LZ5HC_Data_Structure* ctx, /* Index tabl if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) { mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (mlt > ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) +// if (mlt > ml && (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) { ml = mlt; *matchpos = match; } } } @@ -203,8 +190,8 @@ FORCE_INLINE int LZ5HC_FindBestMatch (LZ5HC_Data_Structure* ctx, /* Index tabl mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); - if (mlt > ml) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml))) + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) + // if (mlt > ml && (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } @@ -256,7 +243,8 @@ FORCE_INLINE int LZ5HC_FindBestMatchFast (LZ5HC_Data_Structure* ctx, U32 matchIn if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) { mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; - if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) + // if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) { ml = mlt; *matchpos = match; } } } @@ -270,7 +258,8 @@ FORCE_INLINE int LZ5HC_FindBestMatchFast (LZ5HC_Data_Structure* ctx, U32 matchIn mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iLimit)) mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); - if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) + if (!ml || (mlt > ml && LZ5HC_better_price(ip - *matchpos, ml, ip - match, mlt, ctx->last_off))) +// if (ml==0 || ((mlt > ml) && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(ml - MINMATCH, (ip - *matchpos == ctx->last_off) ? 0 : (ip - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - ml)))) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ } } @@ -387,8 +376,8 @@ FORCE_INLINE int LZ5HC_GetWiderMatch ( while ((ip+back>iLowLimit) && (match+back > lowPrefixPtr) && (ip[back-1] == match[back-1])) back--; mlt -= back; - if (mlt > longest) - if (!longest || LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip+back - *matchpos == ctx->last_off) ? 0 : (ip+back - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest)) + if (!longest || (mlt > longest && LZ5HC_better_price(ip+back - *matchpos, longest, ip - match, mlt, ctx->last_off))) +// if (!longest || (mlt > longest && LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - match == ctx->last_off) ? 0 : (ip - match)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip+back - *matchpos == ctx->last_off) ? 0 : (ip+back - *matchpos)) + LZ5_NORMAL_LIT_COST(mlt - longest))) { *matchpos = match+back; *startpos = ip+back; @@ -403,43 +392,42 @@ FORCE_INLINE int LZ5HC_GetWiderMatch ( nbAttempts--; if (matchIndex >= dictLimit) { - const BYTE* matchPtr = base + matchIndex; - // if (*(ip + longest) == *(matchPtr + longest)) - - if (matchPtr < ip && MEM_read32(matchPtr) == MEM_read32(ip)) + match = base + matchIndex; + + // if (*(ip + longest) == *(matchPtr + longest)) + if (match < ip && MEM_read32(match) == MEM_read32(ip)) + { + int mlt = MINMATCH + MEM_count(ip+MINMATCH, match+MINMATCH, iHighLimit); + int back = 0; + + while ((ip+back>iLowLimit) + && (match+back > lowPrefixPtr) + && (ip[back-1] == match[back-1])) + back--; + + mlt -= back; + + if (!longest || (mlt > longest && LZ5HC_better_price(ip+back - *matchpos, longest, ip - match, mlt, ctx->last_off))) { - int mlt = MINMATCH + MEM_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - int back = 0; - - while ((ip+back>iLowLimit) - && (matchPtr+back > lowPrefixPtr) - && (ip[back-1] == matchPtr[back-1])) - back--; - - mlt -= back; - - if (mlt > longest) - if (LZ5_NORMAL_MATCH_COST(mlt - MINMATCH, (ip - matchPtr == ctx->last_off) ? 0 : (ip - matchPtr)) < LZ5_NORMAL_MATCH_COST(longest - MINMATCH, (ip+back - *matchpos == ctx->last_off) ? 0 : (ip+back - *matchpos)) + (LZ5_NORMAL_LIT_COST(mlt - longest) )) - { - longest = (int)mlt; - *matchpos = matchPtr+back; - *startpos = ip+back; - } + longest = (int)mlt; + *matchpos = match+back; + *startpos = ip+back; } + } } else { - const BYTE* matchPtr = dictBase + matchIndex; - if (MEM_read32(matchPtr) == MEM_read32(ip)) + const BYTE* match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) { size_t mlt; int back=0; const BYTE* vLimit = ip + (dictLimit - matchIndex); if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = MEM_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; if ((ip+mlt == vLimit) && (vLimit < iHighLimit)) mlt += MEM_count(ip+mlt, base+dictLimit, iHighLimit); - while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == matchPtr[back-1])) back--; + while ((ip+back > iLowLimit) && (matchIndex+back > lowLimit) && (ip[back-1] == match[back-1])) back--; mlt -= back; if ((int)mlt > longest) { longest = (int)mlt; *matchpos = base + matchIndex + back; *startpos = ip+back; } } From 5beec259358d865960d2223c0aa525d3cfabc01a Mon Sep 17 00:00:00 2001 From: inikep Date: Sat, 28 Nov 2015 20:40:01 +0100 Subject: [PATCH 15/22] LZ5HC_compress_fast --- lib/lz5common.h | 37 ++++++++++----- lib/lz5hc.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 139 insertions(+), 15 deletions(-) diff --git a/lib/lz5common.h b/lib/lz5common.h index 38b0f68..c5582cb 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -198,7 +198,7 @@ FORCE_INLINE int LZ5HC_more_profitable(uint32_t best_off, uint32_t best_common, * HC Types ***************************************/ /** from faster to stronger */ -typedef enum { LZ5HC_lowest_fast, LZ5HC_lowest_price } LZ5HC_strategy; +typedef enum { LZ5HC_fast, LZ5HC_price_fast, LZ5HC_lowest_price } LZ5HC_strategy; typedef struct { @@ -234,7 +234,7 @@ struct LZ5HC_Data_s /* ************************************* * HC Pre-defined compression levels ***************************************/ -#define LZ5HC_MAX_CLEVEL 9 +#define LZ5HC_MAX_CLEVEL 12 static const int g_maxCompressionLevel = LZ5HC_MAX_CLEVEL; static const int LZ5HC_compressionLevel_default = 5; @@ -242,18 +242,31 @@ static const int LZ5HC_compressionLevel_default = 5; static const LZ5HC_parameters LZ5HC_defaultParameters[LZ5HC_MAX_CLEVEL+1] = { /* W, C, H, H3, S, L, strat */ - { 0, 0, 0, 0, 0, 0, 0 }, /* level 0 - never used */ - { 22, 22, 13, 13, 1, 4, LZ5HC_lowest_price }, /* level 1 */ - { 22, 22, 15, 13, 1, 4, LZ5HC_lowest_price }, /* level 2 */ - { 22, 22, 17, 13, 1, 4, LZ5HC_lowest_price }, /* level 3 */ - { 22, 22, 13, 13, 1, 4, LZ5HC_lowest_fast }, /* level 4 */ - { 22, 22, 17, 13, 1, 4, LZ5HC_lowest_fast }, /* level 5 */ - { 22, 22, 19, 16, 1, 4, LZ5HC_lowest_price }, /* level 6 */ - { 22, 22, 23, 16, 3, 4, LZ5HC_lowest_price }, /* level 7 */ - { 22, 22, 23, 16, 8, 4, LZ5HC_lowest_price }, /* level 8 */ - { 22, 22, 23, 16, 32, 4, LZ5HC_lowest_price }, /* level 9 */ + { 0, 0, 0, 0, 0, 0, LZ5HC_fast }, // level 0 - never used + { 22, 22, 13, 0, 4, 6, LZ5HC_fast }, // level 1 + // { 22, 22, 14, 0, 4, 6, LZ5HC_fast }, // level 2 + { 22, 22, 13, 0, 2, 6, LZ5HC_fast }, // level 3 + // { 22, 22, 14, 0, 2, 6, LZ5HC_fast }, // level 4 + // { 22, 22, 13, 0, 2, 5, LZ5HC_fast }, // level 5 + // { 22, 22, 14, 0, 2, 5, LZ5HC_fast }, // level 6 + { 22, 22, 13, 0, 1, 5, LZ5HC_fast }, // level 7 + // { 22, 22, 14, 0, 1, 5, LZ5HC_fast }, // level 8 + // { 22, 22, 15, 0, 1, 5, LZ5HC_fast }, // level 9 + // { 22, 22, 17, 0, 1, 5, LZ5HC_fast }, // level 10 + // { 22, 22, 14, 13, 4, 6, LZ5HC_price_fast }, // level 12 + // { 22, 22, 14, 13, 2, 5, LZ5HC_price_fast }, // level 13 + { 22, 22, 14, 13, 1, 4, LZ5HC_price_fast }, // level 14 + { 22, 22, 17, 13, 1, 4, LZ5HC_price_fast }, // level 15 + { 22, 22, 15, 13, 1, 4, LZ5HC_lowest_price }, // level 16 + { 22, 22, 17, 13, 1, 4, LZ5HC_lowest_price }, // level 17 + { 22, 22, 19, 16, 1, 4, LZ5HC_lowest_price }, // level 18 + { 22, 22, 23, 16, 3, 4, LZ5HC_lowest_price }, // level 19 + { 22, 22, 23, 16, 8, 4, LZ5HC_lowest_price }, // level 20 + { 22, 22, 23, 16, 32, 4, LZ5HC_lowest_price }, // level 21 + { 22, 22, 23, 16, 128, 4, LZ5HC_lowest_price }, // level 22 }; + #if defined (__cplusplus) } #endif diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 9d2e246..05d2421 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -319,6 +319,48 @@ FORCE_INLINE int LZ5HC_FindBestMatchFaster (LZ5HC_Data_Structure* ctx, U32 match } +FORCE_INLINE int LZ5HC_FindBestMatchFastest (LZ5HC_Data_Structure* ctx, U32 matchIndex, /* Index table will be updated */ + const BYTE* ip, const BYTE* const iLimit, + const BYTE** matchpos) +{ + const BYTE* const base = ctx->base; + const BYTE* const dictBase = ctx->dictBase; + const U32 dictLimit = ctx->dictLimit; + const U32 maxDistance = (1 << ctx->params.windowLog); + const U32 lowLimit = (ctx->lowLimit + maxDistance > (U32)(ip-base)) ? ctx->lowLimit : (U32)(ip - base) - (maxDistance - 1); + const BYTE* match; + size_t ml=0, mlt; + + if (matchIndex>=lowLimit) + { + if (matchIndex >= dictLimit) + { + match = base + matchIndex; + if (match < ip && *(match+ml) == *(ip+ml) && (MEM_read32(match) == MEM_read32(ip))) + { + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, iLimit) + MINMATCH; + if (mlt > ml) { ml = mlt; *matchpos = match; } + } + } + else + { + match = dictBase + matchIndex; + if (MEM_read32(match) == MEM_read32(ip)) + { + const BYTE* vLimit = ip + (dictLimit - matchIndex); + if (vLimit > iLimit) vLimit = iLimit; + mlt = MEM_count(ip+MINMATCH, match+MINMATCH, vLimit) + MINMATCH; + if ((ip+mlt == vLimit) && (vLimit < iLimit)) + mlt += MEM_count(ip+mlt, base+dictLimit, iLimit); + if (mlt > ml) { ml = mlt; *matchpos = base + matchIndex; } /* virtual matchpos */ + } + } + } + + return (int)ml; +} + + FORCE_INLINE int LZ5HC_GetWiderMatch ( LZ5HC_Data_Structure* ctx, const BYTE* const ip, @@ -668,7 +710,7 @@ static int LZ5HC_compress_lowest_price ( -static int LZ5HC_compress_lowest_fast ( +static int LZ5HC_compress_price_fast ( LZ5HC_Data_Structure* ctx, const char* source, char* dest, @@ -795,6 +837,73 @@ static int LZ5HC_compress_lowest_fast ( +static int LZ5HC_compress_fast ( + LZ5HC_Data_Structure* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + limitedOutput_directive limit + ) +{ + ctx->inputBuffer = (BYTE*) source; + ctx->outputBuffer = (BYTE*) dest; + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int ml; + const BYTE* ref=NULL; + const BYTE* lowPrefixPtr = ctx->base + ctx->dictLimit; + const BYTE* const base = ctx->base; + U32* HashPos; + U32* HashTable = ctx->hashTable; + const int accel = (ctx->params.searchNum>0)?ctx->params.searchNum:1; + + /* init */ + ctx->end += inputSize; + + ip++; + + /* Main Loop */ + while (ip < mflimit) + { + HashPos = &HashTable[LZ5HC_hashPtr(ip, ctx->params.hashLog, ctx->params.searchLength)]; + ml = LZ5HC_FindBestMatchFastest (ctx, *HashPos, ip, matchlimit, (&ref)); + *HashPos = (U32)(ip - base); + if (!ml) { ip+=accel; continue; } + + int back = 0; + while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; + ml -= back; + ip += back; + ref += back; + + if (LZ5HC_encodeSequence(ctx, &ip, &op, &anchor, ml, ref, limit, oend)) return 0; + + } + + /* Encode Last Literals */ + { + int lastRun = (int)(iend - anchor); + if ((limit) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; /* Check output limit */ + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK< 254 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<params.strategy) { default: - case LZ5HC_lowest_fast: - return LZ5HC_compress_lowest_fast(ctx, source, dest, inputSize, maxOutputSize, limit); + case LZ5HC_fast: + return LZ5HC_compress_fast(ctx, source, dest, inputSize, maxOutputSize, limit); + case LZ5HC_price_fast: + return LZ5HC_compress_price_fast(ctx, source, dest, inputSize, maxOutputSize, limit); case LZ5HC_lowest_price: return LZ5HC_compress_lowest_price(ctx, source, dest, inputSize, maxOutputSize, limit); } From 34f8af2403d7ec43dcab041a7c1cec310089d9df Mon Sep 17 00:00:00 2001 From: inikep Date: Sun, 29 Nov 2015 23:42:26 +0100 Subject: [PATCH 16/22] working fullbench --- lib/lz5.c | 3 ++- lib/lz5frame.c | 28 +++++++++++++--------------- lib/lz5hc.c | 21 +++++++++++++-------- lib/lz5hc.h | 2 +- programs/Makefile | 2 +- programs/fullbench.c | 12 ++++++------ 6 files changed, 36 insertions(+), 32 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index fd5ae2d..2699167 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -41,6 +41,7 @@ #include "mem.h" #include "lz5common.h" #include "lz5.h" +#include /************************************** @@ -970,7 +971,7 @@ FORCE_INLINE int LZ5_decompress_generic( /* copy literals */ cpy = op+length; - if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(1+1+LASTLITERALS))) ) + if (((endOnInput) && ((cpy>(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(0+1+LASTLITERALS))) ) || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH))) { if (partialDecoding) diff --git a/lib/lz5frame.c b/lib/lz5frame.c index c504034..a371f05 100644 --- a/lib/lz5frame.c +++ b/lib/lz5frame.c @@ -53,6 +53,7 @@ You can contact the author at : #define ALLOCATOR(s) calloc(1,s) #define FREEMEM free #include /* memset, memcpy, memmove */ +#include #define MEM_INIT memset @@ -493,14 +494,14 @@ size_t LZ5F_compressBound(size_t srcSize, const LZ5F_preferences_t* preferencesP } -typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level); +typedef int (*compressFunc_t)(void* ctx, const char* src, char* dst, int srcSize, int dstSize); -static size_t LZ5F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz5ctx, int level) +static size_t LZ5F_compressBlock(void* dst, const void* src, size_t srcSize, compressFunc_t compress, void* lz5ctx) { /* compress one block */ BYTE* cSizePtr = (BYTE*)dst; U32 cSize; - cSize = (U32)compress(lz5ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1), level); + cSize = (U32)compress(lz5ctx, (const char*)src, (char*)(cSizePtr+4), (int)(srcSize), (int)(srcSize-1)); LZ5F_writeLE32(cSizePtr, cSize); if (cSize == 0) /* compression failed */ { @@ -512,21 +513,18 @@ static size_t LZ5F_compressBlock(void* dst, const void* src, size_t srcSize, com } -static int LZ5F_localLZ5_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ5F_localLZ5_compress_limitedOutput_withState(void* ctx, const char* src, char* dst, int srcSize, int dstSize) { - (void) level; return LZ5_compress_limitedOutput_withState(ctx, src, dst, srcSize, dstSize); } -static int LZ5F_localLZ5_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ5F_localLZ5_compress_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize) { - (void) level; return LZ5_compress_limitedOutput_continue((LZ5_stream_t*)ctx, src, dst, srcSize, dstSize); } -static int LZ5F_localLZ5_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize, int level) +static int LZ5F_localLZ5_compressHC_limitedOutput_continue(void* ctx, const char* src, char* dst, int srcSize, int dstSize) { - (void) level; return LZ5_compress_HC_continue((LZ5_streamHC_t*)ctx, src, dst, srcSize, dstSize); } @@ -599,7 +597,7 @@ size_t LZ5F_compressUpdate(LZ5F_compressionContext_t compressionContext, void* d memcpy(cctxPtr->tmpIn + cctxPtr->tmpInSize, srcBuffer, sizeToCopy); srcPtr += sizeToCopy; - dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, blockSize, compress, cctxPtr->lz5CtxPtr); if (cctxPtr->prefs.frameInfo.blockMode==LZ5F_blockLinked) cctxPtr->tmpIn += blockSize; cctxPtr->tmpInSize = 0; @@ -610,7 +608,7 @@ size_t LZ5F_compressUpdate(LZ5F_compressionContext_t compressionContext, void* d { /* compress full block */ lastBlockCompressed = fromSrcBuffer; - dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, blockSize, compress, cctxPtr->lz5CtxPtr); srcPtr += blockSize; } @@ -618,7 +616,7 @@ size_t LZ5F_compressUpdate(LZ5F_compressionContext_t compressionContext, void* d { /* compress remaining input < blockSize */ lastBlockCompressed = fromSrcBuffer; - dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, srcPtr, srcEnd - srcPtr, compress, cctxPtr->lz5CtxPtr); srcPtr = srcEnd; } @@ -687,7 +685,7 @@ size_t LZ5F_flush(LZ5F_compressionContext_t compressionContext, void* dstBuffer, compress = LZ5F_selectCompression(cctxPtr->prefs.frameInfo.blockMode, cctxPtr->prefs.compressionLevel); /* compress tmp buffer */ - dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz5CtxPtr, cctxPtr->prefs.compressionLevel); + dstPtr += LZ5F_compressBlock(dstPtr, cctxPtr->tmpIn, cctxPtr->tmpInSize, compress, cctxPtr->lz5CtxPtr); if (cctxPtr->prefs.frameInfo.blockMode==LZ5F_blockLinked) cctxPtr->tmpIn += cctxPtr->tmpInSize; cctxPtr->tmpInSize = 0; @@ -1152,7 +1150,7 @@ size_t LZ5F_decompress(LZ5F_decompressionContext_t decompressionContext, dctxPtr->dStage = dstage_getSuffix; break; } - if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-LZ5F_ERROR_GENERIC; /* invalid cBlockSize */ + if (nextCBlockSize > dctxPtr->maxBlockSize) return (size_t)-LZ5F_ERROR_GENERIC; /* invalid cBlockSize */ dctxPtr->tmpInTarget = nextCBlockSize; if (LZ5F_readLE32(selectedIn) & LZ5F_BLOCKUNCOMPRESSED_FLAG) { @@ -1246,7 +1244,7 @@ size_t LZ5F_decompress(LZ5F_decompressionContext_t decompressionContext, decoder = LZ5F_decompress_safe; decodedSize = decoder((const char*)selectedIn, (char*)dstPtr, (int)dctxPtr->tmpInTarget, (int)dctxPtr->maxBlockSize, (const char*)dctxPtr->dict, (int)dctxPtr->dictSize); - if (decodedSize < 0) return (size_t)-LZ5F_ERROR_GENERIC; /* decompression failed */ + if (decodedSize < 0) return (size_t)-LZ5F_ERROR_GENERIC; /* decompression failed */ if (dctxPtr->frameInfo.contentChecksumFlag) XXH32_update(&(dctxPtr->xxh), dstPtr, decodedSize); if (dctxPtr->frameInfo.contentSize) dctxPtr->frameRemainingSize -= decodedSize; diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 05d2421..21f491e 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -59,13 +59,13 @@ int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* ctx, int compressionLevel) ctx->params = LZ5HC_defaultParameters[ctx->compressionLevel]; - ctx->hashTable = ALLOCATOR(1, sizeof(U32)*((1 << ctx->params.hashLog3)+(1 << ctx->params.hashLog))); + ctx->hashTable = (U32*) ALLOCATOR(1, sizeof(U32)*((1 << ctx->params.hashLog3)+(1 << ctx->params.hashLog))); if (!ctx->hashTable) return 0; ctx->hashTable3 = ctx->hashTable + (1 << ctx->params.hashLog); - ctx->chainTable = ALLOCATOR(1, sizeof(U32)*(1 << ctx->params.contentLog)); + ctx->chainTable = (U32*) ALLOCATOR(1, sizeof(U32)*(1 << ctx->params.contentLog)); if (!ctx->chainTable) { FREEMEM(ctx->hashTable); @@ -625,7 +625,7 @@ static int LZ5HC_compress_lowest_price ( ml2 = LZ5HC_GetWiderMatch(ctx, ip + ml - 2, anchor, matchlimit, 0, &ref2, &start2); if (ml2 == 0) goto _Encode; - + { int price, best_price, off0=0, off1=0; uint8_t *pos, *best_pos; @@ -667,10 +667,11 @@ static int LZ5HC_compress_lowest_price ( break; } } - // LZ5HC_DEBUG("%u: TRY last_off=%d literals=%u off=%u mlen=%u literals2=%u off2=%u mlen2=%u best=%d\n", (U32)(ip - ctx->inputBuffer), ctx->last_off, (U32)(ip - anchor), off0, (U32)ml, (U32)(start2 - anchor), off1, ml2, (U32)(best_pos - ip)); - ml = best_pos - ip; + } + + if (ml < MINMATCH) { ip = start2; @@ -761,12 +762,14 @@ static int LZ5HC_compress_price_fast ( if (ip - ref == ctx->last_off) { ml2=0; goto _Encode; } + { int back = 0; while ((ip+back>anchor) && (ref+back > lowPrefixPtr) && (ip[back-1] == ref[back-1])) back--; ml -= back; ip += back; ref += back; - + } + _Search: if (ip+ml >= mflimit) goto _Encode; @@ -776,11 +779,13 @@ static int LZ5HC_compress_price_fast ( *HashPos = (U32)(start2 - base); if (!ml2) goto _Encode; - back = 0; + { + int back = 0; while ((start2+back>ip) && (ref2+back > lowPrefixPtr) && (start2[back-1] == ref2[back-1])) back--; ml2 -= back; start2 += back; ref2 += back; + } // LZ5HC_DEBUG("%u: TRY last_off=%d literals=%u off=%u mlen=%u literals2=%u off2=%u mlen2=%u best=%d\n", (U32)(ip - ctx->inputBuffer), ctx->last_off, (U32)(ip - anchor), off0, (U32)ml, (U32)(start2 - anchor), off1, ml2, (U32)(best_pos - ip)); @@ -1098,7 +1103,7 @@ int LZ5_saveDictHC (LZ5_streamHC_t* LZ5_streamHCPtr, char* safeBuffer, int dictS * Deprecated Functions ***********************************/ /* Deprecated compression functions */ -/* These functions are planned to start generate warnings by r131 approximately */ +/* These functions are planned to start generate warnings by r132 approximately */ int LZ5_compressHC(const char* src, char* dst, int srcSize) { return LZ5_compress_HC (src, dst, srcSize, LZ5_compressBound(srcSize), 0); } int LZ5_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ5_compress_HC(src, dst, srcSize, maxDstSize, 0); } int LZ5_compressHC_continue (LZ5_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ5_compress_HC_continue (ctx, src, dst, srcSize, LZ5_compressBound(srcSize)); } diff --git a/lib/lz5hc.h b/lib/lz5hc.h index b4ddbc5..63bda0b 100644 --- a/lib/lz5hc.h +++ b/lib/lz5hc.h @@ -166,7 +166,7 @@ int LZ5_saveDictHC (LZ5_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSi #endif // LZ5_DEPRECATE_WARNING_DEFBLOCK /* compression functions */ -/* these functions are planned to trigger warning messages by r131 approximately */ +/* these functions are planned to trigger warning messages by r132 approximately */ int LZ5_compressHC (const char* source, char* dest, int inputSize); int LZ5_compressHC_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); int LZ5_compressHC_continue (LZ5_streamHC_t* LZ5_streamHCPtr, const char* source, char* dest, int inputSize); diff --git a/programs/Makefile b/programs/Makefile index 8a4bc97..3e10763 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -32,7 +32,7 @@ # datagen : generates synthetic data samples for tests & benchmarks # ########################################################################## -RELEASE ?= r131 +RELEASE ?= r132 BINDIR := $(PREFIX)/bin MANDIR := $(PREFIX)/share/man/man1 diff --git a/programs/fullbench.c b/programs/fullbench.c index d09679e..233a038 100644 --- a/programs/fullbench.c +++ b/programs/fullbench.c @@ -483,7 +483,7 @@ static int local_LZ5_saveDictHC(const char* in, char* out, int inSize) static int local_LZ5_compressHC_withStateHC(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_withStateHC(&LZ5_streamHC, in, out, inSize); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -494,7 +494,7 @@ static int local_LZ5_compressHC_withStateHC(const char* in, char* out, int inSiz static int local_LZ5_compressHC_limitedOutput_withStateHC(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_limitedOutput_withStateHC(&LZ5_streamHC, in, out, inSize, LZ5_compressBound(inSize)-1); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -505,7 +505,7 @@ static int local_LZ5_compressHC_limitedOutput_withStateHC(const char* in, char* static int local_LZ5_compressHC_limitedOutput(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_limitedOutput(in, out, inSize, LZ5_compressBound(inSize)-1); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -516,7 +516,7 @@ static int local_LZ5_compressHC_limitedOutput(const char* in, char* out, int inS static int local_LZ5_compressHC_continue(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_continue(&LZ5_streamHC, in, out, inSize); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -527,7 +527,7 @@ static int local_LZ5_compressHC_continue(const char* in, char* out, int inSize) static int local_LZ5_compressHC_limitedOutput_continue(const char* in, char* out, int inSize) { int res = 0; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { res = LZ5_compressHC_limitedOutput_continue(&LZ5_streamHC, in, out, inSize, LZ5_compressBound(inSize)-1); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); @@ -734,7 +734,7 @@ int fullSpeedBench(char** fileNamesTable, int nbFiles) LZ5_loadDict(&LZ5_stream, chunkP[0].origBuffer, chunkP[0].origSize); break; case 41: compressionFunction = local_LZ5_saveDictHC; compressorName = "LZ5_saveDictHC"; - if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC))) + if (LZ5_alloc_mem_HC((LZ5HC_Data_Structure*)(&LZ5_streamHC),0)) { LZ5_loadDictHC(&LZ5_streamHC, chunkP[0].origBuffer, chunkP[0].origSize); LZ5_free_mem_HC((LZ5HC_Data_Structure*)&LZ5_streamHC); From 0a10991997692e5cc940505a73e4beabbf31684f Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 30 Nov 2015 10:23:25 +0100 Subject: [PATCH 17/22] PRIu64 --- lib/lz5common.h | 19 ++++++++++++++++++- lib/lz5frame.c | 2 +- lib/lz5hc.c | 7 ++++--- lib/lz5hc.h | 1 - lib/mem.h | 14 -------------- programs/Makefile | 15 +++++++++------ programs/bench.c | 2 +- programs/lz5io.c | 18 +++++++++++------- 8 files changed, 44 insertions(+), 34 deletions(-) diff --git a/lib/lz5common.h b/lib/lz5common.h index c5582cb..13f2549 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -47,7 +47,8 @@ extern "C" { # endif /* __STDC_VERSION__ */ #endif /* _MSC_VER */ -/* LZ5_GCC_VERSION is defined into lz5.h */ +#define LZ5_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + #if (LZ5_GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) # define expect(expr,value) (__builtin_expect ((expr),(value)) ) #else @@ -58,6 +59,22 @@ extern "C" { #define unlikely(expr) expect((expr) != 0, 0) + +/* ************************************* +* Local Compiler Options +***************************************/ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#endif + + + + + /************************************** * Memory routines **************************************/ diff --git a/lib/lz5frame.c b/lib/lz5frame.c index a371f05..e3e3bf6 100644 --- a/lib/lz5frame.c +++ b/lib/lz5frame.c @@ -99,7 +99,7 @@ typedef unsigned long long U64; #define _8BITS 0xFF #define LZ5F_MAGIC_SKIPPABLE_START 0x184D2A50U -#define LZ5F_MAGICNUMBER 0x184D2204U +#define LZ5F_MAGICNUMBER 0x184D2205U #define LZ5F_BLOCKUNCOMPRESSED_FLAG 0x80000000U #define LZ5F_BLOCKSIZEID_DEFAULT LZ5F_max64KB diff --git a/lib/lz5hc.c b/lib/lz5hc.c index 21f491e..b38c6a4 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -39,10 +39,10 @@ /* ************************************* * Includes ***************************************/ -#include "lz5hc.h" #include "mem.h" #include "lz5common.h" #include "lz5.h" +#include "lz5hc.h" #include @@ -459,7 +459,7 @@ FORCE_INLINE int LZ5HC_GetWiderMatch ( } else { - const BYTE* match = dictBase + matchIndex; + match = dictBase + matchIndex; if (MEM_read32(match) == MEM_read32(ip)) { size_t mlt; @@ -626,7 +626,8 @@ static int LZ5HC_compress_lowest_price ( if (ml2 == 0) goto _Encode; { - int price, best_price, off0=0, off1=0; + int price, best_price; + U32 off0=0, off1=0; uint8_t *pos, *best_pos; // find the lowest price for encoding ml bytes diff --git a/lib/lz5hc.h b/lib/lz5hc.h index 63bda0b..167c3e2 100644 --- a/lib/lz5hc.h +++ b/lib/lz5hc.h @@ -152,7 +152,6 @@ int LZ5_saveDictHC (LZ5_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSi You can also define LZ5_DEPRECATE_WARNING_DEFBLOCK. */ #ifndef LZ5_DEPRECATE_WARNING_DEFBLOCK # define LZ5_DEPRECATE_WARNING_DEFBLOCK -# define LZ5_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if (LZ5_GCC_VERSION >= 405) || defined(__clang__) # define LZ5_DEPRECATED(message) __attribute__((deprecated(message))) # elif (LZ5_GCC_VERSION >= 301) diff --git a/lib/mem.h b/lib/mem.h index 922fdbb..ba43681 100644 --- a/lib/mem.h +++ b/lib/mem.h @@ -47,20 +47,6 @@ extern "C" { -/* ************************************* -* Local Compiler Options -***************************************/ -#if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -#endif - - - - /****************************************** * Compiler-specific ******************************************/ diff --git a/programs/Makefile b/programs/Makefile index 3e10763..c824fb5 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -38,12 +38,12 @@ BINDIR := $(PREFIX)/bin MANDIR := $(PREFIX)/share/man/man1 LZ5DIR := ../lib -DESTDIR ?= -PREFIX ?= /usr/local -CFLAGS ?= -O3 # can select customized optimized flags such as for example : CFLAGS="-O2 -g" make -CFLAGS += -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-qual -Wcast-align -Wstrict-prototypes -pedantic -CPPFLAGS:= -I$(LZ5DIR) -DXXH_NAMESPACE=LZ5_ -DLZ5_VERSION=\"$(RELEASE)\" -FLAGS := $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) +DESTDIR ?= +PREFIX ?= /usr/local +OPTFLAGS ?= -O3 -fomit-frame-pointer -fstrict-aliasing -fforce-addr -ffast-math # can select customized optimized flags such as for example : CFLAGS="-O2 -g" make +CFLAGS ?= -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -pedantic -Wno-variadic-macros -Wno-format +CPPFLAGS := -I$(LZ5DIR) -DXXH_NAMESPACE=LZ5_ -DLZ5_VERSION=\"$(RELEASE)\" +FLAGS := $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(OPTFLAGS) # Define *.exe as extension for Windows systems @@ -93,6 +93,9 @@ frametest32: $(LZ5DIR)/lz5frame.c $(LZ5DIR)/lz5.c $(LZ5DIR)/lz5hc.c $(LZ5DIR)/xx datagen : datagen.o datagencli.c $(CC) $(FLAGS) $^ -o $@$(EXT) +.c.o: + $(CC) $(FLAGS) $< -c -o $@ + clean: @$(MAKE) -C $(LZ5DIR) $@ > $(VOID) @rm -f core *.o *.test tmp* \ diff --git a/programs/bench.c b/programs/bench.c index 09aa07a..c235e9c 100644 --- a/programs/bench.c +++ b/programs/bench.c @@ -430,7 +430,7 @@ int BMK_benchFiles(const char** fileNamesTable, int nbFiles, int cLevel) } if (nbFiles > 1) - DISPLAY("%-16.16s :%10llu ->%10llu (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100., (double)totals/totalc/1000., (double)totals/totald/1000.); + DISPLAY("%-16.16s :%10" PRIu64 " ->%10" PRIu64 " (%5.2f%%), %6.1f MB/s , %6.1f MB/s\n", " TOTAL", (long long unsigned int)totals, (long long unsigned int)totalz, (double)totalz/(double)totals*100.0, (double)totals/totalc/1000.0, (double)totals/totald/1000.0); if (BMK_pause) { DISPLAY("\npress enter...\n"); (void)getchar(); } diff --git a/programs/lz5io.c b/programs/lz5io.c index 6f15840..5335aa7 100644 --- a/programs/lz5io.c +++ b/programs/lz5io.c @@ -39,6 +39,10 @@ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif +/* Add support for %lld in printf */ +#define __STDC_FORMAT_MACROS // now PRIu64 will work +#include + #define _LARGE_FILES /* Large file support on 32-bits AIX */ #define _FILE_OFFSET_BITS 64 /* Large file support on 32-bits unix */ @@ -99,10 +103,10 @@ #define _8BITS 0xFF #define MAGICNUMBER_SIZE 4 -#define LZ5IO_MAGICNUMBER 0x184D2205 -#define LZ5IO_SKIPPABLE0 0x184D2A50 -#define LZ5IO_SKIPPABLEMASK 0xFFFFFFF0 -#define LEGACY_MAGICNUMBER 0x184C2102 +#define LZ5IO_MAGICNUMBER 0x184D2205U +#define LZ5IO_SKIPPABLE0 0x184D2A50U +#define LZ5IO_SKIPPABLEMASK 0xFFFFFFF0U +#define LEGACY_MAGICNUMBER 0x184C2102U #define CACHELINE 64 #define LEGACY_BLOCKSIZE (8 MB) @@ -396,7 +400,7 @@ int LZ5IO_compressFilename_Legacy(const char* input_filename, const char* output end = clock(); DISPLAYLEVEL(2, "\r%79s\r", ""); filesize += !filesize; /* avoid divide by zero */ - DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + DISPLAYLEVEL(2,"Compressed %" PRIu64 " bytes into %" PRIu64 " bytes ==> %.2f%%\n", (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100); { double seconds = (double)(end - start)/CLOCKS_PER_SEC; @@ -558,7 +562,7 @@ static int LZ5IO_compressFilename_extRess(cRess_t ress, const char* srcFileName, /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + DISPLAYLEVEL(2, "Compressed %" PRIu64 " bytes into %" PRIu64 " bytes ==> %.2f%%\n", filesize, compressedfilesize, (double)compressedfilesize/(filesize + !filesize)*100); /* avoid division by zero */ return 0; @@ -980,7 +984,7 @@ static int LZ5IO_decompressFile_extRess(dRess_t ress, const char* input_filename /* Final Status */ DISPLAYLEVEL(2, "\r%79s\r", ""); - DISPLAYLEVEL(2, "Successfully decoded %llu bytes \n", filesize); + DISPLAYLEVEL(2, "Successfully decoded %" PRIu64 " bytes \n", filesize); /* Close */ fclose(finput); From 9cca7d8996c0c59e8365ab3c04194e92ea550ff8 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 30 Nov 2015 12:55:26 +0100 Subject: [PATCH 18/22] updated text files --- NEWS | 7 +++---- README.md | 42 ++++++++++++++++++++++++------------------ programs/lz5cli.c | 4 ++-- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/NEWS b/NEWS index 32676b3..f4c8652 100644 --- a/NEWS +++ b/NEWS @@ -1,8 +1,7 @@ r132 -- added: a new parser -- added: a gain function -- added: a special codeword for the last occured offset -- added: support for 3-byte long matches +- added: new parsers: LZ5HC_fast, LZ5HC_price_fast, LZ5HC_lowest_price +- added: a special 1-byte codeword for the last occured offset +- added: support for 3-byte long matches (MINMATCH = 3) r131 The first release based on LZ4 r132 dev diff --git a/README.md b/README.md index 8b2f1b1..50a51d0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This is caused mainly because of 22-bit dictionary instead of 16-bit in LZ4. LZ5 uses different output codewords and is not compatible with LZ4. LZ4 output codewords are 3 byte long (24-bit) and look as follows: - LLLL_MMMM OOOOOOOO OOOOOOOO - 16-bit offset, 4-bit match length, 4-bit literal length -LZ5 uses 3 types of codewords from 2 to 4 bytes long: +LZ5 uses 4 types of codewords from 1 to 4 bytes long: - 1_OO_LL_MMM OOOOOOOO - 10-bit offset, 3-bit match length, 2-bit literal length - 00_LLL_MMM OOOOOOOO OOOOOOOO - 16-bit offset, 3-bit match length, 3-bit literal length - 010_LL_MMM OOOOOOOO OOOOOOOO OOOOOOOO - 24-bit offset, 3-bit match length, 2-bit literal length @@ -18,7 +18,7 @@ LZ5 uses 3 types of codewords from 2 to 4 bytes long: Benchmarks ------------------------- -In our experiments decompression speed of LZ5 is from 650-950 MB/s. It's slower than LZ4 but much faster than zstd and brotli. +In our experiments decompression speed of LZ5 is from 600-1600 MB/s. It's slower than LZ4 but much faster than zstd and brotli. With the compresion ratio is opposite: LZ5 is better than LZ4 but worse than zstd and brotli. | Compressor name | Compression| Decompress.| Compr. size | Ratio | @@ -33,22 +33,28 @@ With the compresion ratio is opposite: LZ5 is better than LZ4 but worse than zst | lz4hc r131 -11 | 20 MB/s | 1969 MB/s | 54751363 | 52.21 | | lz4hc r131 -13 | 17 MB/s | 1969 MB/s | 54744790 | 52.21 | | lz4hc r131 -15 | 14 MB/s | 2007 MB/s | 54741827 | 52.21 | -| lz5 r131 | 195 MB/s | 939 MB/s | 55884927 | 53.30 | -| lz5hc r131 -1 | 32 MB/s | 742 MB/s | 52927122 | 50.48 | -| lz5hc r131 -3 | 20 MB/s | 716 MB/s | 50970192 | 48.61 | -| lz5hc r131 -5 | 10 MB/s | 701 MB/s | 49970285 | 47.66 | -| lz5hc r131 -7 | 5.54 MB/s | 682 MB/s | 49541511 | 47.25 | -| lz5hc r131 -9 | 2.69 MB/s | 673 MB/s | 49346894 | 47.06 | -| lz5hc r131 -11 | 1.36 MB/s | 664 MB/s | 49266526 | 46.98 | -| zstd v0.3 | 257 MB/s | 547 MB/s | 51231016 | 48.86 | -| zstd_HC v0.3 -1 | 257 MB/s | 553 MB/s | 51231016 | 48.86 | -| zstd_HC v0.3 -3 | 76 MB/s | 417 MB/s | 46774383 | 44.61 | -| zstd_HC v0.3 -5 | 40 MB/s | 476 MB/s | 45628362 | 43.51 | -| zstd_HC v0.3 -9 | 14 MB/s | 485 MB/s | 44840562 | 42.76 | -| zstd_HC v0.3 -13 | 9.34 MB/s | 469 MB/s | 43114895 | 41.12 | -| zstd_HC v0.3 -17 | 6.02 MB/s | 463 MB/s | 42989971 | 41.00 | -| zstd_HC v0.3 -21 | 3.35 MB/s | 461 MB/s | 42956964 | 40.97 | -| zstd_HC v0.3 -23 | 2.33 MB/s | 463 MB/s | 42934217 | 40.95 | +| lz5 r132 | 180 MB/s | 877 MB/s | 56183327 | 53.58 | +| lz5hc r132 level 1 | 453 MB/s | 1649 MB/s | 68770655 | 65.58 | +| lz5hc r132 level 2 | 341 MB/s | 1533 MB/s | 65201626 | 62.18 | +| lz5hc r132 level 3 | 222 MB/s | 1267 MB/s | 61423270 | 58.58 | +| lz5hc r132 level 4 | 122 MB/s | 892 MB/s | 55011906 | 52.46 | +| lz5hc r132 level 5 | 92 MB/s | 784 MB/s | 52790905 | 50.35 | +| lz5hc r132 level 6 | 40 MB/s | 872 MB/s | 52561673 | 50.13 | +| lz5hc r132 level 7 | 30 MB/s | 825 MB/s | 50947061 | 48.59 | +| lz5hc r132 level 8 | 21 MB/s | 771 MB/s | 50049555 | 47.73 | +| lz5hc r132 level 9 | 16 MB/s | 702 MB/s | 48718531 | 46.46 | +| lz5hc r132 level 10 | 12 MB/s | 670 MB/s | 48109030 | 45.88 | +| lz5hc r132 level 11 | 6.60 MB/s | 592 MB/s | 47639520 | 45.43 | +| lz5hc r132 level 12 | 3.22 MB/s | 670 MB/s | 47461368 | 45.26 | +| zstd_HC v0.3.6 level 1 | 250 MB/s | 529 MB/s | 51230550 | 48.86 | +| zstd_HC v0.3.6 level 2 | 186 MB/s | 498 MB/s | 49678572 | 47.38 | +| zstd_HC v0.3.6 level 3 | 90 MB/s | 484 MB/s | 48838293 | 46.58 | +| zstd_HC v0.3.6 level 5 | 61 MB/s | 467 MB/s | 46480999 | 44.33 | +| zstd_HC v0.3.6 level 7 | 28 MB/s | 480 MB/s | 44803941 | 42.73 | +| zstd_HC v0.3.6 level 9 | 15 MB/s | 497 MB/s | 43899996 | 41.87 | +| zstd_HC v0.3.6 level 12 | 11 MB/s | 505 MB/s | 42402232 | 40.44 | +| zstd_HC v0.3.6 level 16 | 2.29 MB/s | 499 MB/s | 42122327 | 40.17 | +| zstd_HC v0.3.6 level 20 | 1.65 MB/s | 454 MB/s | 41884658 | 39.94 | | brotli 2015-10-29 -1 | 86 MB/s | 208 MB/s | 47882059 | 45.66 | | brotli 2015-10-29 -3 | 60 MB/s | 214 MB/s | 47451223 | 45.25 | | brotli 2015-10-29 -5 | 17 MB/s | 217 MB/s | 43363897 | 41.36 | diff --git a/programs/lz5cli.c b/programs/lz5cli.c index be6ee98..a21197a 100644 --- a/programs/lz5cli.c +++ b/programs/lz5cli.c @@ -86,8 +86,8 @@ * Constants ******************************/ #define COMPRESSOR_NAME "LZ5 command line interface" -#define AUTHOR "Yann Collet" -#define WELCOME_MESSAGE "*** %s %i-bits %s, by %s (%s) ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ5_VERSION, AUTHOR, __DATE__ +#define AUTHOR "Y.Collet & P.Skibinski" +#define WELCOME_MESSAGE "%s %i-bit %s by %s (%s)\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ5_VERSION, AUTHOR, __DATE__ #define LZ5_EXTENSION ".lz5" #define LZ5CAT "lz5cat" #define UNLZ5 "unlz5" From 41e8b19245d1433779e3fd7a7da89e357d0e340b Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 30 Nov 2015 13:00:32 +0100 Subject: [PATCH 19/22] r132 beta --- NEWS | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS b/NEWS index f4c8652..3db9190 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,5 @@ r132 +- improved compression ratio - added: new parsers: LZ5HC_fast, LZ5HC_price_fast, LZ5HC_lowest_price - added: a special 1-byte codeword for the last occured offset - added: support for 3-byte long matches (MINMATCH = 3) From ea2e38cc13d9aad2ade3d8f94949e0832ac598d7 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 30 Nov 2015 16:24:04 +0100 Subject: [PATCH 20/22] support for levels -0 to -12 from lz5.exe --- lib/lz5.c | 1 - lib/lz5common.h | 41 ++++++++++++++++++++++------------- lib/lz5frame.c | 2 +- lib/lz5hc.c | 1 - lib/mem.h | 22 ------------------- programs/Makefile | 4 ++-- programs/lz5cli.c | 55 ++++++++--------------------------------------- 7 files changed, 38 insertions(+), 88 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index 2699167..1fb9a10 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -38,7 +38,6 @@ /************************************** * Includes **************************************/ -#include "mem.h" #include "lz5common.h" #include "lz5.h" #include diff --git a/lib/lz5common.h b/lib/lz5common.h index 13f2549..483f671 100644 --- a/lib/lz5common.h +++ b/lib/lz5common.h @@ -60,21 +60,6 @@ extern "C" { -/* ************************************* -* Local Compiler Options -***************************************/ -#if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -#endif - -#if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -#endif - - - - - /************************************** * Memory routines **************************************/ @@ -125,9 +110,35 @@ static const int LZ5_minLength = (MFLIMIT+1); +/**************************************************************** +* Basic Types +*****************************************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + + /* ************************************* * HC Inline functions and Macros ***************************************/ +#include "mem.h" // MEM_read + #if MINMATCH == 3 #define MEM_read24(ptr) (uint32_t)(MEM_read32(ptr)<<8) #else diff --git a/lib/lz5frame.c b/lib/lz5frame.c index e3e3bf6..0f88364 100644 --- a/lib/lz5frame.c +++ b/lib/lz5frame.c @@ -106,7 +106,7 @@ typedef unsigned long long U64; static const size_t minFHSize = 7; static const size_t maxFHSize = 15; static const size_t BHSize = 4; -static const int minHClevel = 3; +static const int minHClevel = 1; /************************************** diff --git a/lib/lz5hc.c b/lib/lz5hc.c index b38c6a4..67f04c6 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -39,7 +39,6 @@ /* ************************************* * Includes ***************************************/ -#include "mem.h" #include "lz5common.h" #include "lz5.h" #include "lz5hc.h" diff --git a/lib/mem.h b/lib/mem.h index ba43681..01e839f 100644 --- a/lib/mem.h +++ b/lib/mem.h @@ -61,28 +61,6 @@ extern "C" { #endif -/**************************************************************** -* Basic Types -*****************************************************************/ -#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef int16_t S16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef int64_t S64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef signed short S16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; - typedef signed long long S64; -#endif - /**************************************************************** * Memory I/O diff --git a/programs/Makefile b/programs/Makefile index c824fb5..039e48c 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -40,8 +40,8 @@ LZ5DIR := ../lib DESTDIR ?= PREFIX ?= /usr/local -OPTFLAGS ?= -O3 -fomit-frame-pointer -fstrict-aliasing -fforce-addr -ffast-math # can select customized optimized flags such as for example : CFLAGS="-O2 -g" make -CFLAGS ?= -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -pedantic -Wno-variadic-macros -Wno-format +OPTFLAGS ?= -O3 -fomit-frame-pointer -fstrict-aliasing -fforce-addr -ffast-math # can select customized optimized flags such as for example: make CFLAGS="g" +CFLAGS ?= -std=c99 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Wstrict-prototypes -pedantic -Wno-variadic-macros -Wno-format -Wno-unused-function CPPFLAGS := -I$(LZ5DIR) -DXXH_NAMESPACE=LZ5_ -DLZ5_VERSION=\"$(RELEASE)\" FLAGS := $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(OPTFLAGS) diff --git a/programs/lz5cli.c b/programs/lz5cli.c index a21197a..5cb90eb 100644 --- a/programs/lz5cli.c +++ b/programs/lz5cli.c @@ -30,14 +30,6 @@ The license of this compression CLI program is GPLv2. */ -/************************************** -* Tuning parameters -***************************************/ -/* ENABLE_LZ5C_LEGACY_OPTIONS : - Control the availability of -c0, -c1 and -hc legacy arguments - Default : Legacy options are disabled */ -/* #define ENABLE_LZ5C_LEGACY_OPTIONS */ - /************************************** * Compiler Options @@ -60,6 +52,7 @@ #include /* strcmp, strlen */ #include "bench.h" /* BMK_benchFile, BMK_SetNbIterations, BMK_SetBlocksize, BMK_SetPause */ #include "lz5io.h" /* LZ5IO_compressFilename, LZ5IO_decompressFilename, LZ5IO_compressMultipleFilenames */ +#include "lz5common.h" /**************************** @@ -92,10 +85,6 @@ #define LZ5CAT "lz5cat" #define UNLZ5 "unlz5" -#define KB *(1U<<10) -#define MB *(1U<<20) -#define GB *(1U<<30) - #define LZ5_BLOCKSIZEID_DEFAULT 7 @@ -150,12 +139,12 @@ static int usage(void) DISPLAY( "input : a filename\n"); DISPLAY( " with no FILE, or when FILE is - or %s, read standard input\n", stdinmark); DISPLAY( "Arguments :\n"); - DISPLAY( " -1 : Fast compression (default) \n"); - DISPLAY( " -9 : High compression \n"); - DISPLAY( " -d : decompression (default for %s extension)\n", LZ5_EXTENSION); - DISPLAY( " -z : force compression\n"); - DISPLAY( " -f : overwrite output without prompting \n"); - DISPLAY( " -h/-H : display help/long help and exit\n"); + DISPLAY( " -0 : Fast compression (default) \n"); + DISPLAY( " -1...-%d : High compression; higher number == more compression but slower\n", LZ5HC_MAX_CLEVEL); + DISPLAY( " -d : decompression (default for %s extension)\n", LZ5_EXTENSION); + DISPLAY( " -z : force compression\n"); + DISPLAY( " -f : overwrite output without prompting \n"); + DISPLAY( " -h/-H : display help/long help and exit\n"); return 0; } @@ -181,13 +170,6 @@ static int usage_advanced(void) DISPLAY( "Benchmark arguments :\n"); DISPLAY( " -b : benchmark file(s)\n"); DISPLAY( " -i# : iteration loops [1-9](default : 3), benchmark mode only\n"); -#if defined(ENABLE_LZ5C_LEGACY_OPTIONS) - DISPLAY( "Legacy arguments :\n"); - DISPLAY( " -c0 : fast compression\n"); - DISPLAY( " -c1 : high compression\n"); - DISPLAY( " -hc : high compression\n"); - DISPLAY( " -y : overwrite output without prompting \n"); -#endif /* ENABLE_LZ5C_LEGACY_OPTIONS */ EXTENDED_HELP; return 0; } @@ -214,8 +196,8 @@ static int usage_longhelp(void) DISPLAY( "\n"); DISPLAY( "Compression levels : \n"); DISPLAY( "---------------------\n"); - DISPLAY( "-0 ... -2 => Fast compression, all identicals\n"); - DISPLAY( "-3 ... -16 => High compression; higher number == more compression but slower\n"); + DISPLAY( "-0 => Fast compression\n"); + DISPLAY( "-1 ... -%d => High compression; higher number == more compression but slower\n", LZ5HC_MAX_CLEVEL); DISPLAY( "\n"); DISPLAY( "stdin, stdout and the console : \n"); DISPLAY( "--------------------------------\n"); @@ -239,17 +221,6 @@ static int usage_longhelp(void) DISPLAY( "-------------------------------------\n"); DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n"); DISPLAY( " generator | %s | consumer \n", programName); -#if defined(ENABLE_LZ5C_LEGACY_OPTIONS) - DISPLAY( "\n"); - DISPLAY( "***** Warning *****\n"); - DISPLAY( "Legacy arguments take precedence. Therefore : \n"); - DISPLAY( "---------------------------------\n"); - DISPLAY( " %s -hc filename\n", programName); - DISPLAY( "means 'compress filename in high compression mode'\n"); - DISPLAY( "It is not equivalent to :\n"); - DISPLAY( " %s -h -c filename\n", programName); - DISPLAY( "which would display help text and exit\n"); -#endif /* ENABLE_LZ5C_LEGACY_OPTIONS */ return 0; } @@ -341,14 +312,6 @@ int main(int argc, char** argv) { argument ++; -#if defined(ENABLE_LZ5C_LEGACY_OPTIONS) - /* Legacy arguments (-c0, -c1, -hc, -y, -s) */ - if ((argument[0]=='c') && (argument[1]=='0')) { cLevel=0; argument++; continue; } /* -c0 (fast compression) */ - if ((argument[0]=='c') && (argument[1]=='1')) { cLevel=9; argument++; continue; } /* -c1 (high compression) */ - if ((argument[0]=='h') && (argument[1]=='c')) { cLevel=9; argument++; continue; } /* -hc (high compression) */ - if (*argument=='y') { LZ5IO_setOverwrite(1); continue; } /* -y (answer 'yes' to overwrite permission) */ -#endif /* ENABLE_LZ5C_LEGACY_OPTIONS */ - if ((*argument>='0') && (*argument<='9')) { cLevel = 0; From 3152a1729ef4eab38515d100778f8c3edd9dbe46 Mon Sep 17 00:00:00 2001 From: inikep Date: Mon, 30 Nov 2015 17:12:08 +0100 Subject: [PATCH 21/22] fixed: 32-bit compilation warnings --- lib/lz5.c | 8 +++--- lib/lz5common.h | 7 +++--- lib/lz5hc.c | 6 ++--- programs/Makefile | 64 +++++++++++++++++++---------------------------- 4 files changed, 37 insertions(+), 48 deletions(-) diff --git a/lib/lz5.c b/lib/lz5.c index 1fb9a10..8676099 100644 --- a/lib/lz5.c +++ b/lib/lz5.c @@ -249,7 +249,7 @@ FORCE_INLINE int LZ5_compress_generic( if ((outputLimited) && (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit))) return 0; /* Check output limit */ - if (ip-match >= LZ5_SHORT_OFFSET_DISTANCE && ip-match < LZ5_MID_OFFSET_DISTANCE && ip-match != last_off) + if (ip-match >= LZ5_SHORT_OFFSET_DISTANCE && ip-match < LZ5_MID_OFFSET_DISTANCE && (U32)(ip-match) != last_off) { if (litLength>=RUN_MASK) { @@ -279,7 +279,7 @@ FORCE_INLINE int LZ5_compress_generic( _next_match: /* Encode Offset */ - if (ip-match == last_off) + if ((U32)(ip-match) == last_off) { *token+=(3<= LZ5_SHORT_OFFSET_DISTANCE && ip-match < LZ5_MID_OFFSET_DISTANCE && ip-match != last_off) + if ((U32)(ip-match) >= LZ5_SHORT_OFFSET_DISTANCE && (U32)(ip-match) < LZ5_MID_OFFSET_DISTANCE && (U32)(ip-match) != last_off) { if (litLength>=RUN_MASK) { @@ -573,7 +573,7 @@ static int LZ5_compress_destSize_generic( _next_match: /* Encode Offset */ - if (ip-match == last_off) + if ((U32)(ip-match) == last_off) { *token+=(3<>8) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ - if (*ip-match >= LZ5_SHORT_OFFSET_DISTANCE && *ip-match < LZ5_MID_OFFSET_DISTANCE && *ip-match != ctx->last_off) + if (*ip-match >= LZ5_SHORT_OFFSET_DISTANCE && *ip-match < LZ5_MID_OFFSET_DISTANCE && (U32)(*ip-match) != ctx->last_off) { if (length>=(int)RUN_MASK) { int len; *token=(RUN_MASK< 254 ; len-=255) *(*op)++ = 255; *(*op)++ = (BYTE)len; } else *token = (BYTE)(length<last_off) + if ((U32)(*ip-match) == ctx->last_off) { *token+=(3<last_off) { ml2=0; goto _Encode; } + if ((U32)(ip - ref) == ctx->last_off) { ml2=0; goto _Encode; } { int back = 0; diff --git a/programs/Makefile b/programs/Makefile index 039e48c..1715e30 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -23,8 +23,7 @@ # - LZ5 public forum : https://groups.google.com/forum/#!forum/lz5c # ########################################################################## # lz5 : Command Line Utility, supporting gzip-like arguments -# lz5c : CLU, supporting also legacy lz5demo arguments -# lz5c32: Same as lz5c, but forced to compile in 32-bits mode +# lz5_32: Same as lz5, but forced to compile in 32-bits mode # frametest : Test tool, to check lz5frame integrity on target platform # frametest32: Same as frametest, but forced to compile in 32-bits mode # fullbench : Precisely measure speed for each LZ5 function variant @@ -63,7 +62,7 @@ FUZZER_TIME := -T9mn default: lz5 -m32: lz5c32 fullbench32 frametest32 +m32: lz5_32 fullbench32 frametest32 bins: lz5 fullbench frametest datagen @@ -72,11 +71,8 @@ all: bins m32 lz5: $(LZ5DIR)/lz5.o $(LZ5DIR)/lz5hc.o $(LZ5DIR)/lz5frame.o $(LZ5DIR)/xxhash.o bench.o lz5io.o lz5cli.o $(CC) $(FLAGS) $^ -o $@$(EXT) -lz5c : $(LZ5DIR)/lz5.o $(LZ5DIR)/lz5hc.o $(LZ5DIR)/lz5frame.o $(LZ5DIR)/xxhash.o bench.o lz5io.o lz5cli.o - $(CC) $(FLAGS) -DENABLE_LZ5C_LEGACY_OPTIONS $^ -o $@$(EXT) - -lz5c32: $(LZ5DIR)/lz5.c $(LZ5DIR)/lz5hc.c $(LZ5DIR)/lz5frame.c $(LZ5DIR)/xxhash.c bench.c lz5io.c lz5cli.c - $(CC) -m32 $(FLAGS) -DENABLE_LZ5C_LEGACY_OPTIONS $^ -o $@$(EXT) +lz5_32: $(LZ5DIR)/lz5.c $(LZ5DIR)/lz5hc.c $(LZ5DIR)/lz5frame.c $(LZ5DIR)/xxhash.c bench.c lz5io.c lz5cli.c + $(CC) -m32 $(FLAGS) $^ -o $@$(EXT) fullbench: $(LZ5DIR)/lz5.o $(LZ5DIR)/lz5hc.o $(LZ5DIR)/lz5frame.o $(LZ5DIR)/xxhash.o fullbench.o $(CC) $(FLAGS) $^ -o $@$(EXT) @@ -99,7 +95,7 @@ datagen : datagen.o datagencli.c clean: @$(MAKE) -C $(LZ5DIR) $@ > $(VOID) @rm -f core *.o *.test tmp* \ - lz5$(EXT) lz5c$(EXT) lz5c32$(EXT) \ + lz5$(EXT) lz5_32$(EXT) \ fullbench$(EXT) fullbench32$(EXT) \ frametest$(EXT) frametest32$(EXT) \ datagen$(EXT) @@ -110,16 +106,14 @@ clean: #make install is validated only for Linux, OSX, kFreeBSD and Hurd targets ifneq (,$(filter $(shell uname),Linux Darwin GNU/kFreeBSD GNU)) -install: lz5 lz5c +install: lz5 @echo Installing binaries @install -d -m 755 $(DESTDIR)$(BINDIR)/ $(DESTDIR)$(MANDIR)/ @install -m 755 lz5 $(DESTDIR)$(BINDIR)/lz5 @ln -sf lz5 $(DESTDIR)$(BINDIR)/lz5cat @ln -sf lz5 $(DESTDIR)$(BINDIR)/unlz5 - @install -m 755 lz5c $(DESTDIR)$(BINDIR)/lz5c @echo Installing man pages @install -m 644 lz5.1 $(DESTDIR)$(MANDIR)/lz5.1 - @ln -sf lz5.1 $(DESTDIR)$(MANDIR)/lz5c.1 @ln -sf lz5.1 $(DESTDIR)$(MANDIR)/lz5cat.1 @ln -sf lz5.1 $(DESTDIR)$(MANDIR)/unlz5.1 @echo lz5 installation completed @@ -128,16 +122,14 @@ uninstall: rm -f $(DESTDIR)$(BINDIR)/lz5cat rm -f $(DESTDIR)$(BINDIR)/unlz5 [ -x $(DESTDIR)$(BINDIR)/lz5 ] && rm -f $(DESTDIR)$(BINDIR)/lz5 - [ -x $(DESTDIR)$(BINDIR)/lz5c ] && rm -f $(DESTDIR)$(BINDIR)/lz5c [ -f $(DESTDIR)$(MANDIR)/lz5.1 ] && rm -f $(DESTDIR)$(MANDIR)/lz5.1 - rm -f $(DESTDIR)$(MANDIR)/lz5c.1 rm -f $(DESTDIR)$(MANDIR)/lz5cat.1 rm -f $(DESTDIR)$(MANDIR)/unlz5.1 @echo lz5 programs successfully uninstalled -test: test-lz5 test-lz5c test-frametest test-fullbench test-fuzzer test-mem +test: test-lz5 test-frametest test-fullbench test-fuzzer test-mem -test32: test-lz5c32 test-frametest32 test-fullbench32 test-fuzzer32 test-mem32 +test32: test-lz5_32 test-frametest32 test-fullbench32 test-fuzzer32 test-mem32 test-all: test test32 @@ -234,28 +226,24 @@ test-lz5: lz5 datagen test-lz5-basic test-lz5-multiple test-lz5-sparse test-lz5- @echo "\n ---- test pass-through ----" ./datagen | ./lz5 -tf -test-lz5c: lz5c datagen - @echo "\n ---- test lz5c version ----" - ./datagen -g256MB | ./lz5c -l -v | ./lz5c -t - -test-interop-32-64: lz5 lz5c32 datagen +test-interop-32-64: lz5 lz5_32 datagen @echo "\n ---- test interoperability 32-bits -vs- 64 bits ----" - ./datagen -g16KB | ./lz5c32 -9 | ./lz5 -t - ./datagen -P10 | ./lz5 -9B4 | ./lz5c32 -t - ./datagen | ./lz5c32 | ./lz5 -t - ./datagen -g1M | ./lz5 -3B5 | ./lz5c32 -t - ./datagen -g256MB | ./lz5c32 -vqB4D | ./lz5 -qt - ./datagen -g1G -P90 | ./lz5 | ./lz5c32 -t - ./datagen -g6GB | ./lz5c32 -vq9BD | ./lz5 -qt - -test-lz5c32-basic: lz5c32 datagen - @echo "\n ---- test lz5c32 32-bits version ----" - ./datagen -g16KB | ./lz5c32 -9 | ./lz5c32 -t - ./datagen | ./lz5c32 | ./lz5c32 -t - ./datagen -g256MB | ./lz5c32 -vqB4D | ./lz5c32 -qt - ./datagen -g6GB | ./lz5c32 -vqB5D | ./lz5c32 -qt - -test-lz5c32: test-lz5c32-basic test-interop-32-64 + ./datagen -g16KB | ./lz5_32 -9 | ./lz5 -t + ./datagen -P10 | ./lz5 -9B4 | ./lz5_32 -t + ./datagen | ./lz5_32 | ./lz5 -t + ./datagen -g1M | ./lz5 -3B5 | ./lz5_32 -t + ./datagen -g256MB | ./lz5_32 -vqB4D | ./lz5 -qt + ./datagen -g1G -P90 | ./lz5 | ./lz5_32 -t + ./datagen -g6GB | ./lz5_32 -vq9BD | ./lz5 -qt + +test-lz5_32-basic: lz5_32 datagen + @echo "\n ---- test lz5_32 32-bits version ----" + ./datagen -g16KB | ./lz5_32 -9 | ./lz5_32 -t + ./datagen | ./lz5_32 | ./lz5_32 -t + ./datagen -g256MB | ./lz5_32 -vqB4D | ./lz5_32 -qt + ./datagen -g6GB | ./lz5_32 -vqB5D | ./lz5_32 -qt + +test-lz5_32: test-lz5_32-basic test-interop-32-64 test-fullbench: fullbench ./fullbench --no-prompt $(NB_LOOPS) $(TEST_FILES) @@ -294,7 +282,7 @@ test-mem: lz5 datagen fuzzer frametest fullbench valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i64 -t1 valgrind --leak-check=yes --error-exitcode=1 ./frametest -i256 -test-mem32: lz5c32 datagen +test-mem32: lz5_32 datagen # unfortunately, valgrind doesn't seem to work with non-native binary... endif From 4301f278298cf8746a39844ad92ddb8bc17247e3 Mon Sep 17 00:00:00 2001 From: inikep Date: Tue, 1 Dec 2015 10:57:03 +0100 Subject: [PATCH 22/22] memory leak fix (thanks to m^2) --- lib/lz5frame.c | 12 +++++++++--- lib/lz5hc.c | 14 ++++++++------ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/lib/lz5frame.c b/lib/lz5frame.c index 0f88364..b81d0a6 100644 --- a/lib/lz5frame.c +++ b/lib/lz5frame.c @@ -335,7 +335,7 @@ size_t LZ5F_compressFrame(void* dstBuffer, size_t dstMaxSize, const void* srcBuf dstPtr += errorCode; if (prefs.compressionLevel >= (int)minHClevel) /* no allocation necessary with lz5 fast */ - FREEMEM(cctxI.lz5CtxPtr); + LZ5_freeStreamHC(cctxI.lz5CtxPtr); return (dstPtr - dstStart); } @@ -375,7 +375,10 @@ LZ5F_errorCode_t LZ5F_freeCompressionContext(LZ5F_compressionContext_t LZ5F_comp if (cctxPtr != NULL) /* null pointers can be safely provided to this function, like free() */ { - FREEMEM(cctxPtr->lz5CtxPtr); + if (cctxPtr->prefs.compressionLevel < minHClevel) + FREEMEM(cctxPtr->lz5CtxPtr); + else + LZ5_freeStreamHC(cctxPtr->lz5CtxPtr); FREEMEM(cctxPtr->tmpBuff); FREEMEM(LZ5F_compressionContext); } @@ -410,7 +413,10 @@ size_t LZ5F_compressBegin(LZ5F_compressionContext_t compressionContext, void* ds U32 tableID = (cctxPtr->prefs.compressionLevel < minHClevel) ? 1 : 2; /* 0:nothing ; 1:LZ5 table ; 2:HC tables */ if (cctxPtr->lz5CtxLevel < tableID) { - FREEMEM(cctxPtr->lz5CtxPtr); + if (cctxPtr->prefs.compressionLevel < minHClevel) + FREEMEM(cctxPtr->lz5CtxPtr); + else + LZ5_freeStreamHC(cctxPtr->lz5CtxPtr); if (cctxPtr->prefs.compressionLevel < minHClevel) cctxPtr->lz5CtxPtr = (void*)LZ5_createStream(); else diff --git a/lib/lz5hc.c b/lib/lz5hc.c index ffec5c1..de18f9b 100644 --- a/lib/lz5hc.c +++ b/lib/lz5hc.c @@ -75,10 +75,10 @@ int LZ5_alloc_mem_HC(LZ5HC_Data_Structure* ctx, int compressionLevel) return 1; } -void LZ5_free_mem_HC(LZ5HC_Data_Structure* statePtr) +void LZ5_free_mem_HC(LZ5HC_Data_Structure* ctx) { - if (statePtr->chainTable) FREEMEM(statePtr->chainTable); - if (statePtr->hashTable) FREEMEM(statePtr->hashTable); + if (ctx->chainTable) FREEMEM(ctx->chainTable); + if (ctx->hashTable) FREEMEM(ctx->hashTable); } @@ -982,15 +982,17 @@ LZ5_streamHC_t* LZ5_createStreamHC(int compressionLevel) FREEMEM(statePtr); return NULL; } - return (LZ5_streamHC_t*) statePtr; } int LZ5_freeStreamHC (LZ5_streamHC_t* LZ5_streamHCPtr) { LZ5HC_Data_Structure* statePtr = (LZ5HC_Data_Structure*)LZ5_streamHCPtr; - LZ5_free_mem_HC(statePtr); - free(LZ5_streamHCPtr); + if (statePtr) + { + LZ5_free_mem_HC(statePtr); + free(LZ5_streamHCPtr); + } return 0; }