Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@ To design custom accelerators for your application, use Vitis library functions
Combine domain-specific Vitis libraries with pre-optimized deep learning models from the Vitis AI library or the Vitis AI development kit to accelerate your whole application and meet the overall system-level functionality and performance goals.

![Scalable and Flexible Library Functions](https://xilinx.github.io/Vitis_Libraries/_images/1568760747007.png)

# Support & Contact
To report any issues or request support, please post your question to the Vitis section of the [Adaptive SoC & FPGA Community Forums](https://adaptivesupport.amd.com/s/topic/0TO2E000000YKXhWAO/vitis?language=en_US).
74 changes: 54 additions & 20 deletions data_compression/common/thirdParty/zlib-1.2.11/deflate.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,6 @@ int stream_size;
int wrap = 1;
static const char my_version[] = ZLIB_VERSION;

ushf* overlay;
/* We overlay pending_buf and d_buf+l_buf. This works since the average
* output size for (length,distance) codes is <= 24 bits.
*/

if (version == Z_NULL || version[0] != my_version[0] || stream_size != sizeof(z_stream)) {
return Z_VERSION_ERROR;
}
Expand Down Expand Up @@ -314,18 +309,60 @@ int stream_size;

s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */

overlay = (ushf*)ZALLOC(strm, s->lit_bufsize, sizeof(ush) + 2);
s->pending_buf = (uchf*)overlay;
s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush) + 2L);
/* We overlay pending_buf and sym_buf. This works since the average size
* for length/distance pairs over any compressed block is assured to be 31
* bits or less.
*
* Analysis: The longest fixed codes are a length code of 8 bits plus 5
* extra bits, for lengths 131 to 257. The longest fixed distance codes are
* 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest
* possible fixed-codes length/distance pair is then 31 bits total.
*
* sym_buf starts one-fourth of the way into pending_buf. So there are
* three bytes in sym_buf for every four bytes in pending_buf. Each symbol
* in sym_buf is three bytes -- two for the distance and one for the
* literal/length. As each symbol is consumed, the pointer to the next
* sym_buf value to read moves forward three bytes. From that symbol, up to
* 31 bits are written to pending_buf. The closest the written pending_buf
* bits get to the next sym_buf symbol to read is just before the last
* code is written. At that time, 31*(n-2) bits have been written, just
* after 24*(n-2) bits have been consumed from sym_buf. sym_buf starts at
* 8*n bits into pending_buf. (Note that the symbol buffer fills when n-1
* symbols are written.) The closest the writing gets to what is unread is
* then n+14 bits. Here n is lit_bufsize, which is 16384 by default, and
* can range from 128 to 32768.
*
* Therefore, at a minimum, there are 142 bits of space between what is
* written and what is read in the overlain buffers, so the symbols cannot
* be overwritten by the compressed data. That space is actually 139 bits,
* due to the three-bit fixed-code block header.
*
* That covers the case where either Z_FIXED is specified, forcing fixed
* codes, or when the use of fixed codes is chosen, because that choice
* results in a smaller compressed block than dynamic codes. That latter
* condition then assures that the above analysis also covers all dynamic
* blocks. A dynamic-code block will only be chosen to be emitted if it has
* fewer bits than a fixed-code block would for the same set of symbols.
* Therefore its average symbol length is assured to be less than 31. So
* the compressed data for a dynamic block also cannot overwrite the
* symbols from which it is being constructed.
*/

s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, 4);
s->pending_buf_size = (ulg)s->lit_bufsize * 4;

if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || s->pending_buf == Z_NULL) {
s->status = FINISH_STATE;
strm->msg = ERR_MSG(Z_MEM_ERROR);
deflateEnd(strm);
return Z_MEM_ERROR;
}
s->d_buf = overlay + s->lit_bufsize / sizeof(ush);
s->l_buf = s->pending_buf + (1 + sizeof(ush)) * s->lit_bufsize;
s->sym_buf = s->pending_buf + s->lit_bufsize;
s->sym_end = (s->lit_bufsize - 1) * 3;
/* We avoid equality with lit_bufsize*3 because of wraparound at 64K
* on 16 bit machines and because stored blocks are restricted to
* 64K-1 bytes.
*/

s->level = level;
s->strategy = strategy;
Expand Down Expand Up @@ -512,7 +549,7 @@ int value;

if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
s = strm->state;
if ((Bytef*)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) return Z_BUF_ERROR;
if (s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) return Z_BUF_ERROR;
do {
put = Buf_size - s->bi_valid;
if (put > bits) put = bits;
Expand Down Expand Up @@ -1033,7 +1070,6 @@ z_streamp source;
#else
deflate_state* ds;
deflate_state* ss;
ushf* overlay;

if (deflateStateCheck(source) || dest == Z_NULL) {
return Z_STREAM_ERROR;
Expand All @@ -1052,8 +1088,7 @@ z_streamp source;
ds->window = (Bytef*)ZALLOC(dest, ds->w_size, 2 * sizeof(Byte));
ds->prev = (Posf*)ZALLOC(dest, ds->w_size, sizeof(Pos));
ds->head = (Posf*)ZALLOC(dest, ds->hash_size, sizeof(Pos));
overlay = (ushf*)ZALLOC(dest, ds->lit_bufsize, sizeof(ush) + 2);
ds->pending_buf = (uchf*)overlay;
ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, 4);

if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || ds->pending_buf == Z_NULL) {
deflateEnd(dest);
Expand All @@ -1066,8 +1101,7 @@ z_streamp source;
zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size);

ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf);
ds->d_buf = overlay + ds->lit_bufsize / sizeof(ush);
ds->l_buf = ds->pending_buf + (1 + sizeof(ush)) * ds->lit_bufsize;
ds->sym_buf = ds->pending_buf + ds->lit_bufsize;

ds->l_desc.dyn_tree = ds->dyn_ltree;
ds->d_desc.dyn_tree = ds->dyn_dtree;
Expand Down Expand Up @@ -1792,7 +1826,7 @@ int flush;
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit) FLUSH_BLOCK(s, 0);
if (s->sym_next) FLUSH_BLOCK(s, 0);
return block_done;
}

Expand Down Expand Up @@ -1918,7 +1952,7 @@ int flush;
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit) FLUSH_BLOCK(s, 0);
if (s->sym_next) FLUSH_BLOCK(s, 0);
return block_done;
}
#endif /* FASTEST */
Expand Down Expand Up @@ -1987,7 +2021,7 @@ int flush;
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit) FLUSH_BLOCK(s, 0);
if (s->sym_next) FLUSH_BLOCK(s, 0);
return block_done;
}

Expand Down Expand Up @@ -2023,6 +2057,6 @@ int flush;
FLUSH_BLOCK(s, 1);
return finish_done;
}
if (s->last_lit) FLUSH_BLOCK(s, 0);
if (s->sym_next) FLUSH_BLOCK(s, 0);
return block_done;
}
25 changes: 11 additions & 14 deletions data_compression/common/thirdParty/zlib-1.2.11/deflate.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ typedef struct internal_state {
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/

uchf* l_buf; /* buffer for literals or lengths */
uchf* sym_buf; /* buffer for distances and literals/lengths */

uInt lit_bufsize;
/* Size of match buffer for literals/lengths. There are 4 reasons for
Expand All @@ -238,13 +238,8 @@ typedef struct internal_state {
* - I can't count above 4
*/

uInt last_lit; /* running index in l_buf */

ushf* d_buf;
/* Buffer for distances. To simplify the code, d_buf and l_buf have
* the same number of elements. To use different lengths, an extra flag
* array would be necessary.
*/
uInt sym_next; /* running index in sym_buf */
uInt sym_end; /* symbol table full when sym_next reaches this */

ulg opt_len; /* bit length of current block with optimal trees */
ulg static_len; /* bit length of current block with static trees */
Expand Down Expand Up @@ -322,21 +317,23 @@ extern const uch ZLIB_INTERNAL _dist_code[];
#define _tr_tally_lit(s, c, flush) \
{ \
uch cc = (c); \
s->d_buf[s->last_lit] = 0; \
s->l_buf[s->last_lit++] = cc; \
s->sym_buf[s->sym_next++] = 0; \
s->sym_buf[s->sym_next++] = 0; \
s->sym_buf[s->sym_next++] = cc; \
s->dyn_ltree[cc].Freq++; \
flush = (s->last_lit == s->lit_bufsize - 1); \
flush = (s->sym_next == s->sym_end); \
}
#define _tr_tally_dist(s, distance, length, flush) \
{ \
uch len = (uch)(length); \
ush dist = (ush)(distance); \
s->d_buf[s->last_lit] = dist; \
s->l_buf[s->last_lit++] = len; \
s->sym_buf[s->sym_next++] = dist; \
s->sym_buf[s->sym_next++] = dist >> 8; \
s->sym_buf[s->sym_next++] = len; \
dist--; \
s->dyn_ltree[_length_code[len] + LITERALS + 1].Freq++; \
s->dyn_dtree[d_code(dist)].Freq++; \
flush = (s->last_lit == s->lit_bufsize - 1); \
flush = (s->sym_next == s->sym_end); \
}
#else
#define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
Expand Down
47 changes: 14 additions & 33 deletions data_compression/common/thirdParty/zlib-1.2.11/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ local void init_block(s) deflate_state* s;

s->dyn_ltree[END_BLOCK].Freq = 1;
s->opt_len = s->static_len = 0L;
s->last_lit = s->matches = 0;
s->sym_next = s->matches = 0;
}

#define SMALLEST 1
Expand Down Expand Up @@ -915,7 +915,7 @@ int last; /* one if this is the last block for a file */
static_lenb = (s->static_len + 3 + 7) >> 3;

Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, s->opt_len, static_lenb,
s->static_len, stored_len, s->last_lit));
s->static_len, stored_len, s->sym_next / 3));

if (static_lenb <= opt_lenb) opt_lenb = static_lenb;

Expand Down Expand Up @@ -979,8 +979,9 @@ int ZLIB_INTERNAL _tr_tally(s, dist, lc) deflate_state* s;
unsigned dist; /* distance of matched string */
unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
{
s->d_buf[s->last_lit] = (ush)dist;
s->l_buf[s->last_lit++] = (uch)lc;
s->sym_buf[s->sym_next++] = dist;
s->sym_buf[s->sym_next++] = dist >> 8;
s->sym_buf[s->sym_next++] = lc;
if (dist == 0) {
/* lc is the unmatched char */
s->dyn_ltree[lc].Freq++;
Expand All @@ -995,28 +996,7 @@ unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++;
s->dyn_dtree[d_code(dist)].Freq++;
}

#ifdef TRUNCATE_BLOCK
/* Try to guess if it is profitable to stop the current block here */
if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
/* Compute an upper bound for the compressed length */
ulg out_length = (ulg)s->last_lit * 8L;
ulg in_length = (ulg)((long)s->strstart - s->block_start);
int dcode;
for (dcode = 0; dcode < D_CODES; dcode++) {
out_length += (ulg)s->dyn_dtree[dcode].Freq * (5L + extra_dbits[dcode]);
}
out_length >>= 3;
Tracev((stderr, "\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", s->last_lit, in_length, out_length,
100L - out_length * 100L / in_length));
if (s->matches < s->last_lit / 2 && out_length < in_length / 2) return 1;
}
#endif
return (s->last_lit == s->lit_bufsize - 1);
/* We avoid equality with lit_bufsize because of wraparound at 64K
* on 16 bit machines and because stored blocks are restricted to
* 64K-1 bytes.
*/
return (s->sym_next == s->sym_end);
}

/* ===========================================================================
Expand All @@ -1028,13 +1008,14 @@ const ct_data* dtree; /* distance tree */
{
unsigned dist; /* distance of matched string */
int lc; /* match length or unmatched char (if dist == 0) */
unsigned lx = 0; /* running index in l_buf */
unsigned sx = 0; /* running index in sym_buf */
unsigned code; /* the code to send */
int extra; /* number of extra bits to send */

if (s->last_lit != 0) do {
dist = s->d_buf[lx];
lc = s->l_buf[lx++];
if (s->sym_next != 0) do {
dist = s->sym_buf[sx++] & 0xff;
dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
lc = s->sym_buf[sx++];
if (dist == 0) {
send_code(s, lc, ltree); /* send a literal byte */
Tracecv(isgraph(lc), (stderr, " '%c' ", lc));
Expand All @@ -1059,10 +1040,10 @@ const ct_data* dtree; /* distance tree */
}
} /* literal or match pair ? */

/* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
Assert((uInt)(s->pending) < s->lit_bufsize + 2 * lx, "pendingBuf overflow");
/* Check that the overlay between pending_buf and sym_buf is ok: */
Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");

} while (lx < s->last_lit);
} while (sx < s->sym_next);

send_code(s, END_BLOCK, ltree);
}
Expand Down
6 changes: 3 additions & 3 deletions dsp/Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
@Library('pipeline-library')_

VitisLibPipeline (branch: 'next', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim',
upstream_dependencies: 'xf_utils_hw,next,../utils; xf_data_mover,next,../data_mover; dsplib_internal_scripts,main,../dsplib_internal_scripts',
devtest: 'RunDeploy.sh', TOOLVERSION: '2024.2_stable_latest',
VitisLibPipeline (branch: 'main', libname: 'xf_dsp', TARGETS: 'hls_csim:hls_csynth:hls_cosim:vitis_sw_emu:vitis_hw_emu:vitis_hw_build:vitis_aie_sim:vitis_aie_x86sim',
upstream_dependencies: 'xf_utils_hw,main,../utils; xf_data_mover,main,../data_mover; dsplib_internal_scripts,main,../dsplib_internal_scripts',
devtest: 'RunDeploy.sh', TOOLVERSION: '2024.2_released',
email: 'berry@amd.com',
post_launch: '../dsplib_internal_scripts/scripts/jenkins/post_launch_wrapper.sh |& tee -a reporting_log.txt')
11 changes: 2 additions & 9 deletions dsp/L1/include/aie/fir_tdm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,7 @@ class kernelFilterClass {
#if __HAS_ACCUM_PERMUTES__ == 1
// cint16/int16 combo can be overloaded with 2 column MUL/MACs.
static constexpr unsigned int columnMultiple =
(std::is_same<TT_DATA, cint16>::value && std::is_same<TT_COEFF, int16>::value) &&
(TP_TDM_CHANNELS > m_kVOutSize) && (TP_TDM_CHANNELS % (2 * m_kVOutSize) == 0)
? 2
: 1;
(std::is_same<TT_DATA, cint16>::value && std::is_same<TT_COEFF, int16>::value) ? 2 : 1;
static constexpr unsigned int coeffToDataMultiple = 1;
#else
static constexpr unsigned int columnMultiple = 1;
Expand Down Expand Up @@ -256,11 +253,7 @@ class kernelFilterClass {
// Operate on multiple frames in parallel, when possible.
// Optimized to reduce data loads; handy when 512 bits of data and 256 bits of coeffs are needed on each clock
// cycle.
static constexpr unsigned int useEvenFrames =
(TP_NUM_FRAMES % 2 == 0 && columnMultiple == 2 && TP_TDM_CHANNELS > m_kVOutSize &&
TP_TDM_CHANNELS % kSamplesInVectData == 0)
? 1
: 0;
static constexpr unsigned int useEvenFrames = (TP_NUM_FRAMES % 2 == 0 && columnMultiple == 2) ? 1 : 0;
// TDM FIR Margin = (TP_FIR_LEN-1)*TP_TDM_CHANNELS
// or set to 0, if handled with internal buffer.
static constexpr unsigned int enableInternalMargin = __HAS_ACCUM_PERMUTES__ ? 1 : 0;
Expand Down
Loading