Skip to content

Commit

Permalink
Merge branch 'main-dev' of https://github.com/ashvardanian/StringZilla
Browse files Browse the repository at this point in the history
…into main-dev
  • Loading branch information
ashvardanian committed Oct 17, 2024
2 parents fb55d54 + 1891dbf commit 4e33434
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 4 deletions.
12 changes: 12 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,16 @@ if(${STRINGZILLA_BUILD_SHARED})
"SZ_USE_ARM_NEON=1"
"SZ_USE_ARM_SVE=1")
endif()

if (MSVC)
# Add dependencies on the necessary runtime libraries in case of static linking.
# This ensures that basic runtime functions are available:
# msvcrt.lib: MSVC CRT startup code, used together with the DLL versions of the UCRT and vcruntime.
# vcruntime.lib: import library for the Visual C++ runtime (vcruntime) DLL.
# ucrt.lib: import library for the Universal C Runtime DLL, which provides standard C functions like I/O.
target_link_libraries(${target} PRIVATE msvcrt.lib vcruntime.lib ucrt.lib)
endif()

endfunction()

define_shared(stringzilla_shared)
Expand All @@ -344,4 +354,6 @@ if(${STRINGZILLA_BUILD_SHARED})
"$<$<CXX_COMPILER_ID:MSVC>:/Oi-;/GS->")
target_link_options(stringzillite PRIVATE "$<$<CXX_COMPILER_ID:GNU,Clang>:-nostdlib>")
target_link_options(stringzillite PRIVATE "$<$<CXX_COMPILER_ID:MSVC>:/NODEFAULTLIB>")


endif()
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ __Who is this for?__
<span style="color:#ABABAB;">arm:</span> <b>9.4</b> MB/s
</td>
<td align="center">
<code>uniform_int_distribution</code><br/>
<code>std::uniform_int_distribution</code><br/>
<span style="color:#ABABAB;">x86:</span> <b>47.2</b> &centerdot;
<span style="color:#ABABAB;">arm:</span> <b>20.4</b> MB/s
</td>
Expand All @@ -193,7 +193,7 @@ __Who is this for?__
<tr>
<td align="center">⚪</td>
<td align="center">
<code>transform</code><br/>
<code>std::transform</code><br/>
<span style="color:#ABABAB;">x86:</span> <b>3.81</b> &centerdot;
<span style="color:#ABABAB;">arm:</span> <b>2.65</b> GB/s
</td>
Expand Down
12 changes: 10 additions & 2 deletions include/stringzilla/stringzilla.h
Original file line number Diff line number Diff line change
Expand Up @@ -5323,8 +5323,16 @@ SZ_PUBLIC void sz_look_up_transform_avx512(sz_cptr_t source, sz_size_t length, s
// operate on 4 registers, it might be cleaner to use 2x separate `_mm512_permutexvar_epi8` calls.
// The results are then combined with 2x `_mm512_test_epi8_mask` and 3x blends.
//
// - `_mm512_mask_blend_epi8` - 1 cycle latency, and generally 2x can run in parallel.
// - `_mm512_test_epi8_mask` - 3 cycles latency, same as most comparison functions in AVX-512.
// - 4x `_mm512_permutexvar_epi8` maps to "VPERMB (ZMM, ZMM, ZMM)":
//   - On Ice Lake: 3 cycles latency, ports: 1*p5
//   - On Genoa: 6 cycles latency, ports: 1*FP12
// - 3x `_mm512_mask_blend_epi8` maps to "VPBLENDMB_Z (ZMM, K, ZMM, ZMM)":
//   - On Ice Lake: 3 cycles latency, ports: 1*p05
//   - On Genoa: 1 cycle latency, ports: 1*FP0123
// - 2x `_mm512_test_epi8_mask` maps to "VPTESTMB (K, ZMM, ZMM)":
//   - On Ice Lake: 3 cycles latency, ports: 1*p5
//   - On Genoa: 4 cycles latency, ports: 1*FP01
//
sz_u512_vec_t lut_0_to_63_vec, lut_64_to_127_vec, lut_128_to_191_vec, lut_192_to_255_vec;
lut_0_to_63_vec.zmm = _mm512_loadu_si512((lut));
lut_64_to_127_vec.zmm = _mm512_loadu_si512((lut + 64));
Expand Down

0 comments on commit 4e33434

Please sign in to comment.