Skip to content

Commit

Permalink
[yugabyte#21830] DocDB: Import the usearch library
Browse files Browse the repository at this point in the history
Summary:
Import the usearch header-only library and its dependency, the fp16 library. Both of these libraries are header-only.

Introducing an inline-thirdparty directory in the src directory where we can easily import header-only libraries by copying the relevant header sources. We can switch to git subtrees, or create a tool to automate pulling upstream changes into our own git repository, later.
Jira: DB-10732

Test Plan:
Jenkins: test regex: usearch

New test: usearch_vector_index-test

Reviewers: tnayak

Reviewed By: tnayak

Subscribers: jason, ybase

Differential Revision: https://phorge.dev.yugabyte.com/D33682
  • Loading branch information
mbautin committed Apr 9, 2024
1 parent 5cd2cf4 commit 7ba576d
Show file tree
Hide file tree
Showing 12 changed files with 9,070 additions and 2 deletions.
9 changes: 7 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,6 @@ if(IS_GCC)
# https://gist.githubusercontent.com/mbautin/de18543ea85d46db49dfa4b4b7df082a/raw
ADD_CXX_FLAGS("-Wno-use-after-free")
endif()

endif()

if(USING_LINUXBREW)
Expand Down Expand Up @@ -600,10 +599,16 @@ file(MAKE_DIRECTORY "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}")
set(EXECUTABLE_OUTPUT_PATH "${YB_BUILD_ROOT}/bin")
file(MAKE_DIRECTORY "${EXECUTABLE_OUTPUT_PATH}")

# Generated sources always have higher priority.
# Generated sources always have higher priority than identically named sources in the source
# directory.
include_directories(${CMAKE_CURRENT_BINARY_DIR}/src)

include_directories(src)

# Header-only third-party libraries copied into src/inline-thirdparty. Each
# library lives in its own directory, and each directory is added to the
# include path separately.
include_directories("src/inline-thirdparty/usearch")
include_directories("src/inline-thirdparty/fp16")


enable_testing()

if (USING_LINUXBREW)
Expand Down
20 changes: 20 additions & 0 deletions src/inline-thirdparty/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# inline-thirdparty

This is a directory where we copy some of the third-party header-only libraries, rather than adding
them to the yugabyte-db-thirdparty repo. We also only copy the relevant subdirectory of the upstream
repositories. Each library is copied into its own appropriately named directory, and each library's
directory is added separately to the list of include directories in CMakeLists.txt.

* usearch
* Repo: https://github.com/yugabyte/usearch
* Description: Similarity search for vector and text
* Subdirectory: include
* Tag: v2.11.0-yb-1
* License: Apache 2.0

* fp16
* Repo: https://github.com/Maratyszcza/FP16/
* Description: Header-only library for conversion to/from half-precision floating point formats
* Subdirectory: include
* Commit: 0a92994d729ff76a58f692d3028ca1b64b145d91
* License: MIT
11 changes: 11 additions & 0 deletions src/inline-thirdparty/fp16/fp16.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#pragma once
#ifndef FP16_H
#define FP16_H

/*
 * Umbrella header: including this file pulls in <fp16/fp16.h>, and
 * additionally <fp16/psimd.h> when PSIMD_H is already defined.
 */
#include <fp16/fp16.h>

/*
 * NOTE(review): PSIMD_H is presumably the include guard of the psimd header,
 * so this include only takes effect if that header was included before this
 * one -- confirm against the psimd sources.
 */
#if defined(PSIMD_H)
#include <fp16/psimd.h>
#endif

#endif /* FP16_H */
92 changes: 92 additions & 0 deletions src/inline-thirdparty/fp16/fp16/bitcasts.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#pragma once
#ifndef FP16_BITCASTS_H
#define FP16_BITCASTS_H

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#include <cstdint>
#elif !defined(__OPENCL_VERSION__)
#include <stdint.h>
#endif

#if defined(__INTEL_COMPILER)
#include <immintrin.h>
#endif

#if defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
#include <intrin.h>
#endif


/*
 * Returns the float built from the 32-bit pattern `w`.
 *
 * The preprocessor picks one implementation: an OpenCL, CUDA, Intel-compiler
 * or MSVC/ARM built-in when the matching macro is defined, otherwise a
 * generic fallback that stores `w` into a union and reads the same storage
 * back as a float.
 */
static inline float fp32_from_bits(uint32_t w) {
#if defined(__OPENCL_VERSION__)
    return as_float(w);
#elif defined(__CUDA_ARCH__)
    return __uint_as_float((unsigned int) w);
#elif defined(__INTEL_COMPILER)
    return _castu32_f32(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
    return _CopyFloatFromInt32((__int32) w);
#else
    /* Generic path: write the integer member, read the float member. */
    union {
        uint32_t bits;
        float value;
    } pun;
    pun.bits = w;
    return pun.value;
#endif
}

/*
 * Returns the 32-bit pattern that represents the float `f`.
 *
 * The preprocessor picks one implementation: an OpenCL, CUDA, Intel-compiler
 * or MSVC/ARM built-in when the matching macro is defined, otherwise a
 * generic fallback that stores `f` into a union and reads the same storage
 * back as a uint32_t.
 */
static inline uint32_t fp32_to_bits(float f) {
#if defined(__OPENCL_VERSION__)
    return as_uint(f);
#elif defined(__CUDA_ARCH__)
    return (uint32_t) __float_as_uint(f);
#elif defined(__INTEL_COMPILER)
    return _castf32_u32(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
    return (uint32_t) _CopyInt32FromFloat(f);
#else
    /* Generic path: write the float member, read the integer member. */
    union {
        float value;
        uint32_t bits;
    } pun;
    pun.value = f;
    return pun.bits;
#endif
}

/*
 * Returns the double built from the 64-bit pattern `w`.
 *
 * The preprocessor picks one implementation: an OpenCL, CUDA, Intel-compiler
 * or MSVC/ARM built-in when the matching macro is defined, otherwise a
 * generic fallback that stores `w` into a union and reads the same storage
 * back as a double.
 */
static inline double fp64_from_bits(uint64_t w) {
#if defined(__OPENCL_VERSION__)
    return as_double(w);
#elif defined(__CUDA_ARCH__)
    return __longlong_as_double((long long) w);
#elif defined(__INTEL_COMPILER)
    return _castu64_f64(w);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
    return _CopyDoubleFromInt64((__int64) w);
#else
    /* Generic path: write the integer member, read the double member. */
    union {
        uint64_t bits;
        double value;
    } pun;
    pun.bits = w;
    return pun.value;
#endif
}

/*
 * Returns the 64-bit pattern that represents the double `f`.
 *
 * The preprocessor picks one implementation: an OpenCL, CUDA, Intel-compiler
 * or MSVC/ARM built-in when the matching macro is defined, otherwise a
 * generic fallback that stores `f` into a union and reads the same storage
 * back as a uint64_t.
 */
static inline uint64_t fp64_to_bits(double f) {
#if defined(__OPENCL_VERSION__)
    return as_ulong(f);
#elif defined(__CUDA_ARCH__)
    return (uint64_t) __double_as_longlong(f);
#elif defined(__INTEL_COMPILER)
    return _castf64_u64(f);
#elif defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
    return (uint64_t) _CopyInt64FromDouble(f);
#else
    /* Generic path: write the double member, read the integer member. */
    union {
        double value;
        uint64_t bits;
    } pun;
    pun.value = f;
    return pun.bits;
#endif
}

#endif /* FP16_BITCASTS_H */
Loading

0 comments on commit 7ba576d

Please sign in to comment.