Skip to content

Commit 530346d

Browse files
committed
ManhattanDistance returns float, so bit vector is no longer limited by UINT16_MAX
1 parent c26cfea commit 530346d

File tree

2 files changed

+3
-10
lines changed

2 files changed

+3
-10
lines changed

ydb/library/yql/udfs/common/knn/knn-distance.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ static std::optional<float> KnnManhattanDistance(const TStringRef& str1, const T
4848
if (Y_UNLIKELY(vector1.size() != vector2.size() || vector1.empty() || vector1.size() > UINT16_MAX))
4949
return {};
5050

51-
ui16 ret = 0;
51+
ui64 ret = 0;
5252
for (size_t i = 0; i < vector1.size(); ++i)
5353
ret += __builtin_popcountll(vector1[i] ^ vector2[i]);
5454
return ret;
@@ -91,10 +91,10 @@ static std::optional<float> KnnEuclideanDistance(const TStringRef& str1, const T
9191
if (Y_UNLIKELY(vector1.size() != vector2.size() || vector1.empty() || vector1.size() > UINT16_MAX))
9292
return {};
9393

94-
ui16 ret = 0;
94+
ui64 ret = 0;
9595
for (size_t i = 0; i < vector1.size(); ++i)
9696
ret += __builtin_popcountll(vector1[i] ^ vector2[i]);
97-
return NPrivate::NL2Distance::L2DistanceSqrt<ui64>(ret);
97+
return NPrivate::NL2Distance::L2DistanceSqrt(ret);
9898
}
9999
default:
100100
return {};

ydb/library/yql/udfs/common/knn/knn-serializer.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,12 @@ class TKnnVectorSerializer {
8080
// Encode all positive floats as bit 1, negative floats as bit 0.
8181
// So a 1024-float vector is serialized into 1024/8 = 128 bytes.
8282
// Place all bits in ui64. So, only vector sizes divisible by 64 are supported.
83-
// Max vector length is 32767.
8483
class TKnnBitVectorSerializer {
8584
public:
8685
static TUnboxedValue Serialize(const IValueBuilder* valueBuilder, const TUnboxedValue x) {
8786
auto serialize = [&x] (IOutputStream& outStream) {
8887
ui64 accumulator = 0;
8988
ui8 filledBits = 0;
90-
ui64 lenght = 0;
9189

9290
EnumerateVector(x, [&] (float element) {
9391
if (element > 0)
@@ -96,7 +94,6 @@ class TKnnBitVectorSerializer {
9694
++filledBits;
9795
if (filledBits == 64) {
9896
outStream.Write(&accumulator, sizeof(ui64));
99-
lenght++;
10097
accumulator = 0;
10198
filledBits = 0;
10299
}
@@ -106,10 +103,6 @@ class TKnnBitVectorSerializer {
106103
if (Y_UNLIKELY(filledBits))
107104
return false;
108105

109-
// max vector length is 32767
110-
if (Y_UNLIKELY(lenght > UINT16_MAX))
111-
return false;
112-
113106
const EFormat format = EFormat::BitVector;
114107
outStream.Write(&format, HeaderLen);
115108

0 commit comments

Comments
 (0)