Skip to content

Commit

Permalink
fix search by GPU
Browse files Browse the repository at this point in the history
Signed-off-by: shengjun.li <shengjun.li@zilliz.com>
  • Loading branch information
shengjun.li committed May 28, 2020
1 parent 3d9cc69 commit 6f7fc0a
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 31 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Please mark all changes in the change log and use the issue from GitHub
- \#2395 Fix large nq cudaMalloc error
- \#2399 The nlist set by the user may not take effect
- \#2403 MySQL max_idle_time is 10 by default
- \#2450 The deleted vectors may be found on GPU

## Feature

Expand Down
7 changes: 2 additions & 5 deletions core/src/index/thirdparty/faiss/gpu/impl/IVFUtilsSelect1.cu
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,9 @@ pass1SelectLists(void** listIndices,
topQueryToCentroid,
opt);
if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) {
heap.add(distanceStart[i], start + i);
} else {
heap.add((1.0 / 0.0), start + i);
heap.addThreadQ(distanceStart[i], start + i);
}
heap.checkThreadQ();
}

// Handle warp divergence separately
Expand All @@ -91,8 +90,6 @@ pass1SelectLists(void** listIndices,
opt);
if (bitsetEmpty || (!(bitset[index >> 3] & (0x1 << (index & 0x7))))) {
heap.addThreadQ(distanceStart[i], start + i);
} else {
heap.addThreadQ((1.0 / 0.0), start + i);
}
}

Expand Down
9 changes: 3 additions & 6 deletions core/src/index/thirdparty/faiss/gpu/impl/L2Select.cu
Original file line number Diff line number Diff line change
Expand Up @@ -161,20 +161,17 @@ __global__ void l2SelectMinK(Tensor<T, 2, true> productDistances,
if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) {
v = Math<T>::add(centroidDistances[i],
productDistances[row][i]);
} else {
v = (T)(1.0 / 0.0);
heap.addThreadQ(v, i);
}
heap.add(v, i);
heap.checkThreadQ();
}

if (i < productDistances.getSize(1)) {
if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) {
v = Math<T>::add(centroidDistances[i],
productDistances[row][i]);
} else {
v = (T)(1.0 / 0.0);
heap.addThreadQ(v, i);
}
heap.addThreadQ(v, i);
}

heap.reduce();
Expand Down
16 changes: 5 additions & 11 deletions core/src/index/thirdparty/faiss/gpu/utils/BlockSelectKernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -146,20 +146,17 @@ __global__ void blockSelect(Tensor<K, 2, true> in,

for (; i < limit; i += ThreadsPerBlock) {
if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) {
heap.add(*inStart, (IndexType) i);
} else {
heap.add(-1.0, (IndexType) i);
heap.addThreadQ(*inStart, (IndexType) i);
}

heap.checkThreadQ();

inStart += ThreadsPerBlock;
}

// Handle last remainder fraction of a warp of elements
if (i < in.getSize(1)) {
if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) {
heap.addThreadQ(*inStart, (IndexType) i);
} else {
heap.addThreadQ(-1.0, (IndexType) i);
}
}

Expand Down Expand Up @@ -208,10 +205,9 @@ __global__ void blockSelectPair(Tensor<K, 2, true> inK,

for (; i < limit; i += ThreadsPerBlock) {
if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) {
heap.add(*inKStart, *inVStart);
} else {
heap.add(-1.0, *inVStart);
heap.addThreadQ(*inKStart, *inVStart);
}
heap.checkThreadQ();

inKStart += ThreadsPerBlock;
inVStart += ThreadsPerBlock;
Expand All @@ -221,8 +217,6 @@ __global__ void blockSelectPair(Tensor<K, 2, true> inK,
if (i < inK.getSize(1)) {
if (bitsetEmpty || (!(bitset[i >> 3] & (0x1 << (i & 0x7))))) {
heap.addThreadQ(*inKStart, *inVStart);
} else {
heap.addThreadQ(-1.0, *inVStart);
}
}

Expand Down
9 changes: 0 additions & 9 deletions core/src/scheduler/task/SearchTask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -283,15 +283,6 @@ XSearchTask::Execute() {

{
std::unique_lock<std::mutex> lock(search_job->mutex());

if (search_job->GetResultIds().size() > spec_k) {
if (search_job->GetResultIds().front() == -1) {
// initialized results set
search_job->GetResultIds().resize(spec_k * nq);
search_job->GetResultDistances().resize(spec_k * nq);
}
}

search_job->vector_count() = nq;
XSearchTask::MergeTopkToResultSet(output_ids, output_distance, spec_k, nq, topk, ascending_reduce,
search_job->GetResultIds(), search_job->GetResultDistances());
Expand Down

0 comments on commit 6f7fc0a

Please sign in to comment.