forked from dusty-nv/jetson-containers
Commit
Showing 4 changed files with 266 additions and 0 deletions.
Dockerfile:
@@ -0,0 +1,60 @@
#---
# name: faiss
# group: vectordb
# config: config.py
# depends: [numpy, cmake]
# test: [test.py]
#---
ARG BASE_IMAGE
FROM ${BASE_IMAGE}

WORKDIR /opt

ARG CUDA_ARCHITECTURES

ARG FAISS_REPO=facebookresearch/faiss
ARG FAISS_BRANCH=main

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libopenblas-dev \
        swig \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

# workaround for 'Could NOT find Python3 (missing: Python3_NumPy_INCLUDE_DIRS Development'
RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1 && \
    apt purge -y python3.9 libpython3.9* || echo "python3.9 not found, skipping removal" && \
    ls -ll /usr/bin/python*

# fetching the branch ref from the GitHub API invalidates the build cache when upstream changes
ADD https://api.github.com/repos/${FAISS_REPO}/git/refs/heads/${FAISS_BRANCH} /tmp/faiss_version.json

RUN git clone --branch=${FAISS_BRANCH} --depth=1 https://github.com/${FAISS_REPO}

RUN mkdir faiss/build && \
    cd faiss/build && \
    cmake \
        -DFAISS_ENABLE_GPU=ON \
        -DFAISS_ENABLE_PYTHON=ON \
        -DFAISS_ENABLE_RAFT=ON \
        -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCHITECTURES} \
        ../ && \
    make -j$(nproc) faiss && \
    make install

RUN cd faiss/build && \
    make demo_ivfpq_indexing && \
    make demo_ivfpq_indexing_gpu

RUN cd faiss/build && \
    make -j$(nproc) swigfaiss

RUN cd faiss/build/faiss/python && \
    python3 setup.py --verbose bdist_wheel && \
    cp dist/faiss*.whl /opt

RUN pip3 install --no-cache-dir --verbose /opt/faiss*.whl

WORKDIR /

RUN pip3 show faiss && python3 -c 'import faiss'
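
For reference, an image from this Dockerfile is normally produced through the jetson-containers build tooling rather than a bare docker build, since the tooling supplies BASE_IMAGE and the CUDA_ARCHITECTURES value configured in config.py. A minimal sketch; the entrypoint names below are assumptions and have varied across repo versions:

    # from the root of the jetson-containers checkout (entrypoint names are assumptions)
    jetson-containers build faiss
    # older checkouts expose the same build step as a shell script:
    ./build.sh faiss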
Benchmark script (Python):
@@ -0,0 +1,125 @@
#!/usr/bin/env python3
import os
import time
import socket
import datetime
import argparse

import faiss
import numpy as np

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('-k', type=int, default=4, help='the number of nearest neighbors to search for')
parser.add_argument('-d', '--dim', type=int, default=5120, help='the dimensionality of the embedding vectors')  # 2621440

parser.add_argument('--index', type=str, default='Flat', help='the type of index to use')  # https://github.com/facebookresearch/faiss/wiki/Faiss-indexes
parser.add_argument('--index-size', type=int, default=4096, help='the number of vectors to add to the index')
parser.add_argument('--index-batch', type=int, default=1, help='the number of vectors to add to the index at a time')

parser.add_argument('--search-queries', type=int, default=4096, help='the number of search queries to run')
parser.add_argument('--search-batch', type=int, default=1, help='the number of search queries to run at a time')

parser.add_argument('--dtype', type=str, default='float32', help='datatype of the vectors')
parser.add_argument('--seed', type=int, default=1234, help='change the random seed used')
parser.add_argument('--cpu', action='store_true', help='disable GPU acceleration')
parser.add_argument('--save', type=str, default='', help='CSV file to save benchmarking results to')

args = parser.parse_args()
print(args)

np.random.seed(args.seed)

print(f"building random numpy arrays ({args.index_size}, {args.dim})")

xb = np.random.random((args.index_size, args.dim)).astype(args.dtype)
xb[:, 0] += np.arange(args.index_size) / 1000.
xq = np.random.random((args.search_queries, args.dim)).astype(args.dtype)
xq[:, 0] += np.arange(args.search_queries) / 1000.

print(xb.shape, xb.dtype)

vector_size = (args.dim * xb.itemsize) / (1024*1024)  # size of one vector in MB

print(f"vector size: {vector_size*1024*1024:.0f} bytes")
print(f"numpy array size: {(xb.size * xb.itemsize) / (1024*1024):.3f} MB")
print(f"creating index type: {args.index}")

index = faiss.index_factory(args.dim, args.index)  # e.g. faiss.IndexFlatL2(args.dim) for 'Flat'

if not args.cpu:
    res = faiss.StandardGpuResources()  # use a single GPU
    index = faiss.index_cpu_to_gpu(res, 0, index)

if not index.is_trained:
    print(f"training index {args.index}")
    index.train(xb)

# profile indexing
avg_index_time = 0
avg_index_rate = 0

avg_factor = 1.0 / (args.index_size / args.index_batch)

for i in range(0, args.index_size, args.index_batch):
    time_begin = time.perf_counter()
    index.add(xb[i:i+args.index_batch])
    index_time = time.perf_counter() - time_begin
    index_rate = args.index_batch / index_time
    avg_index_time += index_time * avg_factor
    avg_index_rate += index_rate * avg_factor
    if i % 32 == 0:
        print(f"added ({args.index_batch}, {args.dim}) vectors: {index_time*1000:.2f} ms, {index_rate:.1f} vectors/sec, {index_rate*vector_size:.1f} MB/s")

def print_index_stats():
    print(f"{args.index} index size: ({index.ntotal}, {args.dim})")
    print(f"{args.index} index time: {avg_index_time*1000:.2f} ms")
    print(f"{args.index} index rate: {avg_index_rate:.1f} vectors/sec")
    print(f"{args.index} index bandwidth: {avg_index_rate*vector_size:.1f} MB/s")
    print(f"{args.index} index trained: {index.is_trained}")

# profile search
avg_search_time = 0
avg_search_rate = 0

avg_factor = 1.0 / (args.search_queries / args.search_batch)

for i in range(0, args.search_queries, args.search_batch):
    time_begin = time.perf_counter()
    D, I = index.search(xq[i:i+args.search_batch], args.k)
    search_time = time.perf_counter() - time_begin
    search_rate = args.search_batch / search_time
    avg_search_time += search_time * avg_factor
    avg_search_rate += search_rate * avg_factor
    if i % 32 == 0:
        print(f"search ({args.search_batch}, {args.dim}) vectors: {search_time*1000:.2f} ms, {search_rate:.1f} vectors/sec, {search_rate*vector_size:.1f} MB/s")

def print_search_stats():
    print(f"{args.index} search size: ({args.search_batch}, {args.dim})")
    print(f"{args.index} search time: {avg_search_time*1000:.2f} ms")
    print(f"{args.index} search rate: {avg_search_rate:.1f} vectors/sec")
    print(f"{args.index} search bandwidth: {avg_search_rate*vector_size:.1f} MB/s")

print("\n")
print_index_stats()
print("")
print_search_stats()

# https://github.com/facebookresearch/faiss/wiki/FAQ#why-does-the-ram-usage-not-go-down-when-i-delete-an-index
memory_usage = faiss.get_mem_usage_kb() / 1024
print(f"\nPeak memory usage: {memory_usage:.1f} MB")

if args.save:
    if not os.path.isfile(args.save):  # csv header
        with open(args.save, 'w') as file:
            file.write("timestamp, hostname, api, device, index, dtype, vector_dim, num_vectors, ")
            file.write("index_batch, index_time, index_rate, index_bandwidth, ")
            file.write("search_batch, search_time, search_rate, search_bandwidth, memory\n")
    with open(args.save, 'a') as file:
        file.write(f"{datetime.datetime.now().strftime('%Y%m%d %H:%M:%S')}, {socket.gethostname()}, faiss, ")
        file.write(f"{'cpu' if args.cpu else 'cuda'}, {args.index}, {args.dtype}, {args.dim}, {args.index_size}, ")
        file.write(f"{args.index_batch}, {avg_index_time}, {avg_index_rate}, {avg_index_rate*vector_size}, ")
        file.write(f"{args.search_batch}, {avg_search_time}, {avg_search_rate}, {avg_search_rate*vector_size}, {memory_usage}\n")
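
A typical invocation of the benchmark inside the built container, using the flags defined above; the script's filename is not shown in this view, so benchmark.py below is an assumption:

    # filename benchmark.py is assumed
    python3 benchmark.py --index Flat --index-size 4096 --search-queries 4096 --save results.csv
    # an IVF factory string also exercises the training path; --cpu skips GPU acceleration
    python3 benchmark.py --index IVF64,Flat --cpu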
config.py:
@@ -0,0 +1,6 @@
from jetson_containers import CUDA_ARCHITECTURES

# 'package' is provided by the jetson-containers build system when this config is loaded
package['build_args'] = {
    'CUDA_ARCHITECTURES': ';'.join([str(x) for x in CUDA_ARCHITECTURES]),
}
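
For illustration, if CUDA_ARCHITECTURES were, say, [72, 87] (hypothetical values for a particular Jetson device), the join above yields '72;87', which ends up passed to the Dockerfile roughly as:

    # hypothetical resulting build argument
    docker build --build-arg CUDA_ARCHITECTURES="72;87" ...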
test.py:
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
import time
import argparse

import faiss
import numpy as np

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument('-k', type=int, default=4)
parser.add_argument('-d', '--dim', type=int, default=64)  # 2621440
parser.add_argument('--num-vectors', type=int, default=100000)  # 512
parser.add_argument('--num-queries', type=int, default=1)
parser.add_argument('--seed', type=int, default=1234)
parser.add_argument('--cpu', action='store_true')

args = parser.parse_args()
print(args)

np.random.seed(args.seed)

print(f"building random numpy arrays ({args.num_vectors}, {args.dim})")

xb = np.random.random((args.num_vectors, args.dim)).astype('float32')
xb[:, 0] += np.arange(args.num_vectors) / 1000.
xq = np.random.random((args.num_queries, args.dim)).astype('float32')
xq[:, 0] += np.arange(args.num_queries) / 1000.

print(f"numpy array size: {(xb.size * xb.itemsize) / (1024*1024):.3f} MB")
print("creating index")

index = faiss.IndexFlatL2(args.dim)  # build the index

if not args.cpu:
    res = faiss.StandardGpuResources()  # use a single GPU
    index = faiss.index_cpu_to_gpu(res, 0, index)

# https://github.com/facebookresearch/faiss/wiki/FAQ#why-does-the-ram-usage-not-go-down-when-i-delete-an-index
print(f"mem usage: {faiss.get_mem_usage_kb() / 1024:.3f} MB")
print(index.is_trained)

time_begin = time.perf_counter()
index.add(xb[:-1])  # add all but the last vector to the index
print(f"time to add {xb.shape} vectors: {time.perf_counter()-time_begin:.3} sec")
print(index.ntotal)

time_begin = time.perf_counter()
index.add(xb[-1:])  # add the final vector to the index
print(f"time to add 1 vector: {time.perf_counter()-time_begin:.3} sec")
print(index.ntotal)

def search(queries, k=args.k):
    time_begin = time.perf_counter()
    D, I = index.search(queries, k)  # sanity check
    print(I)
    print(D)
    print(f"time to search {len(queries)}: {time.perf_counter()-time_begin:.3} sec")

"""
Sanity check on the first 5 vectors:
[[ 0 393 363  78]
 [ 1 555 277 364]
 [ 2 304 101  13]
 [ 3 173  18 182]
 [ 4 288 370 531]]
[[ 0.  7.17517328  7.2076292   7.25116253]
 [ 0.  6.32356453  6.6845808   6.79994535]
 [ 0.  5.79640865  6.39173603  7.28151226]
 [ 0.  7.27790546  7.52798653  7.66284657]
 [ 0.  6.76380348  7.29512024  7.36881447]]
"""
search(xb[:5])
search(xq)
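
The sanity-check values follow from the setup: search(xb[:5]) queries the index with five vectors that were themselves added to it, so under exact L2 search each query's nearest neighbor is itself at distance 0, which is why the first column of I is 0..4 and the first column of D is 0. To rerun the test by hand after the build, a sketch; the launcher and tag helper names below are assumptions that have varied across jetson-containers versions, and test.py is assumed to be in the working directory:

    # assumed launcher from the jetson-containers repo
    jetson-containers run $(autotag faiss) python3 test.py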