Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve help formatting in CLI tools #390

Merged
merged 21 commits into from
Jul 19, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 57 additions & 52 deletions apps/build_disk_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "math_utils.h"
#include "index.h"
#include "partition.h"
#include "program_options_utils.hpp"

namespace po = boost::program_options;

Expand All @@ -21,61 +22,65 @@ int main(int argc, char **argv)
bool append_reorder_data = false;
bool use_opq = false;

po::options_description desc{"Arguments"};
po::options_description desc{
program_options_utils::make_program_description("build_disk_index", "Build a disk-based index.")};
try
{
desc.add_options()("help,h", "Print information on arguments");
desc.add_options()("data_type", po::value<std::string>(&data_type)->required(), "data type <int8/uint8/float>");
desc.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(), "distance function <l2/mips>");
desc.add_options()("data_path", po::value<std::string>(&data_path)->required(),
"Input data file in bin format");
desc.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
"Path prefix for saving index file components");
desc.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64), "Maximum graph degree");
desc.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
"Build complexity, higher value results in better graphs");
desc.add_options()("search_DRAM_budget,B", po::value<float>(&B)->required(),
"DRAM budget in GB for searching the index to set the "
"compressed level for data while search happens");
desc.add_options()("build_DRAM_budget,M", po::value<float>(&M)->required(),
"DRAM budget in GB for building the index");
desc.add_options()("num_threads,T", po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
"Number of threads used for building index (defaults to "
"omp_get_num_procs())");
desc.add_options()("QD", po::value<uint32_t>(&QD)->default_value(0), " Quantized Dimension for compression");
desc.add_options()("codebook_prefix", po::value<std::string>(&codebook_prefix)->default_value(""),
"Path prefix for pre-trained codebook");
desc.add_options()("PQ_disk_bytes", po::value<uint32_t>(&disk_PQ)->default_value(0),
"Number of bytes to which vectors should be compressed "
"on SSD; 0 for no compression");
desc.add_options()("append_reorder_data", po::bool_switch()->default_value(false),
"Include full precision data in the index. Use only in "
"conjuction with compressed data on SSD.");
desc.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ)->default_value(0),
"Number of PQ bytes to build the index; 0 for full "
"precision build");
desc.add_options()("use_opq", po::bool_switch()->default_value(false),
"Use Optimized Product Quantization (OPQ).");
desc.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
"Input label file in txt format for Filtered Index build ."
"The file should contain comma separated filters for each node "
"with each line corresponding to a graph node");
desc.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
"Universal label, Use only in conjuction with label file for "
"filtered "
"index build. If a graph node has all the labels against it, we "
"can "
"assign a special universal filter to the point instead of comma "
"separated filters for that point");
desc.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
"Build complexity for filtered points, higher value "
"results in better graphs");
desc.add_options()("filter_threshold,F", po::value<uint32_t>(&filter_threshold)->default_value(0),
"Threshold to break up the existing nodes to generate new graph "
"internally where each node has a maximum F labels.");
desc.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
"Storage type of Labels <uint/ushort>, default value is uint which "
"will consume memory 4 bytes per filter");

// Required parameters
po::options_description required_configs("Required");
required_configs.add_options()("data_type", po::value<std::string>(&data_type)->required(),
program_options_utils::DATA_TYPE_DESCRIPTION);
required_configs.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
required_configs.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
required_configs.add_options()("data_path", po::value<std::string>(&data_path)->required(),
program_options_utils::INPUT_DATA_PATH);
required_configs.add_options()("search_DRAM_budget,B", po::value<float>(&B)->required(),
"DRAM budget in GB for searching the index to set the "
"compressed level for data while search happens");
required_configs.add_options()("build_DRAM_budget,M", po::value<float>(&M)->required(),
"DRAM budget in GB for building the index");

// Optional parameters
po::options_description optional_configs("Optional");
optional_configs.add_options()("num_threads,T",
po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
program_options_utils::NUMBER_THREADS_DESCRIPTION);
optional_configs.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64),
program_options_utils::MAX_BUILD_DEGREE);
optional_configs.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
program_options_utils::GRAPH_BUILD_COMPLEXITY);
optional_configs.add_options()("QD", po::value<uint32_t>(&QD)->default_value(0),
" Quantized Dimension for compression");
optional_configs.add_options()("codebook_prefix", po::value<std::string>(&codebook_prefix)->default_value(""),
"Path prefix for pre-trained codebook");
optional_configs.add_options()("PQ_disk_bytes", po::value<uint32_t>(&disk_PQ)->default_value(0),
"Number of bytes to which vectors should be compressed "
"on SSD; 0 for no compression");
optional_configs.add_options()("append_reorder_data", po::bool_switch()->default_value(false),
"Include full precision data in the index. Use only in "
"conjuction with compressed data on SSD.");
optional_configs.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ)->default_value(0),
program_options_utils::BUIlD_GRAPH_PQ_BYTES);
optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false),
program_options_utils::USE_OPQ);
optional_configs.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
program_options_utils::LABEL_FILE);
optional_configs.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
program_options_utils::UNIVERSAL_LABEL);
optional_configs.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
program_options_utils::FILTERED_LBUILD);
optional_configs.add_options()("filter_threshold,F", po::value<uint32_t>(&filter_threshold)->default_value(0),
"Threshold to break up the existing nodes to generate new graph "
"internally where each node has a maximum F labels.");
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
program_options_utils::LABEL_TYPE_DESCRIPTION);

// Merge required and optional parameters
desc.add(required_configs).add(optional_configs);

po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
Expand Down
80 changes: 42 additions & 38 deletions apps/build_memory_index.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "index.h"
#include "utils.h"
#include "program_options_utils.hpp"

#ifndef _WINDOWS
#include <sys/mman.h>
Expand Down Expand Up @@ -72,47 +73,50 @@ int main(int argc, char **argv)
float alpha;
bool use_pq_build, use_opq;

po::options_description desc{"Arguments"};
po::options_description desc{
program_options_utils::make_program_description("build_memory_index", "Build a memory-based DiskANN index.")};
try
{
desc.add_options()("help,h", "Print information on arguments");
desc.add_options()("data_type", po::value<std::string>(&data_type)->required(), "data type <int8/uint8/float>");
desc.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
"distance function <l2/mips/cosine>");
desc.add_options()("data_path", po::value<std::string>(&data_path)->required(),
"Input data file in bin format");
desc.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
"Path prefix for saving index file components");
desc.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64), "Maximum graph degree");
desc.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
"Build complexity, higher value results in better graphs");
desc.add_options()("alpha", po::value<float>(&alpha)->default_value(1.2f),
"alpha controls density and diameter of graph, set "
"1 for sparse graph, "
"1.2 or 1.4 for denser graphs with lower diameter");
desc.add_options()("num_threads,T", po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
"Number of threads used for building index (defaults to "
"omp_get_num_procs())");
desc.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ_bytes)->default_value(0),
"Number of PQ bytes to build the index; 0 for full precision "
"build");
desc.add_options()("use_opq", po::bool_switch()->default_value(false),
"Set true for OPQ compression while using PQ "
"distance comparisons for "
"building the index, and false for PQ compression");
desc.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
"Input label file in txt format for Filtered Index search. "
"The file should contain comma separated filters for each node "
"with each line corresponding to a graph node");
desc.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
"Universal label, if using it, only in conjunction with "
"labels_file");
desc.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
"Build complexity for filtered points, higher value "
"results in better graphs");
desc.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
"Storage type of Labels <uint/ushort>, default value is uint which "
"will consume memory 4 bytes per filter");

// Required parameters
po::options_description required_configs("Required");
required_configs.add_options()("data_type", po::value<std::string>(&data_type)->required(),
program_options_utils::DATA_TYPE_DESCRIPTION);
required_configs.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
required_configs.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
required_configs.add_options()("data_path", po::value<std::string>(&data_path)->required(),
program_options_utils::INPUT_DATA_PATH);

// Optional parameters
po::options_description optional_configs("Optional");
optional_configs.add_options()("num_threads,T",
po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
program_options_utils::NUMBER_THREADS_DESCRIPTION);
optional_configs.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64),
program_options_utils::MAX_BUILD_DEGREE);
optional_configs.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
program_options_utils::GRAPH_BUILD_COMPLEXITY);
optional_configs.add_options()("alpha", po::value<float>(&alpha)->default_value(1.2f),
program_options_utils::GRAPH_BUILD_ALPHA);
optional_configs.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ_bytes)->default_value(0),
program_options_utils::BUIlD_GRAPH_PQ_BYTES);
optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false),
program_options_utils::USE_OPQ);
optional_configs.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
program_options_utils::LABEL_FILE);
optional_configs.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
program_options_utils::UNIVERSAL_LABEL);

optional_configs.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
program_options_utils::FILTERED_LBUILD);
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
program_options_utils::LABEL_TYPE_DESCRIPTION);

// Merge required and optional parameters
desc.add(required_configs).add(optional_configs);

po::variables_map vm;
po::store(po::parse_command_line(argc, argv, desc), vm);
Expand Down
Loading