Skip to content

Commit 2c9912a

Browse files
jonmclean, Jon McLean, Jonathan McLean
authored
Improve help formatting in CLI tools (#390)
* Added utilities to standardize help across CLI tools. #370
* Made three option groupings (required/optional/print)
* Moved common parameter descriptions to a common file. #370
* Updated usage statement for search_disk_app #370
* Updated range_search_disk_index to use the new required/optional format. #370
* Updated test apps to use the new help format. #370
* Fixed format issue. #370
* Updated help format for the 'build' apps. #370
* Fixed code formatting. #370
* Added src/*.hpp to the clang format. #370
* Moved header into the headers directory. #370
* Added missing configs. #370
* Removed superfluous paths from include. #370
* Added #pragma once. #370
* Typo fixes. #370
* Fixed capitalization of constant. #370
* Make fail_if_recall description more accurate. #370
* Changed to using set notation. #370
* Better explanations for some options. #370
* Added short explanation of file format. #370

---------

Co-authored-by: Jon McLean <[email protected]>
Co-authored-by: Jonathan McLean <[email protected]>
1 parent f636da4 commit 2c9912a

10 files changed

+457
-285
lines changed

apps/build_disk_index.cpp

Lines changed: 57 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#include "math_utils.h"
1010
#include "index.h"
1111
#include "partition.h"
12+
#include "program_options_utils.hpp"
1213

1314
namespace po = boost::program_options;
1415

@@ -21,61 +22,65 @@ int main(int argc, char **argv)
2122
bool append_reorder_data = false;
2223
bool use_opq = false;
2324

24-
po::options_description desc{"Arguments"};
25+
po::options_description desc{
26+
program_options_utils::make_program_description("build_disk_index", "Build a disk-based index.")};
2527
try
2628
{
2729
desc.add_options()("help,h", "Print information on arguments");
28-
desc.add_options()("data_type", po::value<std::string>(&data_type)->required(), "data type <int8/uint8/float>");
29-
desc.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(), "distance function <l2/mips>");
30-
desc.add_options()("data_path", po::value<std::string>(&data_path)->required(),
31-
"Input data file in bin format");
32-
desc.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
33-
"Path prefix for saving index file components");
34-
desc.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64), "Maximum graph degree");
35-
desc.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
36-
"Build complexity, higher value results in better graphs");
37-
desc.add_options()("search_DRAM_budget,B", po::value<float>(&B)->required(),
38-
"DRAM budget in GB for searching the index to set the "
39-
"compressed level for data while search happens");
40-
desc.add_options()("build_DRAM_budget,M", po::value<float>(&M)->required(),
41-
"DRAM budget in GB for building the index");
42-
desc.add_options()("num_threads,T", po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
43-
"Number of threads used for building index (defaults to "
44-
"omp_get_num_procs())");
45-
desc.add_options()("QD", po::value<uint32_t>(&QD)->default_value(0), " Quantized Dimension for compression");
46-
desc.add_options()("codebook_prefix", po::value<std::string>(&codebook_prefix)->default_value(""),
47-
"Path prefix for pre-trained codebook");
48-
desc.add_options()("PQ_disk_bytes", po::value<uint32_t>(&disk_PQ)->default_value(0),
49-
"Number of bytes to which vectors should be compressed "
50-
"on SSD; 0 for no compression");
51-
desc.add_options()("append_reorder_data", po::bool_switch()->default_value(false),
52-
"Include full precision data in the index. Use only in "
53-
"conjuction with compressed data on SSD.");
54-
desc.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ)->default_value(0),
55-
"Number of PQ bytes to build the index; 0 for full "
56-
"precision build");
57-
desc.add_options()("use_opq", po::bool_switch()->default_value(false),
58-
"Use Optimized Product Quantization (OPQ).");
59-
desc.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
60-
"Input label file in txt format for Filtered Index build ."
61-
"The file should contain comma separated filters for each node "
62-
"with each line corresponding to a graph node");
63-
desc.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
64-
"Universal label, Use only in conjuction with label file for "
65-
"filtered "
66-
"index build. If a graph node has all the labels against it, we "
67-
"can "
68-
"assign a special universal filter to the point instead of comma "
69-
"separated filters for that point");
70-
desc.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
71-
"Build complexity for filtered points, higher value "
72-
"results in better graphs");
73-
desc.add_options()("filter_threshold,F", po::value<uint32_t>(&filter_threshold)->default_value(0),
74-
"Threshold to break up the existing nodes to generate new graph "
75-
"internally where each node has a maximum F labels.");
76-
desc.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
77-
"Storage type of Labels <uint/ushort>, default value is uint which "
78-
"will consume memory 4 bytes per filter");
30+
31+
// Required parameters
32+
po::options_description required_configs("Required");
33+
required_configs.add_options()("data_type", po::value<std::string>(&data_type)->required(),
34+
program_options_utils::DATA_TYPE_DESCRIPTION);
35+
required_configs.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
36+
program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
37+
required_configs.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
38+
program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
39+
required_configs.add_options()("data_path", po::value<std::string>(&data_path)->required(),
40+
program_options_utils::INPUT_DATA_PATH);
41+
required_configs.add_options()("search_DRAM_budget,B", po::value<float>(&B)->required(),
42+
"DRAM budget in GB for searching the index to set the "
43+
"compressed level for data while search happens");
44+
required_configs.add_options()("build_DRAM_budget,M", po::value<float>(&M)->required(),
45+
"DRAM budget in GB for building the index");
46+
47+
// Optional parameters
48+
po::options_description optional_configs("Optional");
49+
optional_configs.add_options()("num_threads,T",
50+
po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
51+
program_options_utils::NUMBER_THREADS_DESCRIPTION);
52+
optional_configs.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64),
53+
program_options_utils::MAX_BUILD_DEGREE);
54+
optional_configs.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
55+
program_options_utils::GRAPH_BUILD_COMPLEXITY);
56+
optional_configs.add_options()("QD", po::value<uint32_t>(&QD)->default_value(0),
57+
" Quantized Dimension for compression");
58+
optional_configs.add_options()("codebook_prefix", po::value<std::string>(&codebook_prefix)->default_value(""),
59+
"Path prefix for pre-trained codebook");
60+
optional_configs.add_options()("PQ_disk_bytes", po::value<uint32_t>(&disk_PQ)->default_value(0),
61+
"Number of bytes to which vectors should be compressed "
62+
"on SSD; 0 for no compression");
63+
optional_configs.add_options()("append_reorder_data", po::bool_switch()->default_value(false),
64+
"Include full precision data in the index. Use only in "
65+
"conjuction with compressed data on SSD.");
66+
optional_configs.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ)->default_value(0),
67+
program_options_utils::BUIlD_GRAPH_PQ_BYTES);
68+
optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false),
69+
program_options_utils::USE_OPQ);
70+
optional_configs.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
71+
program_options_utils::LABEL_FILE);
72+
optional_configs.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
73+
program_options_utils::UNIVERSAL_LABEL);
74+
optional_configs.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
75+
program_options_utils::FILTERED_LBUILD);
76+
optional_configs.add_options()("filter_threshold,F", po::value<uint32_t>(&filter_threshold)->default_value(0),
77+
"Threshold to break up the existing nodes to generate new graph "
78+
"internally where each node has a maximum F labels.");
79+
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
80+
program_options_utils::LABEL_TYPE_DESCRIPTION);
81+
82+
// Merge required and optional parameters
83+
desc.add(required_configs).add(optional_configs);
7984

8085
po::variables_map vm;
8186
po::store(po::parse_command_line(argc, argv, desc), vm);

apps/build_memory_index.cpp

Lines changed: 42 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77

88
#include "index.h"
99
#include "utils.h"
10+
#include "program_options_utils.hpp"
1011

1112
#ifndef _WINDOWS
1213
#include <sys/mman.h>
@@ -72,47 +73,50 @@ int main(int argc, char **argv)
7273
float alpha;
7374
bool use_pq_build, use_opq;
7475

75-
po::options_description desc{"Arguments"};
76+
po::options_description desc{
77+
program_options_utils::make_program_description("build_memory_index", "Build a memory-based DiskANN index.")};
7678
try
7779
{
7880
desc.add_options()("help,h", "Print information on arguments");
79-
desc.add_options()("data_type", po::value<std::string>(&data_type)->required(), "data type <int8/uint8/float>");
80-
desc.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
81-
"distance function <l2/mips/cosine>");
82-
desc.add_options()("data_path", po::value<std::string>(&data_path)->required(),
83-
"Input data file in bin format");
84-
desc.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
85-
"Path prefix for saving index file components");
86-
desc.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64), "Maximum graph degree");
87-
desc.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
88-
"Build complexity, higher value results in better graphs");
89-
desc.add_options()("alpha", po::value<float>(&alpha)->default_value(1.2f),
90-
"alpha controls density and diameter of graph, set "
91-
"1 for sparse graph, "
92-
"1.2 or 1.4 for denser graphs with lower diameter");
93-
desc.add_options()("num_threads,T", po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
94-
"Number of threads used for building index (defaults to "
95-
"omp_get_num_procs())");
96-
desc.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ_bytes)->default_value(0),
97-
"Number of PQ bytes to build the index; 0 for full precision "
98-
"build");
99-
desc.add_options()("use_opq", po::bool_switch()->default_value(false),
100-
"Set true for OPQ compression while using PQ "
101-
"distance comparisons for "
102-
"building the index, and false for PQ compression");
103-
desc.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
104-
"Input label file in txt format for Filtered Index search. "
105-
"The file should contain comma separated filters for each node "
106-
"with each line corresponding to a graph node");
107-
desc.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
108-
"Universal label, if using it, only in conjunction with "
109-
"labels_file");
110-
desc.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
111-
"Build complexity for filtered points, higher value "
112-
"results in better graphs");
113-
desc.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
114-
"Storage type of Labels <uint/ushort>, default value is uint which "
115-
"will consume memory 4 bytes per filter");
81+
82+
// Required parameters
83+
po::options_description required_configs("Required");
84+
required_configs.add_options()("data_type", po::value<std::string>(&data_type)->required(),
85+
program_options_utils::DATA_TYPE_DESCRIPTION);
86+
required_configs.add_options()("dist_fn", po::value<std::string>(&dist_fn)->required(),
87+
program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
88+
required_configs.add_options()("index_path_prefix", po::value<std::string>(&index_path_prefix)->required(),
89+
program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
90+
required_configs.add_options()("data_path", po::value<std::string>(&data_path)->required(),
91+
program_options_utils::INPUT_DATA_PATH);
92+
93+
// Optional parameters
94+
po::options_description optional_configs("Optional");
95+
optional_configs.add_options()("num_threads,T",
96+
po::value<uint32_t>(&num_threads)->default_value(omp_get_num_procs()),
97+
program_options_utils::NUMBER_THREADS_DESCRIPTION);
98+
optional_configs.add_options()("max_degree,R", po::value<uint32_t>(&R)->default_value(64),
99+
program_options_utils::MAX_BUILD_DEGREE);
100+
optional_configs.add_options()("Lbuild,L", po::value<uint32_t>(&L)->default_value(100),
101+
program_options_utils::GRAPH_BUILD_COMPLEXITY);
102+
optional_configs.add_options()("alpha", po::value<float>(&alpha)->default_value(1.2f),
103+
program_options_utils::GRAPH_BUILD_ALPHA);
104+
optional_configs.add_options()("build_PQ_bytes", po::value<uint32_t>(&build_PQ_bytes)->default_value(0),
105+
program_options_utils::BUIlD_GRAPH_PQ_BYTES);
106+
optional_configs.add_options()("use_opq", po::bool_switch()->default_value(false),
107+
program_options_utils::USE_OPQ);
108+
optional_configs.add_options()("label_file", po::value<std::string>(&label_file)->default_value(""),
109+
program_options_utils::LABEL_FILE);
110+
optional_configs.add_options()("universal_label", po::value<std::string>(&universal_label)->default_value(""),
111+
program_options_utils::UNIVERSAL_LABEL);
112+
113+
optional_configs.add_options()("FilteredLbuild", po::value<uint32_t>(&Lf)->default_value(0),
114+
program_options_utils::FILTERED_LBUILD);
115+
optional_configs.add_options()("label_type", po::value<std::string>(&label_type)->default_value("uint"),
116+
program_options_utils::LABEL_TYPE_DESCRIPTION);
117+
118+
// Merge required and optional parameters
119+
desc.add(required_configs).add(optional_configs);
116120

117121
po::variables_map vm;
118122
po::store(po::parse_command_line(argc, argv, desc), vm);

0 commit comments

Comments
 (0)