9
9
#include " math_utils.h"
10
10
#include " index.h"
11
11
#include " partition.h"
12
+ #include " program_options_utils.hpp"
12
13
13
14
namespace po = boost::program_options;
14
15
@@ -21,61 +22,65 @@ int main(int argc, char **argv)
21
22
bool append_reorder_data = false ;
22
23
bool use_opq = false ;
23
24
24
- po::options_description desc{" Arguments" };
25
+ po::options_description desc{
26
+ program_options_utils::make_program_description (" build_disk_index" , " Build a disk-based index." )};
25
27
try
26
28
{
27
29
desc.add_options ()(" help,h" , " Print information on arguments" );
28
- desc.add_options ()(" data_type" , po::value<std::string>(&data_type)->required (), " data type <int8/uint8/float>" );
29
- desc.add_options ()(" dist_fn" , po::value<std::string>(&dist_fn)->required (), " distance function <l2/mips>" );
30
- desc.add_options ()(" data_path" , po::value<std::string>(&data_path)->required (),
31
- " Input data file in bin format" );
32
- desc.add_options ()(" index_path_prefix" , po::value<std::string>(&index_path_prefix)->required (),
33
- " Path prefix for saving index file components" );
34
- desc.add_options ()(" max_degree,R" , po::value<uint32_t >(&R)->default_value (64 ), " Maximum graph degree" );
35
- desc.add_options ()(" Lbuild,L" , po::value<uint32_t >(&L)->default_value (100 ),
36
- " Build complexity, higher value results in better graphs" );
37
- desc.add_options ()(" search_DRAM_budget,B" , po::value<float >(&B)->required (),
38
- " DRAM budget in GB for searching the index to set the "
39
- " compressed level for data while search happens" );
40
- desc.add_options ()(" build_DRAM_budget,M" , po::value<float >(&M)->required (),
41
- " DRAM budget in GB for building the index" );
42
- desc.add_options ()(" num_threads,T" , po::value<uint32_t >(&num_threads)->default_value (omp_get_num_procs ()),
43
- " Number of threads used for building index (defaults to "
44
- " omp_get_num_procs())" );
45
- desc.add_options ()(" QD" , po::value<uint32_t >(&QD)->default_value (0 ), " Quantized Dimension for compression" );
46
- desc.add_options ()(" codebook_prefix" , po::value<std::string>(&codebook_prefix)->default_value (" " ),
47
- " Path prefix for pre-trained codebook" );
48
- desc.add_options ()(" PQ_disk_bytes" , po::value<uint32_t >(&disk_PQ)->default_value (0 ),
49
- " Number of bytes to which vectors should be compressed "
50
- " on SSD; 0 for no compression" );
51
- desc.add_options ()(" append_reorder_data" , po::bool_switch ()->default_value (false ),
52
- " Include full precision data in the index. Use only in "
53
- " conjuction with compressed data on SSD." );
54
- desc.add_options ()(" build_PQ_bytes" , po::value<uint32_t >(&build_PQ)->default_value (0 ),
55
- " Number of PQ bytes to build the index; 0 for full "
56
- " precision build" );
57
- desc.add_options ()(" use_opq" , po::bool_switch ()->default_value (false ),
58
- " Use Optimized Product Quantization (OPQ)." );
59
- desc.add_options ()(" label_file" , po::value<std::string>(&label_file)->default_value (" " ),
60
- " Input label file in txt format for Filtered Index build ."
61
- " The file should contain comma separated filters for each node "
62
- " with each line corresponding to a graph node" );
63
- desc.add_options ()(" universal_label" , po::value<std::string>(&universal_label)->default_value (" " ),
64
- " Universal label, Use only in conjuction with label file for "
65
- " filtered "
66
- " index build. If a graph node has all the labels against it, we "
67
- " can "
68
- " assign a special universal filter to the point instead of comma "
69
- " separated filters for that point" );
70
- desc.add_options ()(" FilteredLbuild" , po::value<uint32_t >(&Lf)->default_value (0 ),
71
- " Build complexity for filtered points, higher value "
72
- " results in better graphs" );
73
- desc.add_options ()(" filter_threshold,F" , po::value<uint32_t >(&filter_threshold)->default_value (0 ),
74
- " Threshold to break up the existing nodes to generate new graph "
75
- " internally where each node has a maximum F labels." );
76
- desc.add_options ()(" label_type" , po::value<std::string>(&label_type)->default_value (" uint" ),
77
- " Storage type of Labels <uint/ushort>, default value is uint which "
78
- " will consume memory 4 bytes per filter" );
30
+
31
+ // Required parameters
32
+ po::options_description required_configs (" Required" );
33
+ required_configs.add_options ()(" data_type" , po::value<std::string>(&data_type)->required (),
34
+ program_options_utils::DATA_TYPE_DESCRIPTION);
35
+ required_configs.add_options ()(" dist_fn" , po::value<std::string>(&dist_fn)->required (),
36
+ program_options_utils::DISTANCE_FUNCTION_DESCRIPTION);
37
+ required_configs.add_options ()(" index_path_prefix" , po::value<std::string>(&index_path_prefix)->required (),
38
+ program_options_utils::INDEX_PATH_PREFIX_DESCRIPTION);
39
+ required_configs.add_options ()(" data_path" , po::value<std::string>(&data_path)->required (),
40
+ program_options_utils::INPUT_DATA_PATH);
41
+ required_configs.add_options ()(" search_DRAM_budget,B" , po::value<float >(&B)->required (),
42
+ " DRAM budget in GB for searching the index to set the "
43
+ " compressed level for data while search happens" );
44
+ required_configs.add_options ()(" build_DRAM_budget,M" , po::value<float >(&M)->required (),
45
+ " DRAM budget in GB for building the index" );
46
+
47
+ // Optional parameters
48
+ po::options_description optional_configs (" Optional" );
49
+ optional_configs.add_options ()(" num_threads,T" ,
50
+ po::value<uint32_t >(&num_threads)->default_value (omp_get_num_procs ()),
51
+ program_options_utils::NUMBER_THREADS_DESCRIPTION);
52
+ optional_configs.add_options ()(" max_degree,R" , po::value<uint32_t >(&R)->default_value (64 ),
53
+ program_options_utils::MAX_BUILD_DEGREE);
54
+ optional_configs.add_options ()(" Lbuild,L" , po::value<uint32_t >(&L)->default_value (100 ),
55
+ program_options_utils::GRAPH_BUILD_COMPLEXITY);
56
+ optional_configs.add_options ()(" QD" , po::value<uint32_t >(&QD)->default_value (0 ),
57
+ " Quantized Dimension for compression" );
58
+ optional_configs.add_options ()(" codebook_prefix" , po::value<std::string>(&codebook_prefix)->default_value (" " ),
59
+ " Path prefix for pre-trained codebook" );
60
+ optional_configs.add_options ()(" PQ_disk_bytes" , po::value<uint32_t >(&disk_PQ)->default_value (0 ),
61
+ " Number of bytes to which vectors should be compressed "
62
+ " on SSD; 0 for no compression" );
63
+ optional_configs.add_options ()(" append_reorder_data" , po::bool_switch ()->default_value (false ),
64
+ " Include full precision data in the index. Use only in "
65
+ " conjuction with compressed data on SSD." );
66
+ optional_configs.add_options ()(" build_PQ_bytes" , po::value<uint32_t >(&build_PQ)->default_value (0 ),
67
+ program_options_utils::BUIlD_GRAPH_PQ_BYTES);
68
+ optional_configs.add_options ()(" use_opq" , po::bool_switch ()->default_value (false ),
69
+ program_options_utils::USE_OPQ);
70
+ optional_configs.add_options ()(" label_file" , po::value<std::string>(&label_file)->default_value (" " ),
71
+ program_options_utils::LABEL_FILE);
72
+ optional_configs.add_options ()(" universal_label" , po::value<std::string>(&universal_label)->default_value (" " ),
73
+ program_options_utils::UNIVERSAL_LABEL);
74
+ optional_configs.add_options ()(" FilteredLbuild" , po::value<uint32_t >(&Lf)->default_value (0 ),
75
+ program_options_utils::FILTERED_LBUILD);
76
+ optional_configs.add_options ()(" filter_threshold,F" , po::value<uint32_t >(&filter_threshold)->default_value (0 ),
77
+ " Threshold to break up the existing nodes to generate new graph "
78
+ " internally where each node has a maximum F labels." );
79
+ optional_configs.add_options ()(" label_type" , po::value<std::string>(&label_type)->default_value (" uint" ),
80
+ program_options_utils::LABEL_TYPE_DESCRIPTION);
81
+
82
+ // Merge required and optional parameters
83
+ desc.add (required_configs).add (optional_configs);
79
84
80
85
po::variables_map vm;
81
86
po::store (po::parse_command_line (argc, argv, desc), vm);
0 commit comments