forked from facebookresearch/faiss
-
Notifications
You must be signed in to change notification settings - Fork 0
/
IndexIVF.h
222 lines (148 loc) · 6.37 KB
/
IndexIVF.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
/**
* Copyright (c) 2015-present, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the CC-by-NC license found in the
* LICENSE file in the root directory of this source tree.
*/
// Copyright 2004-present Facebook. All Rights Reserved.
// -*- c++ -*-
#ifndef FAISS_INDEX_IVF_H
#define FAISS_INDEX_IVF_H
#include <vector>
#include "Index.h"
#include "Clustering.h"
#include "Heap.h"
namespace faiss {
/** Index based on a inverted file (IVF)
*
* In the inverted file, the quantizer (an Index instance) provides a
* quantization index for each vector to be added. The quantization
* index maps to a list (aka inverted list or posting list), where the
* id of the vector is then stored.
*
* At search time, the vector to be searched is also quantized, and
* only the list corresponding to the quantization index is
* searched. This speeds up the search by making it
* non-exhaustive. This can be relaxed using multi-probe search: a few
* (nprobe) quantization indices are selected and several inverted
* lists are visited.
*
* Sub-classes implement a post-filtering of the index that refines
* the distance estimation from the query to databse vectors.
*/
struct IndexIVF: Index {
size_t nlist; ///< number of possible key values
size_t nprobe; ///< number of probes at query time
Index * quantizer; ///< quantizer that maps vectors to inverted lists
bool quantizer_trains_alone; ///< just pass over the trainset to quantizer
bool own_fields; ///< whether object owns the quantizer
ClusteringParameters cp; ///< to override default clustering params
std::vector < std::vector<long> > ids; ///< Inverted lists for indexes
/// map for direct access to the elements. Enables reconstruct().
bool maintain_direct_map;
std::vector <long> direct_map;
/** The Inverted file takes a quantizer (an Index) on input,
* which implements the function mapping a vector to a list
* identifier. The pointer is borrowed: the quantizer should not
* be deleted while the IndexIVF is in use.
*/
IndexIVF (Index * quantizer, size_t d, size_t nlist,
MetricType metric = METRIC_INNER_PRODUCT);
virtual void reset () override;
/// Trains the quantizer and calls train_residual to train sub-quantizers
virtual void train (idx_t n, const float *x) override;
/// Quantizes x and calls add_with_key
virtual void add (idx_t n, const float *x) override;
/// Sub-classes that encode the residuals can train their encoders here
/// does nothing by default
virtual void train_residual (idx_t n, const float *x);
/** moves the entries from another dataset to self. On output,
* other is empty. add_id is added to all moved ids (for
* sequential ids, this would be this->ntotal */
virtual void merge_from (IndexIVF &other, idx_t add_id);
/** implemented by sub-classes */
virtual void merge_from_residuals (IndexIVF &other) = 0;
virtual ~IndexIVF();
/// intialize a direct map
void make_direct_map ();
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor () const;
/// display some stats about the inverted lists
void print_stats () const;
IndexIVF ();
};
struct IndexIVFFlatStats {
size_t nq; // nb of queries run
size_t nlist; // nb of inverted lists scanned
size_t ndis; // nb of distancs computed
size_t npartial; // nb of bound computations (IndexIVFFlatIPBounds)
IndexIVFFlatStats () {reset (); }
void reset ();
};
// global var that collects them all
extern IndexIVFFlatStats indexIVFFlat_stats;
/** Inverted file with stored vectors. Here the inverted file
* pre-selects the vectors to be searched, but they are not otherwise
* encoded.
*/
struct IndexIVFFlat: IndexIVF {
/** Inverted list of original vectors. Each list is a nl * d
* matrix, where nl is the nb of vectors stored in the list. */
std::vector < std::vector<float> > vecs;
IndexIVFFlat (
Index * quantizer, size_t d, size_t nlist_,
MetricType = METRIC_INNER_PRODUCT);
/** Return the typeName of the index (which includes main parameters */
virtual void set_typename () override;
/// same as add_with_ids, with precomputed coarse quantizer
virtual void add_core (idx_t n, const float * x, const long *xids,
const long *precomputed_idx);
/// implemented for all IndexIVF* classes
virtual void add_with_ids (idx_t n, const float * x, const long *xids)
override;
virtual void search (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
virtual void range_search (
idx_t n, const float *x, float radius,
RangeSearchResult *result) const override;
/** copy a subset of the entries index to the other index
*
* if subset_type == 0: copies ids in [a1, a2)
* if subset_type == 1: copies ids if id % a1 == a2
*/
void copy_subset_to (IndexIVFFlat & other, int subset_type,
long a1, long a2) const;
virtual void reset() override;
virtual long remove_ids (const IDSelector & sel) override;
/// Implementation of the search for the inner product metric
void search_knn_inner_product (
size_t nx, const float * x,
const long * keys,
float_minheap_array_t * res) const;
/// Implementation of the search for the L2 metric
void search_knn_L2sqr (
size_t nx, const float * x,
const long * keys,
float_maxheap_array_t * res) const;
virtual void reconstruct (idx_t key, float * recons) const override;
virtual void merge_from_residuals (IndexIVF &other) override;
IndexIVFFlat () {}
};
struct IndexIVFFlatIPBounds: IndexIVFFlat {
/// nb of dimensions of pre-filter
size_t fsize;
/// norm of remainder (dimensions fsize:d)
std::vector<std::vector<float> > part_norms;
IndexIVFFlatIPBounds (
Index * quantizer, size_t d, size_t nlist,
size_t fsize);
virtual void add_core (idx_t n, const float * x, const long *xids,
const long *precomputed_idx) override;
virtual void search (
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels) const override;
};
} // namespace faiss
#endif