@@ -45,22 +45,31 @@ namespace diskann {
45
45
};
46
46
status_code _status;
47
47
size_t _active_points, _max_points, _empty_slots, _slots_released,
48
- _delete_set_size;
48
+ _delete_set_size, _num_calls_to_process_delete ;
49
49
double _time;
50
50
51
51
consolidation_report (status_code status, size_t active_points,
52
52
size_t max_points, size_t empty_slots,
53
53
size_t slots_released, size_t delete_set_size,
54
- double time_secs)
54
+ size_t num_calls_to_process_delete, double time_secs)
55
55
: _status(status), _active_points(active_points),
56
56
_max_points (max_points), _empty_slots(empty_slots),
57
57
_slots_released(slots_released), _delete_set_size(delete_set_size),
58
+ _num_calls_to_process_delete(num_calls_to_process_delete),
58
59
_time(time_secs) {
59
60
}
60
61
};
61
62
62
63
template <typename T, typename TagT = uint32_t >
63
64
class Index {
65
+ /* *************************************************************************
66
+ *
67
+ * Public functions acquire one or more of _update_lock, _consolidate_lock,
68
+ * _tag_lock, _delete_lock before calling protected functions which DO NOT
69
+ * acquire these locks. They might acquire locks on _locks[i]
70
+ *
71
+ **************************************************************************/
72
+
64
73
public:
65
74
// Constructor for Bulk operations and for creating the index object solely
66
75
// for loading a preexisting index.
@@ -125,7 +134,7 @@ namespace diskann {
125
134
// Set starting point to a random point on a sphere of certain radius
126
135
DISKANN_DLLEXPORT void set_start_point_at_random (T radius);
127
136
128
- // For Bulk Index FastL2 search, we interleave the data with graph
137
+ // For FastL2 search on a static index, we interleave the data with graph
129
138
DISKANN_DLLEXPORT void optimize_index_layout ();
130
139
131
140
// For FastL2 search on optimized layout
@@ -183,9 +192,9 @@ namespace diskann {
183
192
// memory should be allocated for vec before calling this function
184
193
DISKANN_DLLEXPORT int get_vector_by_tag (TagT &tag, T *vec);
185
194
186
- DISKANN_DLLEXPORT void print_status () const ;
195
+ DISKANN_DLLEXPORT void print_status ();
187
196
188
- DISKANN_DLLEXPORT void count_nodes_at_bfs_levels () const ;
197
+ DISKANN_DLLEXPORT void count_nodes_at_bfs_levels ();
189
198
190
199
// This variable MUST be updated if the number of entries in the metadata
191
200
// change.
@@ -203,23 +212,25 @@ namespace diskann {
203
212
Index<T, TagT> &operator =(const Index<T, TagT> &) = delete ;
204
213
205
214
// Use after _data and _nd have been populated
215
+ // Acquire exclusive _update_lock before calling
206
216
void build_with_data_populated (Parameters &parameters,
207
217
const std::vector<TagT> &tags);
208
218
209
219
// generates 1 frozen point that will never be deleted from the graph
210
220
// This is not visible to the user
211
221
int generate_frozen_point ();
212
222
213
- // determines navigating node of the graph by calculating medoid of data
223
+ // determines navigating node of the graph by calculating medoid of data
214
224
unsigned calculate_entry_point ();
215
225
216
226
std::pair<uint32_t , uint32_t > iterate_to_fixed_point (
217
227
const T *node_coords, const unsigned Lindex,
218
228
const std::vector<unsigned > &init_ids, InMemQueryScratch<T> *scratch,
219
229
bool ret_frozen = true , bool search_invocation = false );
220
230
221
- void search_for_point_and_add_links (int location, _u32 Lindex,
222
- InMemQueryScratch<T> *scratch);
231
+ void search_for_point_and_prune (int location, _u32 Lindex,
232
+ std::vector<unsigned > &pruned_list,
233
+ InMemQueryScratch<T> *scratch);
223
234
224
235
void prune_neighbors (const unsigned location, std::vector<Neighbor> &pool,
225
236
std::vector<unsigned > &pruned_list,
@@ -230,6 +241,8 @@ namespace diskann {
230
241
const float alpha, std::vector<unsigned > &pruned_list,
231
242
InMemQueryScratch<T> *scratch);
232
243
244
+ // Prunes candidates in @pool to a shorter list @result
245
+ // @pool must be sorted before calling
233
246
void occlude_list (
234
247
const unsigned location, std::vector<Neighbor> &pool, const float alpha,
235
248
const unsigned degree, const unsigned maxc,
@@ -243,30 +256,34 @@ namespace diskann {
243
256
void inter_insert (unsigned n, std::vector<unsigned > &pruned_list,
244
257
InMemQueryScratch<T> *scratch);
245
258
259
+ // Acquire exclusive _update_lock before calling
246
260
void link (Parameters &parameters);
247
261
248
- // Acquire _tag_lock before calling
249
- int reserve_location ();
262
+ // Acquire exclusive _tag_lock and _delete_lock before calling
263
+ int reserve_location ();
264
+
265
+ // Acquire exclusive _tag_lock before calling
250
266
size_t release_location (int location);
251
- size_t release_locations (tsl::robin_set<unsigned > &locations);
267
+ size_t release_locations (const tsl::robin_set<unsigned > &locations);
252
268
253
269
// Resize the index when no slots are left for insertion.
254
- // MUST acquire _num_points_lock and _update_lock before calling.
270
+ // Acquire exclusive _update_lock and _tag_lock before calling.
255
271
void resize (size_t new_max_points);
256
272
257
- // Take an unique lock on _update_lock and _consolidate_lock
258
- // before calling these functions.
273
+ // Acquire unique lock on _update_lock, _consolidate_lock, _tag_lock
274
+ // and _delete_lock before calling these functions.
259
275
// Renumber nodes, update tag and location maps and compact the
260
276
// graph, mode = _consolidated_order in case of lazy deletion and
261
277
// _compacted_order in case of eager deletion
262
278
DISKANN_DLLEXPORT void compact_data ();
263
279
DISKANN_DLLEXPORT void compact_frozen_point ();
264
280
265
- // Remove deleted nodes from adj list of node i and absorb edges from
266
- // deleted neighbors Acquire _locks[i] prior to calling for thread-safety
281
+ // Remove deleted nodes from adjacency list of node loc
282
+ // Replace removed neighbors with second order neighbors.
283
+ // Also acquires _locks[i] for i = loc and out-neighbors of loc.
267
284
void process_delete (const tsl::robin_set<unsigned > &old_delete_set,
268
- size_t i , const unsigned & range, const unsigned & maxc,
269
- const float & alpha, InMemQueryScratch<T> *scratch);
285
+ size_t loc , const unsigned range, const unsigned maxc,
286
+ const float alpha, InMemQueryScratch<T> *scratch);
270
287
271
288
void initialize_query_scratch (uint32_t num_threads, uint32_t search_l,
272
289
uint32_t indexing_l, uint32_t r,
@@ -299,7 +316,7 @@ namespace diskann {
299
316
300
317
// Data
301
318
T *_data = nullptr ;
302
- char *_opt_graph;
319
+ char *_opt_graph = nullptr ;
303
320
304
321
// Graph related data structures
305
322
std::vector<std::vector<unsigned >> _final_graph;
@@ -335,13 +352,6 @@ namespace diskann {
335
352
// Query scratch data structures
336
353
ConcurrentQueue<InMemQueryScratch<T> *> _query_scratch;
337
354
338
- // data structures, flags and locks for dynamic indexing
339
- tsl::sparse_map<TagT, unsigned > _tag_to_location;
340
- natural_number_map<unsigned , TagT> _location_to_tag;
341
-
342
- tsl::robin_set<unsigned > _delete_set;
343
- natural_number_set<unsigned > _empty_slots;
344
-
345
355
// Flags for PQ based distance calculation
346
356
bool _pq_dist = false ;
347
357
bool _use_opq = false ;
@@ -350,23 +360,38 @@ namespace diskann {
350
360
bool _pq_generated = false ;
351
361
FixedChunkPQTable _pq_table;
352
362
353
- bool _lazy_done = false ; // true if lazy deletions have been made
363
+ //
364
+ // Data structures, locks and flags for dynamic indexing and tags
365
+ //
366
+
367
+ // lazy_delete removes entry from _location_to_tag and _tag_to_location. If
368
+ // _location_to_tag does not resolve a location, infer that it was deleted.
369
+ tsl::sparse_map<TagT, unsigned > _tag_to_location;
370
+ natural_number_map<unsigned , TagT> _location_to_tag;
371
+
372
+ // _empty_slots has unallocated slots and those freed by consolidate_delete.
373
+ // _delete_set has locations marked deleted by lazy_delete. Will not be
374
+ // immediately available for insert. consolidate_delete will release these
375
+ // slots to _empty_slots.
376
+ natural_number_set<unsigned > _empty_slots;
377
+ std::unique_ptr<tsl::robin_set<unsigned >> _delete_set;
378
+
354
379
bool _data_compacted = true ; // true if data has been compacted
355
380
bool _is_saved = false ; // Gopal. Checking if the index is already saved.
356
381
bool _conc_consolidate = false ; // use _lock while searching
357
382
358
- // Per node lock, cardinality=max_points_
359
- std::vector<non_recursive_mutex> _locks;
360
-
361
- // If acquiring multiple locks below, acquire locks in the order below
383
+ // Acquire locks in the order below when acquiring multiple locks
362
384
std::shared_timed_mutex // RW mutex between save/load (exclusive lock) and
363
385
_update_lock; // search/inserts/deletes/consolidate (shared lock)
364
- std::shared_timed_mutex
365
- _consolidate_lock; // Ensure only one consolidate is ever active
366
- std::shared_timed_mutex
367
- _tag_lock; // RW lock for _tag_to_location and _location_to_tag
368
- std::shared_timed_mutex
369
- _delete_lock; // RW Lock on _delete_set and _empty_slots
386
+ std::shared_timed_mutex // Ensure only one consolidate or compact_data is
387
+ _consolidate_lock; // ever active
388
+ std::shared_timed_mutex // RW lock for _tag_to_location,
389
+ _tag_lock; // _location_to_tag, _empty_slots, _nd, _max_points
390
+ std::shared_timed_mutex // RW Lock on _delete_set and _data_compacted
391
+ _delete_lock; // variable
392
+
393
+ // Per node lock, cardinality=_max_points
394
+ std::vector<non_recursive_mutex> _locks;
370
395
371
396
static const float INDEX_GROWTH_FACTOR;
372
397
};
0 commit comments