Skip to content

Commit ea13ebc

Browse files
committed
sort each posting-list by the distance to centroid
Signed-off-by: Keming <[email protected]>
1 parent bb440e3 commit ea13ebc

File tree

2 files changed

+11
-4
lines changed

2 files changed

+11
-4
lines changed

src/rabitq.rs

+9-2
Original file line numberDiff line numberDiff line change
@@ -176,8 +176,8 @@ impl RaBitQ {
176176
if i % 5000 == 0 {
177177
debug!("\t> preprocessing {}...", i);
178178
}
179-
let min_label = kmeans_nearest_cluster(&centroids.as_ref(), &xp);
180-
labels[min_label].push(i as u32);
179+
let (min_label, min_dist) = kmeans_nearest_cluster(&centroids.as_ref(), &xp);
180+
labels[min_label].push((i as u32, min_dist));
181181
let x_c_quantized = xp - centroids.col(min_label);
182182
x_c_distance[i] = x_c_quantized.norm_l2();
183183
factors[i].center_distance_square = x_c_distance[i].powi(2);
@@ -205,6 +205,13 @@ impl RaBitQ {
205205
}
206206

207207
// sort by labels
208+
let labels = labels
209+
.into_iter()
210+
.map(|mut v| {
211+
v.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
212+
v.into_iter().map(|(i, _)| i).collect::<Vec<_>>()
213+
})
214+
.collect::<Vec<_>>();
208215
debug!("sort by labels...");
209216
let mut offsets = vec![0; k + 1];
210217
for i in 0..k {

src/utils.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ pub fn project(vec: &ColRef<f32>, orthogonal: &MatRef<f32>) -> Col<f32> {
248248
}
249249

250250
/// Find the nearest cluster for the given vector.
251-
pub fn kmeans_nearest_cluster(centroids: &MatRef<f32>, vec: &ColRef<f32>) -> usize {
251+
pub fn kmeans_nearest_cluster(centroids: &MatRef<f32>, vec: &ColRef<f32>) -> (usize, f32) {
252252
let mut min_dist = f32::MAX;
253253
let mut min_label = 0;
254254
for (j, centroid) in centroids.col_iter().enumerate() {
@@ -258,7 +258,7 @@ pub fn kmeans_nearest_cluster(centroids: &MatRef<f32>, vec: &ColRef<f32>) -> usi
258258
min_label = j;
259259
}
260260
}
261-
min_label
261+
(min_label, min_dist)
262262
}
263263

264264
/// Read the fvecs/ivecs file.

0 commit comments

Comments
 (0)