Skip to content

Commit 27cde02

Browse files
authored
Port over fixes from vortex repo (#28)
1 parent 41578b9 commit 27cde02

File tree

3 files changed

+86
-39
lines changed

3 files changed

+86
-39
lines changed

Cargo.lock

Lines changed: 30 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ edition = "2021"
88

99
[dependencies]
1010
fastlanes = "0.1"
11+
itertools = "0.14.0"
1112
num-traits = "0.2.19"
1213
serde = { version = "1", optional = true, features = ["derive"] }
1314

src/alp/mod.rs

Lines changed: 55 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
1+
use itertools::Itertools;
2+
use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};
13
use std::fmt::{Display, Formatter};
24
use std::mem::size_of;
35

4-
use num_traits::{CheckedSub, Float, PrimInt, ToPrimitive};
5-
66
const SAMPLE_SIZE: usize = 32;
77

88
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
@@ -46,18 +46,16 @@ pub fn encode_single<F: ALPFloat>(value: F, exponents: Exponents) -> Result<F::A
4646

4747
/// Decodes an integer value to its matching floating point representation given the same exponents.
4848
pub fn decode_single<F: ALPFloat>(encoded: F::ALPInt, exponents: Exponents) -> F {
49-
F::from_int(encoded) * F::F10[exponents.f as usize] * F::IF10[exponents.e as usize]
49+
F::decode_single(encoded, exponents)
5050
}
5151

5252
/// Encodes a single value, it might not round-trip back to its original value
5353
/// # Safety
5454
///
5555
/// The returned value may not decode back to the original value.
5656
#[inline(always)]
57-
pub unsafe fn encode_single_unchecked<F: ALPFloat>(value: F, exponents: Exponents) -> F::ALPInt {
58-
(value * F::F10[exponents.e as usize] * F::IF10[exponents.f as usize])
59-
.fast_round()
60-
.as_int()
57+
pub fn encode_single_unchecked<F: ALPFloat>(value: F, exponents: Exponents) -> F::ALPInt {
58+
F::encode_single_unchecked(value, exponents)
6159
}
6260

6361
pub trait ALPFloat: private::Sealed + Float + Display + 'static {
@@ -81,16 +79,18 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
8179
/// Convert from the integer type back to the float type using `as`.
8280
fn from_int(n: Self::ALPInt) -> Self;
8381

82+
fn is_eq(self, other: Self) -> bool;
83+
8484
fn find_best_exponents(values: &[Self]) -> Exponents {
8585
let mut best_exp = Exponents { e: 0, f: 0 };
8686
let mut best_nbytes: usize = usize::MAX;
8787

88-
let sample: Option<Vec<Self>> = (values.len() > SAMPLE_SIZE).then(|| {
88+
let sample = (values.len() > SAMPLE_SIZE).then(|| {
8989
values
9090
.iter()
9191
.step_by(values.len() / SAMPLE_SIZE)
9292
.cloned()
93-
.collect()
93+
.collect_vec()
9494
});
9595

9696
for e in (0..Self::MAX_EXPONENT).rev() {
@@ -115,12 +115,10 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
115115

116116
#[inline]
117117
fn estimate_encoded_size(encoded: &[Self::ALPInt], patches: &[Self]) -> usize {
118-
let minmax = encoded.iter().fold(None, |minmax, next| {
119-
let (min, max) = minmax.unwrap_or((next, next));
120-
121-
Some((min.min(next), max.max(next)))
122-
});
123-
let bits_per_encoded = minmax
118+
let bits_per_encoded = encoded
119+
.iter()
120+
.minmax()
121+
.into_option()
124122
// estimating bits per encoded value assuming frame-of-reference + bitpacking-without-patches
125123
.and_then(|(min, max)| max.checked_sub(min))
126124
.and_then(|range_size: <Self as ALPFloat>::ALPInt| range_size.to_u64())
@@ -168,11 +166,23 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
168166
(exp, encoded_output, patch_indices, patch_values)
169167
}
170168

169+
fn encode_above(value: Self, exponents: Exponents) -> Self::ALPInt {
170+
(value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
171+
.ceil()
172+
.as_int()
173+
}
174+
175+
fn encode_below(value: Self, exponents: Exponents) -> Self::ALPInt {
176+
(value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
177+
.floor()
178+
.as_int()
179+
}
180+
171181
#[inline]
172182
fn encode_single(value: Self, exponents: Exponents) -> Result<Self::ALPInt, Self> {
173-
let encoded = unsafe { Self::encode_single_unchecked(value, exponents) };
183+
let encoded = Self::encode_single_unchecked(value, exponents);
174184
let decoded = Self::decode_single(encoded, exponents);
175-
if decoded == value {
185+
if decoded.is_eq(value) {
176186
return Ok(encoded);
177187
}
178188
Err(value)
@@ -183,11 +193,9 @@ pub trait ALPFloat: private::Sealed + Float + Display + 'static {
183193
Self::from_int(encoded) * Self::F10[exponents.f as usize] * Self::IF10[exponents.e as usize]
184194
}
185195

186-
/// # Safety
187-
///
188-
/// The returned value may not decode back to the original value.
196+
/// Encodes a single value, it might not round-trip back to its original value
189197
#[inline(always)]
190-
unsafe fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
198+
fn encode_single_unchecked(value: Self, exponents: Exponents) -> Self::ALPInt {
191199
(value * Self::F10[exponents.e as usize] * Self::IF10[exponents.f as usize])
192200
.fast_round()
193201
.as_int()
@@ -209,10 +217,10 @@ fn encode_chunk_unchecked<T: ALPFloat>(
209217

210218
// encode the chunk, counting the number of patches
211219
let mut chunk_patch_count = 0;
212-
encoded_output.extend(chunk.iter().map(|v| {
213-
let encoded = unsafe { T::encode_single_unchecked(*v, exp) };
220+
encoded_output.extend(chunk.iter().map(|&v| {
221+
let encoded = encode_single_unchecked(v, exp);
214222
let decoded = T::decode_single(encoded, exp);
215-
let neq = (decoded != *v) as usize;
223+
let neq = !decoded.is_eq(v) as usize;
216224
chunk_patch_count += neq;
217225
encoded
218226
}));
@@ -234,7 +242,7 @@ fn encode_chunk_unchecked<T: ALPFloat>(
234242
// write() is only safe to call more than once because the values are primitive (i.e., Drop is a no-op)
235243
patch_indices_mut[chunk_patch_index].write(i as u64);
236244
patch_values_mut[chunk_patch_index].write(chunk[i - num_prev_encoded]);
237-
chunk_patch_index += (decoded != chunk[i - num_prev_encoded]) as usize;
245+
chunk_patch_index += !decoded.is_eq(chunk[i - num_prev_encoded]) as usize;
238246
}
239247
assert_eq!(chunk_patch_index, chunk_patch_count);
240248
unsafe {
@@ -309,6 +317,10 @@ impl ALPFloat for f32 {
309317
fn from_int(n: Self::ALPInt) -> Self {
310318
n as _
311319
}
320+
321+
fn is_eq(self, other: Self) -> bool {
322+
self.to_bits() == other.to_bits()
323+
}
312324
}
313325

314326
impl ALPFloat for f64 {
@@ -380,4 +392,22 @@ impl ALPFloat for f64 {
380392
fn from_int(n: Self::ALPInt) -> Self {
381393
n as _
382394
}
395+
396+
fn is_eq(self, other: Self) -> bool {
397+
self.to_bits() == other.to_bits()
398+
}
399+
}
400+
401+
#[cfg(test)]
402+
mod tests {
403+
use super::*;
404+
405+
#[test]
406+
fn non_finite_numbers() {
407+
let original = vec![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY];
408+
let (_, encoded, patch_idx, _) = encode(&original, None);
409+
410+
assert_eq!(patch_idx, vec![1, 2, 3, 4]);
411+
assert_eq!(encoded, vec![0, 0, 0, 0, 0]);
412+
}
383413
}

0 commit comments

Comments
 (0)