|
| 1 | +use crate::{Encodable, HashFunctions, index_for_seed, indices}; |
| 2 | + |
| 3 | +/// Represents a coded symbol in the invertible bloom filter table. |
| 4 | +/// In some of the literature this is referred to as a "cell" or "bucket". |
| 5 | +/// It includes a checksum to verify whether the instance represents a pure value. |
| 6 | +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] |
| 7 | +pub struct CodedSymbol<T: Encodable> { |
| 8 | + /// Values aggregated by XOR operation. |
| 9 | + pub value: T, |
| 10 | + /// We repurpose the two least significant bits of the checksum: |
| 11 | + /// - The least significant bit is a one bit counter which is incremented for each entity. |
| 12 | + /// This bit must be set when there is a single entity represented by this hash. |
| 13 | + /// - The second least significant bit indicates whether the entity is a deletion or insertion. |
| 14 | + pub checksum: u64, |
| 15 | +} |
| 16 | + |
| 17 | +impl<T: Encodable> Default for CodedSymbol<T> { |
| 18 | + fn default() -> Self { |
| 19 | + CodedSymbol { |
| 20 | + value: T::zero(), |
| 21 | + checksum: 0, |
| 22 | + } |
| 23 | + } |
| 24 | +} |
| 25 | + |
| 26 | +impl<T: Encodable> From<(T, u64)> for CodedSymbol<T> { |
| 27 | + fn from(tuple: (T, u64)) -> Self { |
| 28 | + Self { |
| 29 | + value: tuple.0, |
| 30 | + checksum: tuple.1, |
| 31 | + } |
| 32 | + } |
| 33 | +} |
| 34 | + |
| 35 | +impl<T: Encodable> CodedSymbol<T> { |
| 36 | + /// Creates a new coded symbol with the given hash and deletion flag. |
| 37 | + pub(crate) fn new<S: HashFunctions<T>>(state: &S, hash: T, deletion: bool) -> Self { |
| 38 | + let mut checksum = state.check_sum(&hash); |
| 39 | + checksum |= 1; // Add a single bit counter |
| 40 | + if deletion { |
| 41 | + checksum = checksum.wrapping_neg(); |
| 42 | + } |
| 43 | + CodedSymbol { |
| 44 | + value: hash, |
| 45 | + checksum, |
| 46 | + } |
| 47 | + } |
| 48 | + |
| 49 | + /// Merges another coded symbol into this one. |
| 50 | + pub(crate) fn add(&mut self, other: &CodedSymbol<T>, negate: bool) { |
| 51 | + self.value.xor(other.value); |
| 52 | + if negate { |
| 53 | + self.checksum = self.checksum.wrapping_sub(other.checksum); |
| 54 | + } else { |
| 55 | + self.checksum = self.checksum.wrapping_add(other.checksum); |
| 56 | + } |
| 57 | + } |
| 58 | + |
| 59 | + /// Checks whether this coded symbol is pure, i.e., whether it represents a single entity |
| 60 | + /// A pure coded symbol must satisfy the following conditions: |
| 61 | + /// - The 1-bit counter must be 1 or -1 (which are both represented by the bit being set) |
| 62 | + /// - The checksum must match the checksum of the value. |
| 63 | + /// - The indices of the value must match the index of this coded symbol. |
| 64 | + pub(crate) fn is_pure<S: HashFunctions<T>>( |
| 65 | + &self, |
| 66 | + state: &S, |
| 67 | + i: usize, |
| 68 | + len: usize, |
| 69 | + ) -> (bool, usize) { |
| 70 | + if self.checksum & 1 == 0 { |
| 71 | + return (false, 0); |
| 72 | + } |
| 73 | + let multiplicity = indices_contains(state, &self.value, len, i); |
| 74 | + if multiplicity != 1 { |
| 75 | + return (false, 0); |
| 76 | + } |
| 77 | + let checksum = state.check_sum(&self.value) | 1; |
| 78 | + if checksum == self.checksum || checksum.wrapping_neg() == self.checksum { |
| 79 | + (true, 0) |
| 80 | + } else { |
| 81 | + let required_bits = self |
| 82 | + .checksum |
| 83 | + .wrapping_sub(checksum) |
| 84 | + .leading_zeros() |
| 85 | + .max(self.checksum.wrapping_add(checksum).leading_zeros()) |
| 86 | + as usize; |
| 87 | + (false, required_bits) |
| 88 | + } |
| 89 | + } |
| 90 | + |
| 91 | + /// Checks whether this coded symbol is zero, i.e., whether it represents no entity. |
| 92 | + pub(crate) fn is_zero(&self) -> bool { |
| 93 | + self.checksum == 0 && self.value == T::zero() |
| 94 | + } |
| 95 | + |
| 96 | + /// Checks whether this coded symbol represents a deletion. |
| 97 | + pub(crate) fn is_deletion<S: HashFunctions<T>>(&self, state: &S) -> bool { |
| 98 | + let checksum = state.check_sum(&self.value) | 1; |
| 99 | + checksum != self.checksum |
| 100 | + } |
| 101 | +} |
| 102 | + |
| 103 | +/// This function checks efficiently whether the given index is contained in the indices. |
| 104 | +/// |
| 105 | +/// Note: we have constructed the indices such that we can determine from the last 5 bits |
| 106 | +/// which hash function would map to this index. Therefore, we only need to check against |
| 107 | +/// a single hash function and not all 5! |
| 108 | +/// The only exception is for very small indices (0..32) or if the index is a multiple of 32. |
| 109 | +/// |
| 110 | +/// The function returns the multiplicity, i.e. how many indices hit this particular index. |
| 111 | +/// Thereby, it takes into account whether the value is stored negated or not. |
| 112 | +fn indices_contains<T: std::hash::Hash>( |
| 113 | + state: &impl HashFunctions<T>, |
| 114 | + value: &T, |
| 115 | + stream_len: usize, |
| 116 | + i: usize, |
| 117 | +) -> i32 { |
| 118 | + if stream_len > 32 && i % 32 != 0 { |
| 119 | + let seed = i % 4; |
| 120 | + let j = index_for_seed(state, value, stream_len, seed as u32); |
| 121 | + if i == j { 1 } else { 0 } |
| 122 | + } else { |
| 123 | + indices(state, value, stream_len) |
| 124 | + .map(|j| if j == i { 1 } else { 0 }) |
| 125 | + .sum() |
| 126 | + } |
| 127 | +} |
0 commit comments