Skip to content

Commit b6c25a9

Browse files
authored
Memory management and access features for primitive arrays and bit arrays
* Add BitsIter and Bitmap::make_iter
* Add math_op_with_data_type
* Add Clone implementations for BooleanArray and PrimitiveArray
* Add functionality to BooleanBufferBuilder and BufferBuilder
* Add into_parts-type functions for consuming PrimitiveArray and BooleanArray
1 parent ea7d119 commit b6c25a9

File tree

7 files changed

+122
-2
lines changed

7 files changed

+122
-2
lines changed

arrow/src/array/array_boolean.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ use std::{any::Any, fmt};
2222

2323
use super::*;
2424
use super::{array::print_long_array, raw_pointer::RawPtrBox};
25+
use crate::bitmap::Bitmap;
2526
use crate::buffer::{Buffer, MutableBuffer};
2627
use crate::util::bit_util;
2728

@@ -54,6 +55,12 @@ pub struct BooleanArray {
5455
raw_values: RawPtrBox<u8>,
5556
}
5657

58+
impl Clone for BooleanArray {
59+
fn clone(&self) -> Self {
60+
Self::from(self.data.clone())
61+
}
62+
}
63+
5764
impl fmt::Debug for BooleanArray {
5865
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
5966
write!(f, "BooleanArray\n[\n")?;
@@ -103,6 +110,12 @@ impl BooleanArray {
103110
debug_assert!(i < self.len());
104111
unsafe { self.value_unchecked(i) }
105112
}
113+
114+
/// Returns (_, _, offset, length)
115+
pub fn into_parts(self) -> (Buffer, Option<Bitmap>, usize, usize) {
116+
let data = self.data;
117+
data.into_1_dimensional_parts()
118+
}
106119
}
107120

108121
impl Array for BooleanArray {

arrow/src/array/array_primitive.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ pub struct PrimitiveArray<T: ArrowPrimitiveType> {
5757
raw_values: RawPtrBox<T::Native>,
5858
}
5959

60+
impl<T: ArrowPrimitiveType> Clone for PrimitiveArray<T> {
61+
fn clone(&self) -> Self {
62+
Self::from(self.data.clone())
63+
}
64+
}
65+
6066
impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
6167
/// Returns the length of this array.
6268
#[inline]
@@ -140,6 +146,10 @@ impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
140146
);
141147
PrimitiveArray::from(data)
142148
}
149+
150+
pub fn into_data(self) -> ArrayData {
151+
self.data
152+
}
143153
}
144154

145155
impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {

arrow/src/array/builder.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
//! as an internal buffer in an [`ArrayData`](crate::array::ArrayData)
2121
//! object.
2222
23+
// use core::slice::SlicePattern;
2324
use std::any::Any;
2425
use std::collections::HashMap;
2526
use std::fmt;
@@ -86,6 +87,12 @@ pub struct BufferBuilder<T: ArrowNativeType> {
8687
_marker: PhantomData<T>,
8788
}
8889

90+
impl<T: ArrowNativeType> Default for BufferBuilder<T> {
91+
fn default() -> Self {
92+
Self::new(0)
93+
}
94+
}
95+
8996
impl<T: ArrowNativeType> BufferBuilder<T> {
9097
/// Creates a new builder with initial capacity for _at least_ `capacity`
9198
/// elements of type `T`.
@@ -134,6 +141,16 @@ impl<T: ArrowNativeType> BufferBuilder<T> {
134141
self.len
135142
}
136143

144+
#[allow(missing_docs)]
145+
pub fn typed_data_mut(&mut self) -> &mut [T] {
146+
// TODO: Make faster.
147+
unsafe {
148+
let (_prefix, offsets, _suffix) =
149+
self.buffer.as_slice_mut().align_to_mut::<T>();
150+
offsets
151+
}
152+
}
153+
137154
/// Returns whether the internal buffer is empty.
138155
///
139156
/// # Example:
@@ -297,11 +314,23 @@ impl BooleanBufferBuilder {
297314
Self { buffer, len: 0 }
298315
}
299316

317+
#[inline]
318+
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> BooleanBufferBuilder {
319+
assert_eq!(len.div_ceil(8), buffer.len());
320+
Self { buffer, len }
321+
}
322+
300323
#[inline]
301324
pub fn len(&self) -> usize {
302325
self.len
303326
}
304327

328+
#[inline]
329+
pub fn get_bit(&self, index: usize) -> bool {
330+
bit_util::get_bit(self.buffer.as_ref(), index)
331+
}
332+
333+
// TODO: Probably, make set_bit be branchless
305334
#[inline]
306335
pub fn set_bit(&mut self, index: usize, v: bool) {
307336
if v {
@@ -382,6 +411,12 @@ impl BooleanBufferBuilder {
382411
self.len = 0;
383412
buf.into()
384413
}
414+
415+
#[inline]
416+
/// Builds the [Buffer] without resetting the builder.
417+
pub fn finish_cloned(&self) -> Buffer {
418+
Buffer::from_slice_ref(&self.buffer.as_slice())
419+
}
385420
}
386421

387422
impl From<BooleanBufferBuilder> for Buffer {

arrow/src/array/data.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,15 @@ impl ArrayData {
478478

479479
Self::new(data_type.clone(), 0, Some(0), None, 0, buffers, child_data)
480480
}
481+
482+
pub fn into_1_dimensional_parts(self) -> (Buffer, Option<Bitmap>, usize, usize) {
483+
let offset: usize = self.offset;
484+
let length: usize = self.len;
485+
let buffers: Vec<Buffer> = self.buffers;
486+
let bitmap: Option<Bitmap> = self.null_bitmap;
487+
let buffer0: Buffer = buffers.into_iter().next().unwrap();
488+
(buffer0, bitmap, offset, length)
489+
}
481490
}
482491

483492
impl PartialEq for ArrayData {

arrow/src/bitmap.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ impl Bitmap {
7474
pub fn get_array_memory_size(&self) -> usize {
7575
self.bits.capacity() + mem::size_of_val(self)
7676
}
77+
78+
/// Returns a `BitsIter` over `len` bits of this bitmap, starting at bit
/// index `offset`.
///
/// The range is validated by `BitsIter::new`, which panics when it does not
/// fit inside the bitmap's bytes.
pub fn make_iter<'a>(&'a self, offset: usize, len: usize) -> bit_util::BitsIter<'a> {
    let bits = self.bits.as_slice();
    bit_util::BitsIter::new(bits, offset, len)
}
7781
}
7882

7983
impl<'a, 'b> BitAnd<&'b Bitmap> for &'a Bitmap {

arrow/src/compute/kernels/arithmetic.rs

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ use crate::buffer::MutableBuffer;
3232
#[cfg(not(feature = "simd"))]
3333
use crate::compute::kernels::arity::unary;
3434
use crate::compute::util::combine_option_bitmap;
35-
use crate::datatypes;
3635
use crate::datatypes::ArrowNumericType;
36+
use crate::datatypes::{self, ArrowPrimitiveType, DataType};
3737
use crate::error::{ArrowError, Result};
3838
use crate::{array::*, util::bit_util};
3939
use num::traits::Pow;
@@ -155,6 +155,21 @@ pub fn math_op<T, F>(
155155
where
156156
T: ArrowNumericType,
157157
F: Fn(T::Native, T::Native) -> T::Native,
158+
{
159+
math_op_with_data_type(T::DATA_TYPE, left, right, op)
160+
}
161+
162+
/// Like `math_op` but builds a PrimitiveArray with the supplied data type.
163+
pub fn math_op_with_data_type<T, U, F>(
164+
data_type: DataType,
165+
left: &PrimitiveArray<T>,
166+
right: &PrimitiveArray<U>,
167+
op: F,
168+
) -> Result<PrimitiveArray<T>>
169+
where
170+
T: ArrowPrimitiveType,
171+
U: ArrowPrimitiveType,
172+
F: Fn(T::Native, U::Native) -> T::Native,
158173
{
159174
if left.len() != right.len() {
160175
return Err(ArrowError::ComputeError(
@@ -178,7 +193,7 @@ where
178193
let buffer = unsafe { Buffer::from_trusted_len_iter(values) };
179194

180195
let data = ArrayData::new(
181-
T::DATA_TYPE,
196+
data_type,
182197
left.len(),
183198
None,
184199
null_bit_buffer,

arrow/src/util/bit_util.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,40 @@ pub fn ceil(value: usize, divisor: usize) -> usize {
107107
}
108108
}
109109

110+
#[derive(Debug)]
111+
pub struct BitsIter<'a> {
112+
bytes: &'a [u8],
113+
offset: usize,
114+
end_offset: usize,
115+
}
116+
117+
impl<'a> BitsIter<'a> {
118+
pub fn new(bytes: &'a [u8], offset: usize, len: usize) -> BitsIter<'a> {
119+
let end_offset = offset + len;
120+
if end_offset < offset || end_offset.div_ceil(8) > bytes.len() {
121+
panic!("BitsIter::new called with invalid offset or len. offset: {}, len: {}, bytes.len(): {}", offset, len, bytes.len());
122+
}
123+
BitsIter {
124+
bytes,
125+
offset,
126+
end_offset,
127+
}
128+
}
129+
}
130+
131+
impl<'a> Iterator for BitsIter<'a> {
132+
type Item = bool;
133+
fn next(&mut self) -> Option<bool> {
134+
if self.offset == self.end_offset {
135+
None
136+
} else {
137+
let bit = get_bit(self.bytes, self.offset);
138+
self.offset += 1;
139+
Some(bit)
140+
}
141+
}
142+
}
143+
110144
/// Performs SIMD bitwise binary operations.
111145
///
112146
/// # Safety

0 commit comments

Comments
 (0)