Skip to content

Commit

Permalink
OTF-920 - fix NullPointerException
Browse files Browse the repository at this point in the history
Handle case where the VectorHolder contains a null value
  • Loading branch information
slessard authored and sl255051 committed Aug 21, 2024
1 parent 66f589d commit 1b255c0
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ private ArrowVectorAccessor<DecimalT, Utf8StringT, ArrayT, ChildVectorT> getPlai
return new FixedSizeBinaryAccessor<>(
(FixedSizeBinaryVector) vector, stringFactorySupplier.get());
}
throw new UnsupportedOperationException("Unsupported vector: " + vector.getClass());
String vectorName = (vector == null) ? "null" : vector.getClass().toString();
throw new UnsupportedOperationException("Unsupported vector: " + vectorName);
}

private static boolean isDecimal(PrimitiveType primitive) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import static org.apache.iceberg.Files.localInput;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.io.File;
import java.io.IOException;
Expand Down Expand Up @@ -59,6 +60,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.FieldType;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
Expand Down Expand Up @@ -263,6 +265,55 @@ public void testReadColumnFilter2() throws Exception {
scan, NUM_ROWS_PER_MONTH, 12 * NUM_ROWS_PER_MONTH, ImmutableList.of("timestamp"));
}

@Test
public void testThrowsUOEWhenNewColumnHasNoValue() throws Exception {
rowsWritten = Lists.newArrayList();
tables = new HadoopTables();

Schema schema =
new Schema(
Types.NestedField.required(1, "a", Types.IntegerType.get()),
Types.NestedField.optional(2, "b", Types.StringType.get()),
Types.NestedField.required(3, "c", Types.DecimalType.of(12, 3)));

PartitionSpec spec = PartitionSpec.builderFor(schema).build();
Table table1 = tables.create(schema, spec, tableLocation);

// Add one record to the table
GenericRecord rec = GenericRecord.create(schema);
rec.setField("a", 1);
rec.setField("b", "san diego");
rec.setField("c", new BigDecimal("1024.025"));
List<GenericRecord> genericRecords = Lists.newArrayList();
genericRecords.add(rec);

AppendFiles appendFiles = table1.newAppend();
appendFiles.appendFile(writeParquetFile(table1, genericRecords));
appendFiles.commit();

// Alter the table schema by adding a new, optional column.
// Do not add any data for this new column in the one existing row in the table
// and do not insert any new rows into the table.
Table table = tables.load(tableLocation);
table.updateSchema().addColumn("a1", Types.IntegerType.get()).commit();

// Select all columns, all rows from the table
TableScan scan = table.newScan().select("*");

assertThatThrownBy(
() -> {
// Read the data.
try (VectorizedTableScanIterable itr =
new VectorizedTableScanIterable(scan, 1000, false)) {
for (ColumnarBatch batch : itr) {
// no-op
}
}
})
.isInstanceOf(UnsupportedOperationException.class)
.hasMessage("Unsupported vector: null");
}

/**
* The test asserts that {@link CloseableIterator#hasNext()} returned by the {@link ArrowReader}
* is idempotent.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.arrow.vectorized;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.math.BigDecimal;
import java.util.function.Supplier;
import org.apache.arrow.vector.IntVector;
import org.apache.iceberg.types.Types;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.PrimitiveType;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;

class GenericArrowVectorAccessorFactoryTest {
@Mock
Supplier<GenericArrowVectorAccessorFactory.DecimalFactory<BigDecimal>> decimalFactorySupplier;

@Mock Supplier<GenericArrowVectorAccessorFactory.StringFactory<String>> stringFactorySupplier;

@Mock
Supplier<GenericArrowVectorAccessorFactory.StructChildFactory<Integer>>
structChildFactorySupplier;

@Mock
Supplier<GenericArrowVectorAccessorFactory.ArrayFactory<Integer, Integer[]>> arrayFactorySupplier;

@InjectMocks GenericArrowVectorAccessorFactory genericArrowVectorAccessorFactory;

@BeforeEach
void before() {
MockitoAnnotations.openMocks(this);
}

@Test
void testGetVectorAccessorWithIntVector() {
IntVector vector = mock(IntVector.class);
when(vector.get(0)).thenReturn(88);

Types.NestedField nestedField = Types.NestedField.optional(0, "a1", Types.IntegerType.get());
ColumnDescriptor columnDescriptor =
new ColumnDescriptor(
new String[] {nestedField.name()}, PrimitiveType.PrimitiveTypeName.INT32, 0, 1);
NullabilityHolder nullabilityHolder = new NullabilityHolder(10000);
VectorHolder vectorHolder =
new VectorHolder(columnDescriptor, vector, false, null, nullabilityHolder, nestedField);
ArrowVectorAccessor actual = genericArrowVectorAccessorFactory.getVectorAccessor(vectorHolder);
assertThat(actual).isNotNull();
assertThat(actual).isInstanceOf(ArrowVectorAccessor.class);
int intValue = actual.getInt(0);
assertThat(intValue).isEqualTo(88);
}

@Test
void testGetVectorAccessorWithNullVector() {
assertThatThrownBy(
() -> {
genericArrowVectorAccessorFactory.getVectorAccessor(VectorHolder.dummyHolder(1));
})
.isInstanceOf(UnsupportedOperationException.class)
.hasMessage("Unsupported vector: null");
}
}

0 comments on commit 1b255c0

Please sign in to comment.