
Commit 467eac4

fix getField == null checks

Parent: 955f7b7

5 files changed, +11 −11 lines


hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/SecondaryIndexRecordGenerationUtils.java

Lines changed: 1 addition & 1 deletion
@@ -344,7 +344,7 @@ public Pair<String, String> next() {
 
   private static HoodieSchema getRequestedSchemaForSecondaryIndex(HoodieTableMetaClient metaClient, HoodieSchema tableSchema, String secondaryKeyField) {
     String[] recordKeyFields;
-    if (tableSchema.getField(RECORD_KEY_METADATA_FIELD) != null) {
+    if (tableSchema.getField(RECORD_KEY_METADATA_FIELD).isPresent()) {
      recordKeyFields = new String[] {RECORD_KEY_METADATA_FIELD};
    } else {
      recordKeyFields = metaClient.getTableConfig().getRecordKeyFields().orElse(new String[0]);
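
Across all five files the fix has the same shape: callers stop comparing the result of getField against null and instead ask the returned option-style wrapper whether the field is present. A minimal standalone sketch of that before/after pattern, using java.util.Optional and a hypothetical Schema class as stand-ins for Hudi's actual HoodieSchema API:

import java.util.Map;
import java.util.Optional;

public class FieldPresenceSketch {
  // Hypothetical stand-in for a schema whose getField returns an option rather than a nullable field.
  static class Schema {
    private final Map<String, Integer> fieldPositions;

    Schema(Map<String, Integer> fieldPositions) {
      this.fieldPositions = fieldPositions;
    }

    Optional<Integer> getField(String name) {
      return Optional.ofNullable(fieldPositions.get(name));
    }
  }

  public static void main(String[] args) {
    Schema tableSchema = new Schema(Map.of("_hoodie_record_key", 2));

    // Old style: if (tableSchema.getField("_hoodie_record_key") != null) { ... }
    // New style: presence is expressed by the wrapper itself.
    if (tableSchema.getField("_hoodie_record_key").isPresent()) {
      System.out.println("record key metadata field is part of the schema");
    } else {
      System.out.println("fall back to the table's configured record key fields");
    }
  }
}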

hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/BaseSparkInternalRowReaderContext.java

Lines changed: 4 additions & 4 deletions
@@ -31,7 +31,6 @@
 import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.storage.StorageConfiguration;
 
-import org.apache.avro.Schema;
 import org.apache.spark.sql.HoodieInternalRowUtils;
 import org.apache.spark.sql.catalyst.InternalRow;
 import org.apache.spark.sql.catalyst.expressions.UnsafeRow;
@@ -87,10 +86,11 @@ public Option<HoodieRecordMerger> getRecordMerger(RecordMergeMode mergeMode, Str
   * @param partitionFieldAndValues the partition fields and their values, if any are required by the reader
   * @return a function for transforming the row
   */
-  protected UnaryOperator<InternalRow> getBootstrapProjection(Schema from, Schema to, List<Pair<String, Object>> partitionFieldAndValues) {
-    Map<Integer, Object> partitionValuesByIndex = partitionFieldAndValues.stream().collect(Collectors.toMap(pair -> to.getField(pair.getKey()).pos(), Pair::getRight));
+  protected UnaryOperator<InternalRow> getBootstrapProjection(HoodieSchema from, HoodieSchema to, List<Pair<String, Object>> partitionFieldAndValues) {
+    Map<Integer, Object> partitionValuesByIndex = partitionFieldAndValues.stream()
+        .collect(Collectors.toMap(pair -> to.getField(pair.getKey()).orElseThrow(() -> new IllegalArgumentException("Missing field: " + pair.getKey())).pos(), Pair::getRight));
     Function1<InternalRow, UnsafeRow> unsafeRowWriter =
-        HoodieInternalRowUtils.getCachedUnsafeRowWriter(getCachedSchema(from), getCachedSchema(to), Collections.emptyMap(), partitionValuesByIndex);
+        HoodieInternalRowUtils.getCachedUnsafeRowWriter(getCachedSchema(from.toAvroSchema()), getCachedSchema(to.toAvroSchema()), Collections.emptyMap(), partitionValuesByIndex);
     return row -> (InternalRow) unsafeRowWriter.apply(row);
   }
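
The projection change also illustrates how a missing field should now surface: rather than a NullPointerException from calling .pos() on a null field, the lookup fails fast with a descriptive IllegalArgumentException via orElseThrow. A standalone sketch of that field-position lookup, again with java.util.Optional standing in for the real return type (class and field names below are illustrative):

import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

public class PartitionIndexSketch {
  // Maps each (fieldName, value) pair to (fieldPosition, value), failing fast on unknown fields.
  static Map<Integer, Object> byPosition(Map<String, Integer> schemaPositions,
                                         List<Map.Entry<String, Object>> fieldAndValues) {
    return fieldAndValues.stream()
        .collect(Collectors.toMap(
            entry -> Optional.ofNullable(schemaPositions.get(entry.getKey()))
                .orElseThrow(() -> new IllegalArgumentException("Missing field: " + entry.getKey())),
            Map.Entry::getValue));
  }

  public static void main(String[] args) {
    Map<String, Integer> positions = Map.of("region", 4, "date", 5);
    List<Map.Entry<String, Object>> values =
        List.of(Map.entry("region", (Object) "us-west"), Map.entry("date", (Object) "2024-01-01"));
    // Prints the partition values keyed by their field position in the schema.
    System.out.println(byPosition(positions, values));
  }
}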

hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/SparkFileFormatInternalRowReaderContext.scala

Lines changed: 4 additions & 4 deletions
@@ -152,10 +152,10 @@ class SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR
 
     //If we need to do position based merging with log files we will leave the row index column at the end
     val dataProjection = if (getShouldMergeUseRecordPosition) {
-      getBootstrapProjection(dataRequiredSchema.toAvroSchema, dataRequiredSchema.toAvroSchema, partitionFieldAndValues)
+      getBootstrapProjection(dataRequiredSchema, dataRequiredSchema, partitionFieldAndValues)
     } else {
-      getBootstrapProjection(dataRequiredSchema.toAvroSchema,
-        HoodieAvroUtils.removeFields(dataRequiredSchema.toAvroSchema, rowIndexColumn), partitionFieldAndValues)
+      getBootstrapProjection(dataRequiredSchema,
+        HoodieSchemaUtils.removeFields(dataRequiredSchema, rowIndexColumn), partitionFieldAndValues)
     }
 
     //row index will always be the last column
@@ -209,7 +209,7 @@ class SparkFileFormatInternalRowReaderContext(baseFileReader: SparkColumnarFileR
       }
     }
   } else {
-    val dataProjection = getBootstrapProjection(dataRequiredSchema.toAvroSchema, dataRequiredSchema.toAvroSchema, partitionFieldAndValues)
+    val dataProjection = getBootstrapProjection(dataRequiredSchema, dataRequiredSchema, partitionFieldAndValues)
     new ClosableIterator[Any] {
       val combinedRow = new JoinedRow()

hudi-common/src/main/java/org/apache/hudi/common/table/read/HoodieFileGroupReader.java

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ private ClosableIterator<T> makeBootstrapBaseFileIterator(HoodieBaseFile baseFil
     List<Pair<String, Object>> filterFieldsAndValues = new ArrayList<>(partitionFields.length);
     for (int i = 0; i < partitionFields.length; i++) {
       String field = partitionFields[i];
-      if (dataSchema.getField(field) != null) {
+      if (dataSchema.getField(field).isPresent()) {
        filterFieldsAndValues.add(Pair.of(field, readerContext.getRecordContext().convertPartitionValueToEngineType((Comparable) partitionValues[i])));
      }
    }

hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/FlinkRowDataReaderContext.java

Lines changed: 1 addition & 1 deletion
@@ -190,7 +190,7 @@ public void setSchemaHandler(FileGroupReaderSchemaHandler<RowData> schemaHandler
       return;
     }
     // primary key semantic is lost if not all primary key fields are included in the request schema.
-    boolean pkSemanticLost = Arrays.stream(recordKeysOpt.get()).anyMatch(k -> schemaHandler.getRequestedSchema().getField(k) == null);
+    boolean pkSemanticLost = Arrays.stream(recordKeysOpt.get()).anyMatch(k -> schemaHandler.getRequestedSchema().getField(k).isEmpty());
     if (pkSemanticLost) {
       return;
     }
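
In the Flink reader context the same idea runs in the negative direction: primary key semantics are considered lost as soon as any configured record key field is absent from the requested schema, now expressed with isEmpty() instead of == null. A standalone sketch of that anyMatch check, with java.util.Optional standing in for the actual getField return type (the RequestedSchema class and field names are illustrative):

import java.util.Arrays;
import java.util.Optional;
import java.util.Set;

public class PkSemanticSketch {
  // Hypothetical stand-in for a requested schema that exposes option-based field lookup.
  static class RequestedSchema {
    private final Set<String> fields;

    RequestedSchema(Set<String> fields) {
      this.fields = fields;
    }

    Optional<String> getField(String name) {
      return fields.contains(name) ? Optional.of(name) : Optional.empty();
    }
  }

  public static void main(String[] args) {
    RequestedSchema requested = new RequestedSchema(Set.of("id", "ts"));
    String[] recordKeys = {"id", "region"};

    // Primary key semantics are lost if any record key field is missing from the requested schema.
    boolean pkSemanticLost = Arrays.stream(recordKeys)
        .anyMatch(k -> requested.getField(k).isEmpty());
    System.out.println("pkSemanticLost = " + pkSemanticLost); // true, because "region" is absent
  }
}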
