Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions core/src/main/java/org/apache/iceberg/PartitionStats.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public class PartitionStats implements StructLike {
private int positionDeleteFileCount;
private long equalityDeleteRecordCount;
private int equalityDeleteFileCount;
private long totalRecordCount;
private Long totalRecordCount; // null by default
private Long lastUpdatedAt; // null by default
private Long lastUpdatedSnapshotId; // null by default

Expand Down Expand Up @@ -78,7 +78,15 @@ public int equalityDeleteFileCount() {
return equalityDeleteFileCount;
}

/**
* @deprecated since 1.9.0, will be removed in 1.10.0, use {@link #totalRecords()} instead.
*/
@Deprecated
Copy link
Contributor

@nastra nastra Mar 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we really need to deprecate the existing method and introduce a new one to handle this properly? I'm not a fan of having a method with optional in the name. Also what if the field itself becomes a Long but the method's return type stays the same?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe no one using it as the writer code is not present in previous version.

But just following the guidelines as it is a public interface and introduced in previous version.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

method's return type stays the same

We wanted to return null when not computed as per the discussion. We can't do that if the return type is primitive.

public long totalRecordCount() {
return totalRecordCount == null ? 0 : totalRecordCount;
}

public Long totalRecords() {
return totalRecordCount;
}

Expand Down Expand Up @@ -150,7 +158,12 @@ public void appendStats(PartitionStats entry) {
this.positionDeleteFileCount += entry.positionDeleteFileCount;
this.equalityDeleteRecordCount += entry.equalityDeleteRecordCount;
this.equalityDeleteFileCount += entry.equalityDeleteFileCount;
this.totalRecordCount += entry.totalRecordCount;

if (totalRecordCount == null) {
this.totalRecordCount = entry.totalRecordCount;
} else {
this.totalRecordCount += entry.totalRecordCount;
}

if (entry.lastUpdatedAt != null) {
updateSnapshotInfo(entry.lastUpdatedSnapshotId, entry.lastUpdatedAt);
Expand Down Expand Up @@ -236,8 +249,7 @@ public <T> void set(int pos, T value) {
this.equalityDeleteFileCount = value == null ? 0 : (int) value;
break;
case 9:
// optional field as per spec, implementation initialize to 0 for counters
this.totalRecordCount = value == null ? 0L : (long) value;
this.totalRecordCount = (Long) value;
break;
case 10:
this.lastUpdatedAt = (Long) value;
Expand Down
52 changes: 26 additions & 26 deletions core/src/test/java/org/apache/iceberg/TestPartitionStatsUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -116,7 +116,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -129,7 +129,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -142,7 +142,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));

Expand All @@ -168,7 +168,7 @@ public void testPartitionStats() throws Exception {
1, // one position delete file
0L,
0,
0L,
null,
snapshot2.timestampMillis(), // new snapshot from pos delete commit
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -181,7 +181,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -194,7 +194,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -207,7 +207,7 @@ public void testPartitionStats() throws Exception {
0,
eqDelete.recordCount(),
1, // one equality delete file
0L,
null,
snapshot3.timestampMillis(), // new snapshot from equality delete commit
snapshot3.snapshotId()));
}
Expand Down Expand Up @@ -247,7 +247,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -260,7 +260,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));

Expand Down Expand Up @@ -288,7 +288,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -301,7 +301,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -314,7 +314,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(), // new snapshot
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -327,7 +327,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(), // new snapshot
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -340,7 +340,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -353,7 +353,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -366,7 +366,7 @@ public void testPartitionStatsWithSchemaEvolution() throws Exception {
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()));
}
Expand Down Expand Up @@ -410,7 +410,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -423,7 +423,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));

Expand Down Expand Up @@ -459,7 +459,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -472,7 +472,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -485,7 +485,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -498,7 +498,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -511,7 +511,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -524,7 +524,7 @@ public void testPartitionStatsWithBucketTransformSchemaEvolution() throws Except
0,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()));
}
Expand Down Expand Up @@ -572,7 +572,7 @@ private static void computeAndValidatePartitionStats(Table testTable, Tuple... e
PartitionStats::positionDeleteFileCount,
PartitionStats::equalityDeleteRecordCount,
PartitionStats::equalityDeleteFileCount,
PartitionStats::totalRecordCount,
PartitionStats::totalRecords,
PartitionStats::lastUpdatedAt,
PartitionStats::lastUpdatedSnapshotId)
.containsExactlyInAnyOrder(expectedValues);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,12 +270,12 @@ public void testOptionalFieldsWriting() throws Exception {
PartitionStats::positionDeleteFileCount,
PartitionStats::equalityDeleteRecordCount,
PartitionStats::equalityDeleteFileCount,
PartitionStats::totalRecordCount,
PartitionStats::totalRecords,
PartitionStats::lastUpdatedAt,
PartitionStats::lastUpdatedSnapshotId)
.isEqualTo(
Arrays.asList(
0L, 0, 0L, 0, 0L, null, null)); // null counters must be initialized to zero.
0L, 0, 0L, 0, null, null, null)); // null counters must be initialized to zero.

PartitionStatisticsFile statisticsFile =
PartitionStatsHandler.writePartitionStatsFile(testTable, 42L, dataSchema, expected);
Expand Down Expand Up @@ -352,7 +352,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -365,7 +365,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -378,7 +378,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -391,7 +391,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));

Expand All @@ -416,7 +416,7 @@ public void testPartitionStats() throws Exception {
0,
eqDeletes.recordCount(),
1,
0L,
null,
snapshot3.timestampMillis(),
snapshot3.snapshotId()),
Tuple.tuple(
Expand All @@ -429,7 +429,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()),
Tuple.tuple(
Expand All @@ -442,7 +442,7 @@ public void testPartitionStats() throws Exception {
1,
0L,
0,
0L,
null,
snapshot2.timestampMillis(),
snapshot2.snapshotId()),
Tuple.tuple(
Expand All @@ -455,7 +455,7 @@ public void testPartitionStats() throws Exception {
0,
0L,
0,
0L,
null,
snapshot1.timestampMillis(),
snapshot1.snapshotId()));
}
Expand Down Expand Up @@ -517,7 +517,7 @@ private static void computeAndValidatePartitionStats(
PartitionStats::positionDeleteFileCount,
PartitionStats::equalityDeleteRecordCount,
PartitionStats::equalityDeleteFileCount,
PartitionStats::totalRecordCount,
PartitionStats::totalRecords,
PartitionStats::lastUpdatedAt,
PartitionStats::lastUpdatedSnapshotId)
.containsExactlyInAnyOrder(expectedValues);
Expand Down Expand Up @@ -591,7 +591,7 @@ private static boolean isEqual(
&& stats1.positionDeleteFileCount() == stats2.positionDeleteFileCount()
&& stats1.equalityDeleteRecordCount() == stats2.equalityDeleteRecordCount()
&& stats1.equalityDeleteFileCount() == stats2.equalityDeleteFileCount()
&& stats1.totalRecordCount() == stats2.totalRecordCount()
&& Objects.equals(stats1.totalRecords(), stats2.totalRecords())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not directly related to this PR, but why can't we handle equals/hashcode in PartitionStats directly?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PartitionStats has StructLike member for partition, it needs a comparator for equals(). But storing comparator for each PartitionStats is not efficient. Plus we also want to keep the members of this class same as partition stats spec.

It was discussed in one of the old PR.

&& Objects.equals(stats1.lastUpdatedAt(), stats2.lastUpdatedAt())
&& Objects.equals(stats1.lastUpdatedSnapshotId(), stats2.lastUpdatedSnapshotId());
}
Expand Down
Loading