Skip to content

Commit 6f91fd7

Browse files
Unable to read Delta tables with spaces in location or partition values
1 parent b92b8f2 commit 6f91fd7

File tree

38 files changed

+86
-3
lines changed

38 files changed

+86
-3
lines changed

presto-delta/src/main/java/com/facebook/presto/delta/DeltaExpressionUtils.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,10 @@
3030
import io.delta.kernel.data.Row;
3131
import io.delta.kernel.internal.InternalScanFileUtils;
3232
import io.delta.kernel.utils.CloseableIterator;
33+
import org.apache.hadoop.fs.Path;
3334

3435
import java.io.IOException;
36+
import java.net.URI;
3537
import java.sql.Date;
3638
import java.sql.Timestamp;
3739
import java.util.Iterator;
@@ -239,7 +241,7 @@ private static boolean evaluatePartitionPredicate(
239241
for (DeltaColumnHandle partitionColumn : partitionColumns) {
240242
String columnName = partitionColumn.getName();
241243
String partitionValue = InternalScanFileUtils.getPartitionValues(row).get(columnName);
242-
String filePath = InternalScanFileUtils.getAddFileStatus(row).getPath();
244+
String filePath = new Path(URI.create(InternalScanFileUtils.getAddFileStatus(row).getPath())).toString();
243245
logger.debug("Obtaining domain of file: " + filePath);
244246
Domain domain = getDomain(partitionColumn, partitionValue, typeManager, filePath);
245247
Optional<Map<String, Domain>> domains = partitionPredicate.getDomains();

presto-delta/src/main/java/com/facebook/presto/delta/DeltaSplitManager.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,11 @@
2727
import io.delta.kernel.utils.CloseableIterator;
2828
import io.delta.kernel.utils.FileStatus;
2929
import jakarta.inject.Inject;
30+
import org.apache.hadoop.fs.Path;
3031

3132
import java.io.IOException;
3233
import java.io.UncheckedIOException;
34+
import java.net.URI;
3335
import java.util.Map;
3436
import java.util.Map.Entry;
3537
import java.util.concurrent.CompletableFuture;
@@ -97,7 +99,7 @@ public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHan
9799
connectorId,
98100
deltaTable.getSchemaName(),
99101
deltaTable.getTableName(),
100-
addFileStatus.getPath(),
102+
new Path(URI.create(addFileStatus.getPath())).toString(),
101103
0, /* start */
102104
addFileStatus.getSize() /* split length - default is read the entire file in one split */,
103105
addFileStatus.getSize(),

presto-delta/src/test/java/com/facebook/presto/delta/AbstractDeltaDistributedQueryTestBase.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ public abstract class AbstractDeltaDistributedQueryTestBase
6262
"test-partitions-lowercase",
6363
"test-uppercase",
6464
"test-partitions-uppercase",
65-
"test-typing"
65+
"test-typing",
66+
"test-spaces"
6667
};
6768

6869
/**
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.delta;
15+
16+
import org.testng.annotations.Test;
17+
18+
import static java.lang.String.format;
19+
20+
/**
21+
* Tests that Delta tables with spaces in the table location or in partition values can be read.
22+
*/
23+
@Test
24+
public class TestDeltaSpaces
25+
extends AbstractDeltaDistributedQueryTestBase
26+
{
27+
@Test(dataProvider = "deltaReaderVersions")
28+
public void readDataWithSpaces(String version)
29+
{
30+
// Query the "test-spaces" golden table, filtering on a country value that contains a space ('SOUTH AFRICA').
31+
String testQuery =
32+
format("SELECT * FROM \"%s\".\"%s\" WHERE country = 'SOUTH AFRICA'", PATH_SCHEMA, goldenTablePathWithPrefix(version,
33+
"test-spaces"));
34+
35+
// NOTE(review): testQuery was already formatted above, so "v3" appears to be unused here unless the table path itself contains a format specifier - confirm whether a snapshot suffix was intended.
36+
String testQueryV3 = format(testQuery, "v3");
37+
String expResultsQueryV3 = "SELECT * FROM VALUES('Chima', 'manda', 'SOUTH AFRICA'), ('Adi', 'chie', 'SOUTH AFRICA')";
38+
assertQuery(testQueryV3, expResultsQueryV3);
39+
}
40+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"txnId":"0f5c61a9-c9fe-48d2-800c-24db752e063b","tableSizeBytes":0,"numFiles":0,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"72fbf1d8-14b1-4d72-acf1-4635d36eeab9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1756119495478},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[0,0,0,0,0,0,0,0,0,0]},"allFiles":[]}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{"commitInfo":{"timestamp":1756119495831,"userId":"4986169989250709","userName":"[email protected]","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[\"country\"]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpoint.writeStatsAsJson\":\"false\",\"delta.checkpoint.writeStatsAsStruct\":\"true\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.0.x-aarch64-photon-scala2.13","txnId":"0f5c61a9-c9fe-48d2-800c-24db752e063b"}}
2+
{"metaData":{"id":"72fbf1d8-14b1-4d72-acf1-4635d36eeab9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1756119495478}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","appendOnly","invariants"]}}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"txnId":"60aa6440-4633-426a-8b4a-d45dfab4e928","tableSizeBytes":880,"numFiles":1,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"72fbf1d8-14b1-4d72-acf1-4635d36eeab9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1756119495478},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[880,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[1,0,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"country=SOUTH%20AFRICA/part-00000-4aad1292-1c82-4525-a3af-4d0573f9c62d.c000.snappy.parquet","partitionValues":{"country":"SOUTH AFRICA"},"size":880,"modificationTime":1756119727000,"dataChange":false,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Chima\",\"last_name\":\"manda\"},\"maxValues\":{\"first_name\":\"Chima\",\"last_name\":\"manda\"},\"nullCount\":{\"first_name\":0,\"last_name\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1756119727000000","MIN_INSERTION_TIME":"1756119727000000","MAX_INSERTION_TIME":"1756119727000000","OPTIMIZE_TARGET_SIZE":"268435456"}}]}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1756119727541,"userId":"4986169989250709","userName":"[email protected]","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"880"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.0.x-aarch64-photon-scala2.13","txnId":"60aa6440-4633-426a-8b4a-d45dfab4e928"}}
2+
{"add":{"path":"country=SOUTH%20AFRICA/part-00000-4aad1292-1c82-4525-a3af-4d0573f9c62d.c000.snappy.parquet","partitionValues":{"country":"SOUTH AFRICA"},"size":880,"modificationTime":1756119727000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Chima\",\"last_name\":\"manda\"},\"maxValues\":{\"first_name\":\"Chima\",\"last_name\":\"manda\"},\"nullCount\":{\"first_name\":0,\"last_name\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1756119727000000","MIN_INSERTION_TIME":"1756119727000000","MAX_INSERTION_TIME":"1756119727000000","OPTIMIZE_TARGET_SIZE":"268435456"}}}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"txnId":"233a0b1b-d7d2-4b49-b47a-c3e920bf5957","tableSizeBytes":1745,"numFiles":2,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"72fbf1d8-14b1-4d72-acf1-4635d36eeab9","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"first_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"last_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"country\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["country"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1756119495478},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors"],"writerFeatures":["deletionVectors","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[1745,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[2,0,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"country=SOUTH%20AFRICA/part-00000-4aad1292-1c82-4525-a3af-4d0573f9c62d.c000.snappy.parquet","partitionValues":{"country":"SOUTH AFRICA"},"size":880,"modificationTime":1756119727000,"dataChange":false,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Chima\",\"last_name\":\"manda\"},\"maxValues\":{\"first_name\":\"Chima\",\"last_name\":\"manda\"},\"nullCount\":{\"first_name\":0,\"last_name\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1756119727000000","MIN_INSERTION_TIME":"1756119727000000","MAX_INSERTION_TIME":"1756119727000000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"country=SOUTH%20AFRICA/part-00000-e850600d-c9a8-428c-864e-308d42456618.c000.snappy.parquet","partitionValues":{"country":"SOUTH AFRICA"},"size":865,"modificationTime":1756119745000,"dataChange":false,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Adi\",\"last_name\":\"chie\"},\"maxValues\":{\"first_name\":\"Adi\",\"last_name\":\"chie\"},\"nullCount\":{\"first_name\":0,\"last_name\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1756119745000000","MIN_INSERTION_TIME":"1756119745000000","MAX_INSERTION_TIME":"1756119745000000","OPTIMIZE_TARGET_SIZE":"268435456"}}]}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"commitInfo":{"timestamp":1756119745919,"userId":"4986169989250709","userName":"[email protected]","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"865"},"tags":{"noRowsCopied":"true","restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.0.x-aarch64-photon-scala2.13","txnId":"233a0b1b-d7d2-4b49-b47a-c3e920bf5957"}}
2+
{"add":{"path":"country=SOUTH%20AFRICA/part-00000-e850600d-c9a8-428c-864e-308d42456618.c000.snappy.parquet","partitionValues":{"country":"SOUTH AFRICA"},"size":865,"modificationTime":1756119745000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"first_name\":\"Adi\",\"last_name\":\"chie\"},\"maxValues\":{\"first_name\":\"Adi\",\"last_name\":\"chie\"},\"nullCount\":{\"first_name\":0,\"last_name\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1756119745000000","MIN_INSERTION_TIME":"1756119745000000","MAX_INSERTION_TIME":"1756119745000000","OPTIMIZE_TARGET_SIZE":"268435456"}}}

0 commit comments

Comments
 (0)