Skip to content

Commit 1ffcec3

Browse files
committed
[native] Add TextReader
1 parent b89a041 commit 1ffcec3

File tree

11 files changed

+186
-35
lines changed

11 files changed

+186
-35
lines changed

presto-native-execution/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@
382382
<configuration>
383383
<forkCount>1</forkCount>
384384
<reuseForks>false</reuseForks>
385-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
385+
<excludedGroups>remote-function,textfile</excludedGroups>
386386
<systemPropertyVariables>
387387
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
388388
<DATA_DIR>/tmp/velox</DATA_DIR>
@@ -434,7 +434,7 @@
434434
<groupId>org.apache.maven.plugins</groupId>
435435
<artifactId>maven-surefire-plugin</artifactId>
436436
<configuration>
437-
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile_reader,no_textfile_reader,async_data_cache</excludedGroups>
437+
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile,async_data_cache</excludedGroups>
438438
</configuration>
439439
</plugin>
440440
</plugins>

presto-native-execution/presto_cpp/main/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ target_link_libraries(
6565
velox_dwio_orc_reader
6666
velox_dwio_parquet_reader
6767
velox_dwio_parquet_writer
68+
velox_dwio_text_reader_register
6869
velox_dwio_text_writer_register
6970
velox_dynamic_library_loader
7071
velox_encode

presto-native-execution/presto_cpp/main/PrestoServer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include "velox/dwio/orc/reader/OrcReader.h"
6060
#include "velox/dwio/parquet/RegisterParquetReader.h"
6161
#include "velox/dwio/parquet/RegisterParquetWriter.h"
62+
#include "velox/dwio/text/RegisterTextReader.h"
6263
#include "velox/dwio/text/RegisterTextWriter.h"
6364
#include "velox/exec/OutputBufferManager.h"
6465
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
@@ -1412,8 +1413,9 @@ void PrestoServer::registerFileReadersAndWriters() {
14121413
velox::orc::registerOrcReaderFactory();
14131414
velox::parquet::registerParquetReaderFactory();
14141415
velox::parquet::registerParquetWriterFactory();
1415-
if (SystemConfig::instance()->textWriterEnabled()) {
1416+
if (SystemConfig::instance()->textReaderWriterEnabled()) {
14161417
velox::text::registerTextWriterFactory();
1418+
velox::text::registerTextReaderFactory();
14171419
}
14181420
}
14191421

@@ -1422,7 +1424,8 @@ void PrestoServer::unregisterFileReadersAndWriters() {
14221424
velox::dwrf::unregisterDwrfWriterFactory();
14231425
velox::parquet::unregisterParquetReaderFactory();
14241426
velox::parquet::unregisterParquetWriterFactory();
1425-
if (SystemConfig::instance()->textWriterEnabled()) {
1427+
if (SystemConfig::instance()->textReaderWriterEnabled()) {
1428+
velox::text::unregisterTextReaderFactory();
14261429
velox::text::unregisterTextWriterFactory();
14271430
}
14281431
}

presto-native-execution/presto_cpp/main/common/Configs.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ SystemConfig::SystemConfig() {
260260
NUM_PROP(kExchangeIoEvbViolationThresholdMs, 1000),
261261
NUM_PROP(kHttpSrvIoEvbViolationThresholdMs, 1000),
262262
NUM_PROP(kMaxLocalExchangePartitionBufferSize, 65536),
263-
BOOL_PROP(kTextWriterEnabled, true),
263+
BOOL_PROP(kTextReaderWriterEnabled, true),
264264
BOOL_PROP(kCharNToVarcharImplicitCast, false),
265265
};
266266
}
@@ -923,8 +923,8 @@ uint64_t SystemConfig::maxLocalExchangePartitionBufferSize() const {
923923
.value();
924924
}
925925

926-
bool SystemConfig::textWriterEnabled() const {
927-
return optionalProperty<bool>(kTextWriterEnabled).value();
926+
bool SystemConfig::textReaderWriterEnabled() const {
927+
return optionalProperty<bool>(kTextReaderWriterEnabled).value();
928928
}
929929

930930
bool SystemConfig::charNToVarcharImplicitCast() const {

presto-native-execution/presto_cpp/main/common/Configs.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -759,9 +759,9 @@ class SystemConfig : public ConfigBase {
759759
static constexpr std::string_view kMaxLocalExchangePartitionBufferSize{
760760
"local-exchange.max-partition-buffer-size"};
761761

762-
// Add to temporarily help with gradual rollout for text writer
763-
// TODO: remove once text writer is fully rolled out
764-
static constexpr std::string_view kTextWriterEnabled{"text-writer-enabled"};
762+
// Add to temporarily help with gradual rollout for text reader and writer
763+
// TODO: remove once text reader and writer are fully rolled out
764+
static constexpr std::string_view kTextReaderWriterEnabled{"text-reader-writer-enabled"};
765765

766766
/// Enable the type char(n) with the same behavior as unbounded varchar.
767767
/// char(n) type is not supported by parser when set to false.
@@ -1057,7 +1057,7 @@ class SystemConfig : public ConfigBase {
10571057

10581058
uint64_t maxLocalExchangePartitionBufferSize() const;
10591059

1060-
bool textWriterEnabled() const;
1060+
bool textReaderWriterEnabled() const;
10611061

10621062
bool charNToVarcharImplicitCast() const;
10631063
};

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java

Lines changed: 74 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,70 @@ public void testDateFilter()
403403
}
404404
}
405405

406+
@Test(groups = {"textfile"})
407+
public void testTableReader()
408+
{
409+
assertQuery("SELECT * FROM nation_text");
410+
}
411+
412+
@Test(groups = {"textfile"})
413+
public void testTableWrite()
414+
{
415+
String tmpTableName = generateRandomTableName();
416+
try {
417+
getQueryRunner().execute(String.format(
418+
"CREATE TABLE %s (" +
419+
"id BIGINT," +
420+
"name VARCHAR," +
421+
"is_active BOOLEAN," +
422+
"score DOUBLE," +
423+
"created_at TIMESTAMP," +
424+
"tags ARRAY<VARCHAR>," +
425+
"metrics ARRAY<DOUBLE>," +
426+
"properties MAP<VARCHAR, VARCHAR>," +
427+
"flags MAP<TINYINT, BOOLEAN>," +
428+
"nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY<REAL>, sub_map MAP<SMALLINT, VARCHAR>)," +
429+
"ds VARCHAR" +
430+
") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", tmpTableName));
431+
432+
getQueryRunner().execute(String.format(
433+
"INSERT INTO %s (" +
434+
"id," +
435+
"name," +
436+
"is_active," +
437+
"score," +
438+
"created_at," +
439+
"tags," +
440+
"metrics," +
441+
"properties," +
442+
"flags," +
443+
"nested_struct," +
444+
"ds" +
445+
") VALUES (" +
446+
"1001," +
447+
"'Jane Doe'," +
448+
"TRUE," +
449+
"88.5," +
450+
"TIMESTAMP '2025-07-23 10:00:00'," +
451+
"ARRAY['alpha', 'beta', 'gamma']," +
452+
"ARRAY[3.14, 2.71, 1.41]," +
453+
"MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," +
454+
"MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," +
455+
"ROW(" +
456+
"42," +
457+
"'sub_jane'," +
458+
"ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," +
459+
"MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" +
460+
")," +
461+
"'2025-07-01'" +
462+
")", tmpTableName));
463+
assertQueryResultCount(String.format("SELECT count(*) FROM %s", tmpTableName), 1);
464+
}
465+
finally {
466+
dropTableIfExists(tmpTableName);
467+
}
468+
}
469+
406470
@Test
407471
public void testOrderBy()
408472
{
@@ -1259,18 +1323,6 @@ public void testReadTableWithUnsupportedJsonFormat()
12591323
assertQueryFails("SELECT * FROM nation_json", ".*ReaderFactory is not registered for format json.*");
12601324
}
12611325

1262-
@Test(groups = {"no_textfile_reader"})
1263-
public void testReadTableWithUnsupportedTextfileFormat()
1264-
{
1265-
assertQueryFails("SELECT * FROM nation_text", ".*ReaderFactory is not registered for format text.*");
1266-
}
1267-
1268-
@Test(groups = {"textfile_reader"})
1269-
public void testReadTableWithTextfileFormat()
1270-
{
1271-
assertQuery("SELECT * FROM nation_text");
1272-
}
1273-
12741326
private void dropTableIfExists(String tableName)
12751327
{
12761328
// An ugly workaround for the lack of getExpectedQueryRunner()
@@ -1671,10 +1723,10 @@ public void testSelectFieldsWithCapitalLetters()
16711723
ColumnMetadata.builder()
16721724
.setName("col")
16731725
.setType(RowType.from(ImmutableList.of(
1674-
new RowType.Field(Optional.of("NationKey"), BIGINT),
1675-
new RowType.Field(Optional.of("NAME"), VARCHAR),
1676-
new RowType.Field(Optional.of("ReGiOnKeY"), BIGINT),
1677-
new RowType.Field(Optional.of("commenT"), VARCHAR))))
1726+
new RowType.Field(Optional.of("NationKey"), BIGINT),
1727+
new RowType.Field(Optional.of("NAME"), VARCHAR),
1728+
new RowType.Field(Optional.of("ReGiOnKeY"), BIGINT),
1729+
new RowType.Field(Optional.of("commenT"), VARCHAR))))
16781730
.build()),
16791731
tableProperties);
16801732
transaction(queryRunner.getTransactionManager(), queryRunner.getAccessControl())
@@ -1830,15 +1882,16 @@ public void testUnicodeInJson()
18301882
{
18311883
// Test casting to JSON returning the same results for all unicode characters in the
18321884
// entire range.
1833-
List<int[]> unicodeRanges = new ArrayList<int[]>() {
1885+
List<int[]> unicodeRanges = new ArrayList<int[]>()
1886+
{
18341887
{
1835-
add(new int[]{0, 0x7F});
1836-
add(new int[]{0x80, 0xD7FF});
1837-
add(new int[]{0xE000, 0xFFFF});
1888+
add(new int[] {0, 0x7F});
1889+
add(new int[] {0x80, 0xD7FF});
1890+
add(new int[] {0xE000, 0xFFFF});
18381891
}
18391892
};
18401893
for (int start = 0x10000; start < 0x110000; start += 0x10000) {
1841-
unicodeRanges.add(new int[]{start, start + 0xFFFF});
1894+
unicodeRanges.add(new int[] {start, start + 0xFFFF});
18421895
}
18431896
List<String> unicodeStrings = unicodeRanges.stream().map(range -> {
18441897
StringBuilder unicodeString = new StringBuilder();

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private static void createTpcdsCallCenter(QueryRunner queryRunner, Session sessi
9090
switch (storageFormat) {
9191
case "PARQUET":
9292
case "ORC":
93+
case "TEXTFILE":
9394
queryRunner.execute(session, "CREATE TABLE call_center AS " +
9495
"SELECT * FROM tpcds.tiny.call_center");
9596
break;
@@ -158,6 +159,7 @@ private static void createTpcdsDateDim(QueryRunner queryRunner, Session session,
158159
switch (storageFormat) {
159160
case "PARQUET":
160161
case "ORC":
162+
case "TEXTFILE":
161163
queryRunner.execute(session, "CREATE TABLE date_dim AS " +
162164
"SELECT * FROM tpcds.tiny.date_dim");
163165
break;
@@ -202,6 +204,7 @@ private static void createTpcdsItem(QueryRunner queryRunner, Session session, St
202204
switch (storageFormat) {
203205
case "PARQUET":
204206
case "ORC":
207+
case "TEXTFILE":
205208
queryRunner.execute(session, "CREATE TABLE item AS " +
206209
"SELECT * FROM tpcds.tiny.item");
207210
break;
@@ -246,6 +249,7 @@ private static void createTpcdsStore(QueryRunner queryRunner, Session session, S
246249
switch (storageFormat) {
247250
case "PARQUET":
248251
case "ORC":
252+
case "TEXTFILE":
249253
queryRunner.execute(session, "CREATE TABLE store AS " +
250254
"SELECT * FROM tpcds.tiny.store");
251255
break;
@@ -300,6 +304,7 @@ private static void createTpcdsWebPage(QueryRunner queryRunner, Session session,
300304
switch (storageFormat) {
301305
case "PARQUET":
302306
case "ORC":
307+
case "TEXTFILE":
303308
queryRunner.execute(session, "CREATE TABLE web_page AS " +
304309
"SELECT * FROM tpcds.tiny.web_page");
305310
break;
@@ -337,6 +342,7 @@ private static void createTpcdsWebSite(QueryRunner queryRunner, Session session,
337342
switch (storageFormat) {
338343
case "PARQUET":
339344
case "ORC":
345+
case "TEXTFILE":
340346
queryRunner.execute(session, "CREATE TABLE web_site AS " +
341347
"SELECT * FROM tpcds.tiny.web_site");
342348
break;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativeworker;
15+
16+
import com.facebook.presto.testing.ExpectedQueryRunner;
17+
import com.facebook.presto.testing.QueryRunner;
18+
import org.testng.annotations.Test;
19+
20+
@Test(groups = {"textfile"})
21+
public class TestPrestoNativeTpcdsQueriesTextfileUsingThrift
22+
extends AbstractTestNativeTpcdsQueries
23+
{
24+
@Override
25+
protected QueryRunner createQueryRunner()
26+
throws Exception
27+
{
28+
return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
29+
.setStorageFormat("TEXTFILE")
30+
.setAddStorageFormatToPath(true)
31+
.setUseThrift(true)
32+
.build();
33+
}
34+
35+
@Override
36+
protected ExpectedQueryRunner createExpectedQueryRunner()
37+
throws Exception
38+
{
39+
this.storageFormat = "TEXTFILE";
40+
return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
41+
.setStorageFormat(this.storageFormat)
42+
.setAddStorageFormatToPath(true)
43+
.build();
44+
}
45+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativeworker;
15+
16+
import com.facebook.presto.testing.ExpectedQueryRunner;
17+
import com.facebook.presto.testing.QueryRunner;
18+
import org.testng.annotations.Test;
19+
20+
@Test(groups = {"textfile"})
21+
public class TestPrestoNativeTpchQueriesTextfileUsingJSON
22+
extends AbstractTestNativeTpchQueries
23+
{
24+
private final String storageFormat = "TEXTFILE";
25+
26+
@Override
27+
protected QueryRunner createQueryRunner() throws Exception
28+
{
29+
return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
30+
.setStorageFormat(storageFormat)
31+
.setAddStorageFormatToPath(true)
32+
.build();
33+
}
34+
35+
@Override
36+
protected ExpectedQueryRunner createExpectedQueryRunner() throws Exception
37+
{
38+
return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
39+
.setStorageFormat(storageFormat)
40+
.setAddStorageFormatToPath(true)
41+
.build();
42+
}
43+
}

presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ public void testUnicodeInJson()
6666
@Ignore
6767
public void testDistributedSortSingleNode() {}
6868

69-
// Disable: Text file reader is not supported. This test is also disabled in pom.xml through disabling groups "textfile_reader".
69+
// Disable: Text file reader and writer is not supported.
7070
@Override
71-
public void testReadTableWithTextfileFormat() {}
71+
public void testTableWrite() {}
7272

7373
// Disable: Not supporte by POS
7474
@Override

0 commit comments

Comments
 (0)