Skip to content

Commit 9dc3789

Browse files
committed
[native] Add TextReader
1 parent 9108a9a commit 9dc3789

File tree

11 files changed

+180
-28
lines changed

11 files changed

+180
-28
lines changed

presto-native-execution/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@
413413
<configuration>
414414
<forkCount>1</forkCount>
415415
<reuseForks>false</reuseForks>
416-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
416+
<excludedGroups>remote-function</excludedGroups>
417417
<systemPropertyVariables>
418418
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
419419
<DATA_DIR>/tmp/velox</DATA_DIR>
@@ -465,7 +465,7 @@
465465
<groupId>org.apache.maven.plugins</groupId>
466466
<artifactId>maven-surefire-plugin</artifactId>
467467
<configuration>
468-
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile_reader,no_textfile_reader,async_data_cache</excludedGroups>
468+
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile,async_data_cache</excludedGroups>
469469
</configuration>
470470
</plugin>
471471
</plugins>

presto-native-execution/presto_cpp/main/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ target_link_libraries(
7070
velox_dwio_orc_reader
7171
velox_dwio_parquet_reader
7272
velox_dwio_parquet_writer
73+
velox_dwio_text_reader_register
7374
velox_dwio_text_writer_register
7475
velox_dynamic_library_loader
7576
velox_encode

presto-native-execution/presto_cpp/main/PrestoServer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include "velox/dwio/orc/reader/OrcReader.h"
6060
#include "velox/dwio/parquet/RegisterParquetReader.h"
6161
#include "velox/dwio/parquet/RegisterParquetWriter.h"
62+
#include "velox/dwio/text/RegisterTextReader.h"
6263
#include "velox/dwio/text/RegisterTextWriter.h"
6364
#include "velox/exec/OutputBufferManager.h"
6465
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
@@ -1430,7 +1431,8 @@ void PrestoServer::registerFileReadersAndWriters() {
14301431
velox::orc::registerOrcReaderFactory();
14311432
velox::parquet::registerParquetReaderFactory();
14321433
velox::parquet::registerParquetWriterFactory();
1433-
if (SystemConfig::instance()->textWriterEnabled()) {
1434+
if (SystemConfig::instance()->textReaderWriterEnabled()) {
1435+
velox::text::registerTextReaderFactory();
14341436
velox::text::registerTextWriterFactory();
14351437
}
14361438
}
@@ -1440,7 +1442,8 @@ void PrestoServer::unregisterFileReadersAndWriters() {
14401442
velox::dwrf::unregisterDwrfWriterFactory();
14411443
velox::parquet::unregisterParquetReaderFactory();
14421444
velox::parquet::unregisterParquetWriterFactory();
1443-
if (SystemConfig::instance()->textWriterEnabled()) {
1445+
if (SystemConfig::instance()->textReaderWriterEnabled()) {
1446+
velox::text::unregisterTextReaderFactory();
14441447
velox::text::unregisterTextWriterFactory();
14451448
}
14461449
}

presto-native-execution/presto_cpp/main/common/Configs.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ SystemConfig::SystemConfig() {
261261
NUM_PROP(kExchangeIoEvbViolationThresholdMs, 1000),
262262
NUM_PROP(kHttpSrvIoEvbViolationThresholdMs, 1000),
263263
NUM_PROP(kMaxLocalExchangePartitionBufferSize, 65536),
264-
BOOL_PROP(kTextWriterEnabled, true),
264+
BOOL_PROP(kTextReaderWriterEnabled, true),
265265
BOOL_PROP(kCharNToVarcharImplicitCast, false),
266266
BOOL_PROP(kEnumTypesEnabled, true),
267267
};
@@ -929,8 +929,8 @@ uint64_t SystemConfig::maxLocalExchangePartitionBufferSize() const {
929929
.value();
930930
}
931931

932-
bool SystemConfig::textWriterEnabled() const {
933-
return optionalProperty<bool>(kTextWriterEnabled).value();
932+
bool SystemConfig::textReaderWriterEnabled() const {
933+
return optionalProperty<bool>(kTextReaderWriterEnabled).value();
934934
}
935935

936936
bool SystemConfig::charNToVarcharImplicitCast() const {

presto-native-execution/presto_cpp/main/common/Configs.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -761,9 +761,9 @@ class SystemConfig : public ConfigBase {
761761
static constexpr std::string_view kMaxLocalExchangePartitionBufferSize{
762762
"local-exchange.max-partition-buffer-size"};
763763

764-
// Add to temporarily help with gradual rollout for text writer
765-
// TODO: remove once text writer is fully rolled out
766-
static constexpr std::string_view kTextWriterEnabled{"text-writer-enabled"};
764+
// Add to temporarily help with gradual rollout for text reader and writer
765+
// TODO: remove once text reader and writer are fully rolled out
766+
static constexpr std::string_view kTextReaderWriterEnabled{"text-reader-writer-enabled"};
767767

768768
/// Enable the type char(n) with the same behavior as unbounded varchar.
769769
/// char(n) type is not supported by parser when set to false.
@@ -1067,7 +1067,7 @@ class SystemConfig : public ConfigBase {
10671067

10681068
uint64_t maxLocalExchangePartitionBufferSize() const;
10691069

1070-
bool textWriterEnabled() const;
1070+
bool textReaderWriterEnabled() const;
10711071

10721072
bool charNToVarcharImplicitCast() const;
10731073

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,76 @@ public void testDateFilter()
413413
}
414414
}
415415

416+
@Test(groups = {"textfile"})
417+
public void testReadTableWithTextfileFormat()
418+
{
419+
assertQuery("SELECT * FROM nation_text");
420+
421+
String tmpTableName = generateRandomTableName();
422+
try {
423+
getExpectedQueryRunner().execute(getSession(), format(
424+
"CREATE TABLE %s (" +
425+
"id BIGINT," +
426+
"name VARCHAR," +
427+
"is_active BOOLEAN," +
428+
"score DOUBLE," +
429+
"created_at TIMESTAMP," +
430+
"tags ARRAY<VARCHAR>," +
431+
"metrics ARRAY<DOUBLE>," +
432+
"properties MAP<VARCHAR, VARCHAR>," +
433+
"flags MAP<TINYINT, BOOLEAN>," +
434+
"nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY<REAL>, sub_map MAP<SMALLINT, VARCHAR>)," +
435+
"price DECIMAL(15,2)," +
436+
"amount DECIMAL(21,6)," +
437+
"event_date DATE," +
438+
"ds VARCHAR" +
439+
") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", tmpTableName), ImmutableList.of());
440+
getExpectedQueryRunner().execute(getSession(), format(
441+
"INSERT INTO %s (" +
442+
"id," +
443+
"name," +
444+
"is_active," +
445+
"score," +
446+
"created_at," +
447+
"tags," +
448+
"metrics," +
449+
"properties," +
450+
"flags," +
451+
"nested_struct," +
452+
"price," +
453+
"amount," +
454+
"event_date," +
455+
"ds" +
456+
") VALUES (" +
457+
"1001," +
458+
"'Jane Doe'," +
459+
"TRUE," +
460+
"88.5," +
461+
"TIMESTAMP '2025-07-23 10:00:00'," +
462+
"ARRAY['alpha', 'beta', 'gamma']," +
463+
"ARRAY[3.14, 2.71, 1.41]," +
464+
"MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," +
465+
"MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," +
466+
"ROW(" +
467+
"42," +
468+
"'sub_jane'," +
469+
"ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," +
470+
"MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" +
471+
")," +
472+
"DECIMAL '12.34'," +
473+
"CAST('-123456789012345.123456' as DECIMAL(21,6))," +
474+
"DATE '2024-02-29'," +
475+
"'2025-07-01'" +
476+
")", tmpTableName), ImmutableList.of());
477+
// created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox.
478+
// https://github.com/facebookincubator/velox/issues/8127
479+
assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName));
480+
}
481+
finally {
482+
dropTableIfExists(tmpTableName);
483+
}
484+
}
485+
416486
@Test
417487
public void testOrderBy()
418488
{
@@ -1269,18 +1339,6 @@ public void testReadTableWithUnsupportedJsonFormat()
12691339
assertQueryFails("SELECT * FROM nation_json", "(?s).*ReaderFactory is not registered for format json.*");
12701340
}
12711341

1272-
@Test(groups = {"no_textfile_reader"})
1273-
public void testReadTableWithUnsupportedTextfileFormat()
1274-
{
1275-
assertQueryFails("SELECT * FROM nation_text", "(?s).*ReaderFactory is not registered for format text.*");
1276-
}
1277-
1278-
@Test(groups = {"textfile_reader"})
1279-
public void testReadTableWithTextfileFormat()
1280-
{
1281-
assertQuery("SELECT * FROM nation_text");
1282-
}
1283-
12841342
private void dropTableIfExists(String tableName)
12851343
{
12861344
// An ugly workaround for the lack of getExpectedQueryRunner()

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private static void createTpcdsCallCenter(QueryRunner queryRunner, Session sessi
9090
switch (storageFormat) {
9191
case "PARQUET":
9292
case "ORC":
93+
case "TEXTFILE":
9394
queryRunner.execute(session, "CREATE TABLE call_center AS " +
9495
"SELECT * FROM tpcds.tiny.call_center");
9596
break;
@@ -158,6 +159,7 @@ private static void createTpcdsDateDim(QueryRunner queryRunner, Session session,
158159
switch (storageFormat) {
159160
case "PARQUET":
160161
case "ORC":
162+
case "TEXTFILE":
161163
queryRunner.execute(session, "CREATE TABLE date_dim AS " +
162164
"SELECT * FROM tpcds.tiny.date_dim");
163165
break;
@@ -202,6 +204,7 @@ private static void createTpcdsItem(QueryRunner queryRunner, Session session, St
202204
switch (storageFormat) {
203205
case "PARQUET":
204206
case "ORC":
207+
case "TEXTFILE":
205208
queryRunner.execute(session, "CREATE TABLE item AS " +
206209
"SELECT * FROM tpcds.tiny.item");
207210
break;
@@ -246,6 +249,7 @@ private static void createTpcdsStore(QueryRunner queryRunner, Session session, S
246249
switch (storageFormat) {
247250
case "PARQUET":
248251
case "ORC":
252+
case "TEXTFILE":
249253
queryRunner.execute(session, "CREATE TABLE store AS " +
250254
"SELECT * FROM tpcds.tiny.store");
251255
break;
@@ -300,6 +304,7 @@ private static void createTpcdsWebPage(QueryRunner queryRunner, Session session,
300304
switch (storageFormat) {
301305
case "PARQUET":
302306
case "ORC":
307+
case "TEXTFILE":
303308
queryRunner.execute(session, "CREATE TABLE web_page AS " +
304309
"SELECT * FROM tpcds.tiny.web_page");
305310
break;
@@ -337,6 +342,7 @@ private static void createTpcdsWebSite(QueryRunner queryRunner, Session session,
337342
switch (storageFormat) {
338343
case "PARQUET":
339344
case "ORC":
345+
case "TEXTFILE":
340346
queryRunner.execute(session, "CREATE TABLE web_site AS " +
341347
"SELECT * FROM tpcds.tiny.web_site");
342348
break;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativeworker;
15+
16+
import com.facebook.presto.testing.ExpectedQueryRunner;
17+
import com.facebook.presto.testing.QueryRunner;
18+
import org.testng.annotations.Test;
19+
20+
@Test(groups = {"textfile"})
21+
public class TestPrestoNativeTpcdsQueriesTextfileUsingThrift
22+
extends AbstractTestNativeTpcdsQueries
23+
{
24+
@Override
25+
protected QueryRunner createQueryRunner()
26+
throws Exception
27+
{
28+
return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
29+
.setStorageFormat("TEXTFILE")
30+
.setAddStorageFormatToPath(true)
31+
.setUseThrift(true)
32+
.build();
33+
}
34+
35+
@Override
36+
protected ExpectedQueryRunner createExpectedQueryRunner()
37+
throws Exception
38+
{
39+
this.storageFormat = "TEXTFILE";
40+
return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
41+
.setStorageFormat(this.storageFormat)
42+
.setAddStorageFormatToPath(true)
43+
.build();
44+
}
45+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativeworker;
15+
16+
import com.facebook.presto.testing.ExpectedQueryRunner;
17+
import com.facebook.presto.testing.QueryRunner;
18+
import org.testng.annotations.Test;
19+
20+
@Test(groups = {"textfile"})
21+
public class TestPrestoNativeTpchQueriesTextfileUsingJSON
22+
extends AbstractTestNativeTpchQueries
23+
{
24+
private final String storageFormat = "TEXTFILE";
25+
26+
@Override
27+
protected QueryRunner createQueryRunner() throws Exception
28+
{
29+
return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
30+
.setStorageFormat(storageFormat)
31+
.setAddStorageFormatToPath(true)
32+
.build();
33+
}
34+
35+
@Override
36+
protected ExpectedQueryRunner createExpectedQueryRunner() throws Exception
37+
{
38+
return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
39+
.setStorageFormat(storageFormat)
40+
.setAddStorageFormatToPath(true)
41+
.build();
42+
}
43+
}

presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,6 @@ public void testUnicodeInJson()
8282
@Ignore
8383
public void testDistributedSortSingleNode() {}
8484

85-
// Disable: Text file reader is not supported. This test is also disabled in pom.xml through disabling groups "textfile_reader".
86-
@Override
87-
public void testReadTableWithTextfileFormat() {}
88-
8985
// Disable: Not supporte by POS
9086
@Override
9187
@Ignore

0 commit comments

Comments
 (0)