Skip to content

Commit 8e18145

Browse files
committed
[native] Add TextReader
1 parent d5abf92 commit 8e18145

File tree

11 files changed

+181
-25
lines changed

11 files changed

+181
-25
lines changed

presto-native-execution/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@
382382
<configuration>
383383
<forkCount>1</forkCount>
384384
<reuseForks>false</reuseForks>
385-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
385+
<excludedGroups>remote-function</excludedGroups>
386386
<systemPropertyVariables>
387387
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
388388
<DATA_DIR>/tmp/velox</DATA_DIR>
@@ -434,7 +434,7 @@
434434
<groupId>org.apache.maven.plugins</groupId>
435435
<artifactId>maven-surefire-plugin</artifactId>
436436
<configuration>
437-
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile_reader,no_textfile_reader,async_data_cache</excludedGroups>
437+
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile,async_data_cache</excludedGroups>
438438
</configuration>
439439
</plugin>
440440
</plugins>

presto-native-execution/presto_cpp/main/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ target_link_libraries(
6565
velox_dwio_orc_reader
6666
velox_dwio_parquet_reader
6767
velox_dwio_parquet_writer
68+
velox_dwio_text_reader_register
6869
velox_dwio_text_writer_register
6970
velox_dynamic_library_loader
7071
velox_encode

presto-native-execution/presto_cpp/main/PrestoServer.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include "velox/dwio/orc/reader/OrcReader.h"
6060
#include "velox/dwio/parquet/RegisterParquetReader.h"
6161
#include "velox/dwio/parquet/RegisterParquetWriter.h"
62+
#include "velox/dwio/text/RegisterTextReader.h"
6263
#include "velox/dwio/text/RegisterTextWriter.h"
6364
#include "velox/exec/OutputBufferManager.h"
6465
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
@@ -1412,8 +1413,9 @@ void PrestoServer::registerFileReadersAndWriters() {
14121413
velox::orc::registerOrcReaderFactory();
14131414
velox::parquet::registerParquetReaderFactory();
14141415
velox::parquet::registerParquetWriterFactory();
1415-
if (SystemConfig::instance()->textWriterEnabled()) {
1416+
if (SystemConfig::instance()->textReaderWriterEnabled()) {
14161417
velox::text::registerTextWriterFactory();
1418+
velox::text::registerTextReaderFactory();
14171419
}
14181420
}
14191421

@@ -1422,7 +1424,8 @@ void PrestoServer::unregisterFileReadersAndWriters() {
14221424
velox::dwrf::unregisterDwrfWriterFactory();
14231425
velox::parquet::unregisterParquetReaderFactory();
14241426
velox::parquet::unregisterParquetWriterFactory();
1425-
if (SystemConfig::instance()->textWriterEnabled()) {
1427+
if (SystemConfig::instance()->textReaderWriterEnabled()) {
1428+
velox::text::unregisterTextReaderFactory();
14261429
velox::text::unregisterTextWriterFactory();
14271430
}
14281431
}

presto-native-execution/presto_cpp/main/common/Configs.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ SystemConfig::SystemConfig() {
260260
NUM_PROP(kExchangeIoEvbViolationThresholdMs, 1000),
261261
NUM_PROP(kHttpSrvIoEvbViolationThresholdMs, 1000),
262262
NUM_PROP(kMaxLocalExchangePartitionBufferSize, 65536),
263-
BOOL_PROP(kTextWriterEnabled, true),
263+
BOOL_PROP(kTextReaderWriterEnabled, true),
264264
BOOL_PROP(kCharNToVarcharImplicitCast, false),
265265
BOOL_PROP(kEnumTypesEnabled, true),
266266
};
@@ -924,8 +924,8 @@ uint64_t SystemConfig::maxLocalExchangePartitionBufferSize() const {
924924
.value();
925925
}
926926

927-
bool SystemConfig::textWriterEnabled() const {
928-
return optionalProperty<bool>(kTextWriterEnabled).value();
927+
bool SystemConfig::textReaderWriterEnabled() const {
928+
return optionalProperty<bool>(kTextReaderWriterEnabled).value();
929929
}
930930

931931
bool SystemConfig::charNToVarcharImplicitCast() const {

presto-native-execution/presto_cpp/main/common/Configs.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -759,9 +759,9 @@ class SystemConfig : public ConfigBase {
759759
static constexpr std::string_view kMaxLocalExchangePartitionBufferSize{
760760
"local-exchange.max-partition-buffer-size"};
761761

762-
// Add to temporarily help with gradual rollout for text writer
763-
// TODO: remove once text writer is fully rolled out
764-
static constexpr std::string_view kTextWriterEnabled{"text-writer-enabled"};
762+
// Add to temporarily help with gradual rollout for text reader and writer
763+
// TODO: remove once text reader and writer are fully rolled out
764+
static constexpr std::string_view kTextReaderWriterEnabled{"text-reader-writer-enabled"};
765765

766766
/// Enable the type char(n) with the same behavior as unbounded varchar.
767767
/// char(n) type is not supported by parser when set to false.
@@ -1063,7 +1063,7 @@ class SystemConfig : public ConfigBase {
10631063

10641064
uint64_t maxLocalExchangePartitionBufferSize() const;
10651065

1066-
bool textWriterEnabled() const;
1066+
bool textReaderWriterEnabled() const;
10671067

10681068
bool charNToVarcharImplicitCast() const;
10691069

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -403,6 +403,76 @@ public void testDateFilter()
403403
}
404404
}
405405

406+
@Test(groups = {"textfile"})
407+
public void testReadTableWithTextfileFormat()
408+
{
409+
assertQuery("SELECT * FROM nation_text");
410+
411+
String tmpTableName = generateRandomTableName();
412+
try {
413+
getExpectedQueryRunner().execute(getSession(), format(
414+
"CREATE TABLE %s (" +
415+
"id BIGINT," +
416+
"name VARCHAR," +
417+
"is_active BOOLEAN," +
418+
"score DOUBLE," +
419+
"created_at TIMESTAMP," +
420+
"tags ARRAY<VARCHAR>," +
421+
"metrics ARRAY<DOUBLE>," +
422+
"properties MAP<VARCHAR, VARCHAR>," +
423+
"flags MAP<TINYINT, BOOLEAN>," +
424+
"nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY<REAL>, sub_map MAP<SMALLINT, VARCHAR>)," +
425+
"price DECIMAL(15,2)," +
426+
"amount DECIMAL(21,6)," +
427+
"event_date DATE," +
428+
"ds VARCHAR" +
429+
") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", tmpTableName), ImmutableList.of());
430+
getExpectedQueryRunner().execute(getSession(), format(
431+
"INSERT INTO %s (" +
432+
"id," +
433+
"name," +
434+
"is_active," +
435+
"score," +
436+
"created_at," +
437+
"tags," +
438+
"metrics," +
439+
"properties," +
440+
"flags," +
441+
"nested_struct," +
442+
"price," +
443+
"amount," +
444+
"event_date," +
445+
"ds" +
446+
") VALUES (" +
447+
"1001," +
448+
"'Jane Doe'," +
449+
"TRUE," +
450+
"88.5," +
451+
"TIMESTAMP '2025-07-23 10:00:00'," +
452+
"ARRAY['alpha', 'beta', 'gamma']," +
453+
"ARRAY[3.14, 2.71, 1.41]," +
454+
"MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," +
455+
"MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," +
456+
"ROW(" +
457+
"42," +
458+
"'sub_jane'," +
459+
"ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," +
460+
"MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" +
461+
")," +
462+
"DECIMAL '12.34'," +
463+
"CAST('-123456789012345.123456' as DECIMAL(21,6))," +
464+
"DATE '2024-02-29'," +
465+
"'2025-07-01'" +
466+
")", tmpTableName), ImmutableList.of());
467+
// created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox.
468+
// https://github.com/facebookincubator/velox/issues/8127
469+
assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName));
470+
}
471+
finally {
472+
dropTableIfExists(tmpTableName);
473+
}
474+
}
475+
406476
@Test
407477
public void testOrderBy()
408478
{
@@ -1259,18 +1329,6 @@ public void testReadTableWithUnsupportedJsonFormat()
12591329
assertQueryFails("SELECT * FROM nation_json", ".*ReaderFactory is not registered for format json.*");
12601330
}
12611331

1262-
@Test(groups = {"no_textfile_reader"})
1263-
public void testReadTableWithUnsupportedTextfileFormat()
1264-
{
1265-
assertQueryFails("SELECT * FROM nation_text", ".*ReaderFactory is not registered for format text.*");
1266-
}
1267-
1268-
@Test(groups = {"textfile_reader"})
1269-
public void testReadTableWithTextfileFormat()
1270-
{
1271-
assertQuery("SELECT * FROM nation_text");
1272-
}
1273-
12741332
private void dropTableIfExists(String tableName)
12751333
{
12761334
// An ugly workaround for the lack of getExpectedQueryRunner()

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ private static void createTpcdsCallCenter(QueryRunner queryRunner, Session sessi
9090
switch (storageFormat) {
9191
case "PARQUET":
9292
case "ORC":
93+
case "TEXTFILE":
9394
queryRunner.execute(session, "CREATE TABLE call_center AS " +
9495
"SELECT * FROM tpcds.tiny.call_center");
9596
break;
@@ -158,6 +159,7 @@ private static void createTpcdsDateDim(QueryRunner queryRunner, Session session,
158159
switch (storageFormat) {
159160
case "PARQUET":
160161
case "ORC":
162+
case "TEXTFILE":
161163
queryRunner.execute(session, "CREATE TABLE date_dim AS " +
162164
"SELECT * FROM tpcds.tiny.date_dim");
163165
break;
@@ -202,6 +204,7 @@ private static void createTpcdsItem(QueryRunner queryRunner, Session session, St
202204
switch (storageFormat) {
203205
case "PARQUET":
204206
case "ORC":
207+
case "TEXTFILE":
205208
queryRunner.execute(session, "CREATE TABLE item AS " +
206209
"SELECT * FROM tpcds.tiny.item");
207210
break;
@@ -246,6 +249,7 @@ private static void createTpcdsStore(QueryRunner queryRunner, Session session, S
246249
switch (storageFormat) {
247250
case "PARQUET":
248251
case "ORC":
252+
case "TEXTFILE":
249253
queryRunner.execute(session, "CREATE TABLE store AS " +
250254
"SELECT * FROM tpcds.tiny.store");
251255
break;
@@ -300,6 +304,7 @@ private static void createTpcdsWebPage(QueryRunner queryRunner, Session session,
300304
switch (storageFormat) {
301305
case "PARQUET":
302306
case "ORC":
307+
case "TEXTFILE":
303308
queryRunner.execute(session, "CREATE TABLE web_page AS " +
304309
"SELECT * FROM tpcds.tiny.web_page");
305310
break;
@@ -337,6 +342,7 @@ private static void createTpcdsWebSite(QueryRunner queryRunner, Session session,
337342
switch (storageFormat) {
338343
case "PARQUET":
339344
case "ORC":
345+
case "TEXTFILE":
340346
queryRunner.execute(session, "CREATE TABLE web_site AS " +
341347
"SELECT * FROM tpcds.tiny.web_site");
342348
break;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativeworker;
15+
16+
import com.facebook.presto.testing.ExpectedQueryRunner;
17+
import com.facebook.presto.testing.QueryRunner;
18+
import org.testng.annotations.Test;
19+
20+
@Test(groups = {"textfile"})
21+
public class TestPrestoNativeTpcdsQueriesTextfileUsingThrift
22+
extends AbstractTestNativeTpcdsQueries
23+
{
24+
@Override
25+
protected QueryRunner createQueryRunner()
26+
throws Exception
27+
{
28+
return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
29+
.setStorageFormat("TEXTFILE")
30+
.setAddStorageFormatToPath(true)
31+
.setUseThrift(true)
32+
.build();
33+
}
34+
35+
@Override
36+
protected ExpectedQueryRunner createExpectedQueryRunner()
37+
throws Exception
38+
{
39+
this.storageFormat = "TEXTFILE";
40+
return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
41+
.setStorageFormat(this.storageFormat)
42+
.setAddStorageFormatToPath(true)
43+
.build();
44+
}
45+
}
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* http://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
package com.facebook.presto.nativeworker;
15+
16+
import com.facebook.presto.testing.ExpectedQueryRunner;
17+
import com.facebook.presto.testing.QueryRunner;
18+
import org.testng.annotations.Test;
19+
20+
@Test(groups = {"textfile"})
21+
public class TestPrestoNativeTpchQueriesTextfileUsingJSON
22+
extends AbstractTestNativeTpchQueries
23+
{
24+
private final String storageFormat = "TEXTFILE";
25+
26+
@Override
27+
protected QueryRunner createQueryRunner() throws Exception
28+
{
29+
return PrestoNativeQueryRunnerUtils.nativeHiveQueryRunnerBuilder()
30+
.setStorageFormat(storageFormat)
31+
.setAddStorageFormatToPath(true)
32+
.build();
33+
}
34+
35+
@Override
36+
protected ExpectedQueryRunner createExpectedQueryRunner() throws Exception
37+
{
38+
return PrestoNativeQueryRunnerUtils.javaHiveQueryRunnerBuilder()
39+
.setStorageFormat(storageFormat)
40+
.setAddStorageFormatToPath(true)
41+
.build();
42+
}
43+
}

presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ public void testUnicodeInJson()
6666
@Ignore
6767
public void testDistributedSortSingleNode() {}
6868

69-
// Disable: Text file reader is not supported. This test is also disabled in pom.xml through disabling groups "textfile_reader".
69+
// TODO: Enable testReadTableWithTextfileFormat when POS can be run on JDK 17.
7070
@Override
7171
public void testReadTableWithTextfileFormat() {}
7272

0 commit comments

Comments
 (0)