Skip to content

Commit 49fc64e

Browse files
committed
feat(native): Add TextReader
1 parent 6a51bc1 commit 49fc64e

File tree

12 files changed

+207
-19
lines changed

12 files changed

+207
-19
lines changed

presto-docs/src/main/sphinx/presto_cpp/properties.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,22 @@ avoid exceeding memory limits for the query.
431431
only by aborting. This flag is only effective if
432432
``shared-arbitrator.global-arbitration-enabled`` is ``true``.
433433

434+
``text-writer-enabled``
435+
^^^^^^^^^^^^^^^^^^^^^^^
436+
437+
* **Type:** ``boolean``
438+
* **Default value:** ``true``
439+
440+
Enables writing data in ``TEXTFILE`` format.
441+
442+
``text-reader-enabled``
443+
^^^^^^^^^^^^^^^^^^^^^^^
444+
445+
* **Type:** ``boolean``
446+
* **Default value:** ``true``
447+
448+
Enables reading data in ``TEXTFILE`` format.
449+
434450
Cache Properties
435451
----------------
436452

presto-native-execution/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@
413413
<configuration>
414414
<forkCount>1</forkCount>
415415
<reuseForks>false</reuseForks>
416-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
416+
<excludedGroups>remote-function</excludedGroups>
417417
<systemPropertyVariables>
418418
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
419419
<DATA_DIR>/tmp/velox</DATA_DIR>
@@ -465,7 +465,7 @@
465465
<groupId>org.apache.maven.plugins</groupId>
466466
<artifactId>maven-surefire-plugin</artifactId>
467467
<configuration>
468-
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile_reader,no_textfile_reader,async_data_cache</excludedGroups>
468+
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile,async_data_cache</excludedGroups>
469469
</configuration>
470470
</plugin>
471471
</plugins>

presto-native-execution/presto_cpp/main/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ target_link_libraries(
7878
velox_dwio_orc_reader
7979
velox_dwio_parquet_reader
8080
velox_dwio_parquet_writer
81+
velox_dwio_text_reader_register
8182
velox_dwio_text_writer_register
8283
velox_dynamic_library_loader
8384
velox_encode

presto-native-execution/presto_cpp/main/PrestoServer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "velox/dwio/orc/reader/OrcReader.h"
6262
#include "velox/dwio/parquet/RegisterParquetReader.h"
6363
#include "velox/dwio/parquet/RegisterParquetWriter.h"
64+
#include "velox/dwio/text/RegisterTextReader.h"
6465
#include "velox/dwio/text/RegisterTextWriter.h"
6566
#include "velox/exec/OutputBufferManager.h"
6667
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
@@ -1445,6 +1446,9 @@ void PrestoServer::registerFileReadersAndWriters() {
14451446
if (SystemConfig::instance()->textWriterEnabled()) {
14461447
velox::text::registerTextWriterFactory();
14471448
}
1449+
if (SystemConfig::instance()->textReaderEnabled()) {
1450+
velox::text::registerTextReaderFactory();
1451+
}
14481452
}
14491453

14501454
void PrestoServer::unregisterFileReadersAndWriters() {
@@ -1455,6 +1459,9 @@ void PrestoServer::unregisterFileReadersAndWriters() {
14551459
if (SystemConfig::instance()->textWriterEnabled()) {
14561460
velox::text::unregisterTextWriterFactory();
14571461
}
1462+
if (SystemConfig::instance()->textReaderEnabled()) {
1463+
velox::text::unregisterTextReaderFactory();
1464+
}
14581465
}
14591466

14601467
void PrestoServer::registerStatsCounters() {

presto-native-execution/presto_cpp/main/common/Configs.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,7 @@ SystemConfig::SystemConfig() {
268268
NUM_PROP(kHttpSrvIoEvbViolationThresholdMs, 1000),
269269
NUM_PROP(kMaxLocalExchangePartitionBufferSize, 65536),
270270
BOOL_PROP(kTextWriterEnabled, true),
271+
BOOL_PROP(kTextReaderEnabled, true),
271272
BOOL_PROP(kCharNToVarcharImplicitCast, false),
272273
BOOL_PROP(kEnumTypesEnabled, true),
273274
};
@@ -969,6 +970,10 @@ bool SystemConfig::textWriterEnabled() const {
969970
return optionalProperty<bool>(kTextWriterEnabled).value();
970971
}
971972

973+
bool SystemConfig::textReaderEnabled() const {
974+
return optionalProperty<bool>(kTextReaderEnabled).value();
975+
}
976+
972977
bool SystemConfig::charNToVarcharImplicitCast() const {
973978
return optionalProperty<bool>(kCharNToVarcharImplicitCast).value();
974979
}

presto-native-execution/presto_cpp/main/common/Configs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -792,6 +792,10 @@ class SystemConfig : public ConfigBase {
792792
// TODO: remove once text writer is fully rolled out
793793
static constexpr std::string_view kTextWriterEnabled{"text-writer-enabled"};
794794

795+
// Add to temporarily help with gradual rollout for text reader
796+
// TODO: remove once text reader is fully rolled out
797+
static constexpr std::string_view kTextReaderEnabled{"text-reader-enabled"};
798+
795799
/// Enable the type char(n) with the same behavior as unbounded varchar.
796800
/// char(n) type is not supported by parser when set to false.
797801
static constexpr std::string_view kCharNToVarcharImplicitCast{
@@ -1108,6 +1112,8 @@ class SystemConfig : public ConfigBase {
11081112

11091113
bool textWriterEnabled() const;
11101114

1115+
bool textReaderEnabled() const;
1116+
11111117
bool charNToVarcharImplicitCast() const;
11121118

11131119
bool enumTypesEnabled() const;

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,76 @@ public void testDateFilter()
413413
}
414414
}
415415

416+
@Test(groups = {"textfile"})
417+
public void testReadTableWithTextfileFormat()
418+
{
419+
assertQuery("SELECT * FROM nation_text");
420+
421+
String tmpTableName = generateRandomTableName();
422+
try {
423+
getExpectedQueryRunner().execute(getSession(), format(
424+
"CREATE TABLE %s (" +
425+
"id BIGINT," +
426+
"name VARCHAR," +
427+
"is_active BOOLEAN," +
428+
"score DOUBLE," +
429+
"created_at TIMESTAMP," +
430+
"tags ARRAY<VARCHAR>," +
431+
"metrics ARRAY<DOUBLE>," +
432+
"properties MAP<VARCHAR, VARCHAR>," +
433+
"flags MAP<TINYINT, BOOLEAN>," +
434+
"nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY<REAL>, sub_map MAP<SMALLINT, VARCHAR>)," +
435+
"price DECIMAL(15,2)," +
436+
"amount DECIMAL(21,6)," +
437+
"event_date DATE," +
438+
"ds VARCHAR" +
439+
") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", tmpTableName), ImmutableList.of());
440+
getExpectedQueryRunner().execute(getSession(), format(
441+
"INSERT INTO %s (" +
442+
"id," +
443+
"name," +
444+
"is_active," +
445+
"score," +
446+
"created_at," +
447+
"tags," +
448+
"metrics," +
449+
"properties," +
450+
"flags," +
451+
"nested_struct," +
452+
"price," +
453+
"amount," +
454+
"event_date," +
455+
"ds" +
456+
") VALUES (" +
457+
"1001," +
458+
"'Jane Doe'," +
459+
"TRUE," +
460+
"88.5," +
461+
"TIMESTAMP '2025-07-23 10:00:00'," +
462+
"ARRAY['alpha', 'beta', 'gamma']," +
463+
"ARRAY[3.14, 2.71, 1.41]," +
464+
"MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," +
465+
"MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," +
466+
"ROW(" +
467+
"42," +
468+
"'sub_jane'," +
469+
"ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," +
470+
"MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" +
471+
")," +
472+
"DECIMAL '12.34'," +
473+
"CAST('-123456789012345.123456' as DECIMAL(21,6))," +
474+
"DATE '2024-02-29'," +
475+
"'2025-07-01'" +
476+
")", tmpTableName), ImmutableList.of());
477+
// created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox.
478+
// https://github.com/facebookincubator/velox/issues/8127
479+
assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName));
480+
}
481+
finally {
482+
dropTableIfExists(tmpTableName);
483+
}
484+
}
485+
416486
@Test
417487
public void testOrderBy()
418488
{
@@ -1269,18 +1339,6 @@ public void testReadTableWithUnsupportedJsonFormat()
12691339
assertQueryFails("SELECT * FROM nation_json", "(?s).*ReaderFactory is not registered for format json.*");
12701340
}
12711341

1272-
@Test(groups = {"no_textfile_reader"})
1273-
public void testReadTableWithUnsupportedTextfileFormat()
1274-
{
1275-
assertQueryFails("SELECT * FROM nation_text", "(?s).*ReaderFactory is not registered for format text.*");
1276-
}
1277-
1278-
@Test(groups = {"textfile_reader"})
1279-
public void testReadTableWithTextfileFormat()
1280-
{
1281-
assertQuery("SELECT * FROM nation_text");
1282-
}
1283-
12841342
private void dropTableIfExists(String tableName)
12851343
{
12861344
// An ugly workaround for the lack of getExpectedQueryRunner()

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ public abstract class AbstractTestNativeTpcdsQueries
4141
"web_sales", "web_site"};
4242
Map<String, Long> deletedRowsMap = new HashMap<>();
4343

44+
public void setStorageFormat(String storageFormat)
45+
{
46+
this.storageFormat = storageFormat;
47+
}
48+
4449
@Override
4550
protected void createTables()
4651
{
@@ -90,6 +95,7 @@ private static void createTpcdsCallCenter(QueryRunner queryRunner, Session sessi
9095
switch (storageFormat) {
9196
case "PARQUET":
9297
case "ORC":
98+
case "TEXTFILE":
9399
queryRunner.execute(session, "CREATE TABLE call_center AS " +
94100
"SELECT * FROM tpcds.tiny.call_center");
95101
break;
@@ -158,6 +164,7 @@ private static void createTpcdsDateDim(QueryRunner queryRunner, Session session,
158164
switch (storageFormat) {
159165
case "PARQUET":
160166
case "ORC":
167+
case "TEXTFILE":
161168
queryRunner.execute(session, "CREATE TABLE date_dim AS " +
162169
"SELECT * FROM tpcds.tiny.date_dim");
163170
break;
@@ -202,6 +209,7 @@ private static void createTpcdsItem(QueryRunner queryRunner, Session session, St
202209
switch (storageFormat) {
203210
case "PARQUET":
204211
case "ORC":
212+
case "TEXTFILE":
205213
queryRunner.execute(session, "CREATE TABLE item AS " +
206214
"SELECT * FROM tpcds.tiny.item");
207215
break;
@@ -246,6 +254,7 @@ private static void createTpcdsStore(QueryRunner queryRunner, Session session, S
246254
switch (storageFormat) {
247255
case "PARQUET":
248256
case "ORC":
257+
case "TEXTFILE":
249258
queryRunner.execute(session, "CREATE TABLE store AS " +
250259
"SELECT * FROM tpcds.tiny.store");
251260
break;
@@ -300,6 +309,7 @@ private static void createTpcdsWebPage(QueryRunner queryRunner, Session session,
300309
switch (storageFormat) {
301310
case "PARQUET":
302311
case "ORC":
312+
case "TEXTFILE":
303313
queryRunner.execute(session, "CREATE TABLE web_page AS " +
304314
"SELECT * FROM tpcds.tiny.web_page");
305315
break;
@@ -337,6 +347,7 @@ private static void createTpcdsWebSite(QueryRunner queryRunner, Session session,
337347
switch (storageFormat) {
338348
case "PARQUET":
339349
case "ORC":
350+
case "TEXTFILE":
340351
queryRunner.execute(session, "CREATE TABLE web_site AS " +
341352
"SELECT * FROM tpcds.tiny.web_site");
342353
break;

presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,6 @@ public void testUnicodeInJson()
8282
@Ignore
8383
public void testDistributedSortSingleNode() {}
8484

85-
// Disable: Text file reader is not supported. This test is also disabled in pom.xml through disabling groups "textfile_reader".
86-
@Override
87-
public void testReadTableWithTextfileFormat() {}
88-
8985
// Disable: Not supporte by POS
9086
@Override
9187
@Ignore

presto-native-tests/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@
266266
<configuration>
267267
<forkCount>1</forkCount>
268268
<reuseForks>false</reuseForks>
269-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
269+
<excludedGroups>remote-function</excludedGroups>
270270
<systemPropertyVariables>
271271
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
272272
</systemPropertyVariables>

0 commit comments

Comments
 (0)