Skip to content

Commit 215a349

Browse files
committed
feat(native): Add TextReader
1 parent 1bd47cb commit 215a349

File tree

12 files changed

+205
-20
lines changed

12 files changed

+205
-20
lines changed

presto-docs/src/main/sphinx/presto_cpp/properties.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,22 @@ avoid exceeding memory limits for the query.
431431
only by aborting. This flag is only effective if
432432
``shared-arbitrator.global-arbitration-enabled`` is ``true``.
433433

434+
``text-writer-enabled``
435+
^^^^^^^^^^^^^^^^^^^^^^^
436+
437+
* **Type:** ``boolean``
438+
* **Default value:** ``true``
439+
440+
Enables writing data in ``TEXTFILE`` format.
441+
442+
``text-reader-enabled``
443+
^^^^^^^^^^^^^^^^^^^^^^^
444+
445+
* **Type:** ``boolean``
446+
* **Default value:** ``true``
447+
448+
Enables reading data in ``TEXTFILE`` format.
449+
434450
Cache Properties
435451
----------------
436452

presto-native-execution/pom.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@
413413
<configuration>
414414
<forkCount>1</forkCount>
415415
<reuseForks>false</reuseForks>
416-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
416+
<excludedGroups>remote-function</excludedGroups>
417417
<systemPropertyVariables>
418418
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
419419
<DATA_DIR>/tmp/velox</DATA_DIR>
@@ -465,7 +465,7 @@
465465
<groupId>org.apache.maven.plugins</groupId>
466466
<artifactId>maven-surefire-plugin</artifactId>
467467
<configuration>
468-
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile_reader,no_textfile_reader,async_data_cache</excludedGroups>
468+
<excludedGroups combine.self="override">writer,parquet,remote-function,textfile,async_data_cache</excludedGroups>
469469
</configuration>
470470
</plugin>
471471
</plugins>

presto-native-execution/presto_cpp/main/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ target_link_libraries(
7878
velox_dwio_orc_reader
7979
velox_dwio_parquet_reader
8080
velox_dwio_parquet_writer
81+
velox_dwio_text_reader_register
8182
velox_dwio_text_writer_register
8283
velox_dynamic_library_loader
8384
velox_encode

presto-native-execution/presto_cpp/main/PrestoServer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#include "velox/dwio/orc/reader/OrcReader.h"
6262
#include "velox/dwio/parquet/RegisterParquetReader.h"
6363
#include "velox/dwio/parquet/RegisterParquetWriter.h"
64+
#include "velox/dwio/text/RegisterTextReader.h"
6465
#include "velox/dwio/text/RegisterTextWriter.h"
6566
#include "velox/exec/OutputBufferManager.h"
6667
#include "velox/exec/TraceUtil.h"
@@ -1447,6 +1448,9 @@ void PrestoServer::registerFileReadersAndWriters() {
14471448
if (SystemConfig::instance()->textWriterEnabled()) {
14481449
velox::text::registerTextWriterFactory();
14491450
}
1451+
if (SystemConfig::instance()->textReaderEnabled()) {
1452+
velox::text::registerTextReaderFactory();
1453+
}
14501454
}
14511455

14521456
void PrestoServer::unregisterFileReadersAndWriters() {
@@ -1457,6 +1461,9 @@ void PrestoServer::unregisterFileReadersAndWriters() {
14571461
if (SystemConfig::instance()->textWriterEnabled()) {
14581462
velox::text::unregisterTextWriterFactory();
14591463
}
1464+
if (SystemConfig::instance()->textReaderEnabled()) {
1465+
velox::text::unregisterTextReaderFactory();
1466+
}
14601467
}
14611468

14621469
void PrestoServer::registerStatsCounters() {

presto-native-execution/presto_cpp/main/common/Configs.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ SystemConfig::SystemConfig() {
273273
NUM_PROP(kHttpSrvIoEvbViolationThresholdMs, 1000),
274274
NUM_PROP(kMaxLocalExchangePartitionBufferSize, 65536),
275275
BOOL_PROP(kTextWriterEnabled, true),
276+
BOOL_PROP(kTextReaderEnabled, true),
276277
BOOL_PROP(kCharNToVarcharImplicitCast, false),
277278
BOOL_PROP(kEnumTypesEnabled, true),
278279
};
@@ -997,6 +998,10 @@ bool SystemConfig::textWriterEnabled() const {
997998
return optionalProperty<bool>(kTextWriterEnabled).value();
998999
}
9991000

1001+
bool SystemConfig::textReaderEnabled() const {
1002+
return optionalProperty<bool>(kTextReaderEnabled).value();
1003+
}
1004+
10001005
bool SystemConfig::charNToVarcharImplicitCast() const {
10011006
return optionalProperty<bool>(kCharNToVarcharImplicitCast).value();
10021007
}

presto-native-execution/presto_cpp/main/common/Configs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,10 @@ class SystemConfig : public ConfigBase {
813813
// TODO: remove once text writer is fully rolled out
814814
static constexpr std::string_view kTextWriterEnabled{"text-writer-enabled"};
815815

816+
// Add to temporarily help with gradual rollout for text reader
817+
// TODO: remove once text reader is fully rolled out
818+
static constexpr std::string_view kTextReaderEnabled{"text-reader-enabled"};
819+
816820
/// Enable the type char(n) with the same behavior as unbounded varchar.
817821
/// char(n) type is not supported by parser when set to false.
818822
static constexpr std::string_view kCharNToVarcharImplicitCast{
@@ -1139,6 +1143,8 @@ class SystemConfig : public ConfigBase {
11391143

11401144
bool textWriterEnabled() const;
11411145

1146+
bool textReaderEnabled() const;
1147+
11421148
bool charNToVarcharImplicitCast() const;
11431149

11441150
bool enumTypesEnabled() const;

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeGeneralQueries.java

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,76 @@ public void testDateFilter()
413413
}
414414
}
415415

416+
@Test(groups = {"textfile"})
417+
public void testReadTableWithTextfileFormat()
418+
{
419+
assertQuery("SELECT * FROM nation_text");
420+
421+
String tmpTableName = generateRandomTableName();
422+
try {
423+
getExpectedQueryRunner().execute(getSession(), format(
424+
"CREATE TABLE %s (" +
425+
"id BIGINT," +
426+
"name VARCHAR," +
427+
"is_active BOOLEAN," +
428+
"score DOUBLE," +
429+
"created_at TIMESTAMP," +
430+
"tags ARRAY<VARCHAR>," +
431+
"metrics ARRAY<DOUBLE>," +
432+
"properties MAP<VARCHAR, VARCHAR>," +
433+
"flags MAP<TINYINT, BOOLEAN>," +
434+
"nested_struct ROW(sub_id INTEGER, sub_name VARCHAR, sub_scores ARRAY<REAL>, sub_map MAP<SMALLINT, VARCHAR>)," +
435+
"price DECIMAL(15,2)," +
436+
"amount DECIMAL(21,6)," +
437+
"event_date DATE," +
438+
"ds VARCHAR" +
439+
") WITH (format = 'TEXTFILE', partitioned_by = ARRAY['ds'])", tmpTableName), ImmutableList.of());
440+
getExpectedQueryRunner().execute(getSession(), format(
441+
"INSERT INTO %s (" +
442+
"id," +
443+
"name," +
444+
"is_active," +
445+
"score," +
446+
"created_at," +
447+
"tags," +
448+
"metrics," +
449+
"properties," +
450+
"flags," +
451+
"nested_struct," +
452+
"price," +
453+
"amount," +
454+
"event_date," +
455+
"ds" +
456+
") VALUES (" +
457+
"1001," +
458+
"'Jane Doe'," +
459+
"TRUE," +
460+
"88.5," +
461+
"TIMESTAMP '2025-07-23 10:00:00'," +
462+
"ARRAY['alpha', 'beta', 'gamma']," +
463+
"ARRAY[3.14, 2.71, 1.41]," +
464+
"MAP(ARRAY['color', 'size'], ARRAY['blue', 'large'])," +
465+
"MAP(ARRAY[TINYINT '1', TINYINT '2'], ARRAY[TRUE, FALSE])," +
466+
"ROW(" +
467+
"42," +
468+
"'sub_jane'," +
469+
"ARRAY[REAL '1.1', REAL '2.2', REAL '3.3']," +
470+
"MAP(ARRAY[SMALLINT '10', SMALLINT '20'], ARRAY['foo', 'bar'])" +
471+
")," +
472+
"DECIMAL '12.34'," +
473+
"CAST('-123456789012345.123456' as DECIMAL(21,6))," +
474+
"DATE '2024-02-29'," +
475+
"'2025-07-01'" +
476+
")", tmpTableName), ImmutableList.of());
477+
// created_at is skipped because of the inconsistency in TIMESTAMP columns between Presto and Velox.
478+
// https://github.com/facebookincubator/velox/issues/8127
479+
assertQuery(format("SELECT id, name, is_active, score, tags, metrics, properties, flags, nested_struct, price, amount, event_date, ds FROM %s", tmpTableName));
480+
}
481+
finally {
482+
dropTableIfExists(tmpTableName);
483+
}
484+
}
485+
416486
@Test
417487
public void testOrderBy()
418488
{
@@ -1269,18 +1339,6 @@ public void testReadTableWithUnsupportedJsonFormat()
12691339
assertQueryFails("SELECT * FROM nation_json", "(?s).*ReaderFactory is not registered for format json.*");
12701340
}
12711341

1272-
@Test(groups = {"no_textfile_reader"})
1273-
public void testReadTableWithUnsupportedTextfileFormat()
1274-
{
1275-
assertQueryFails("SELECT * FROM nation_text", "(?s).*ReaderFactory is not registered for format text.*");
1276-
}
1277-
1278-
@Test(groups = {"textfile_reader"})
1279-
public void testReadTableWithTextfileFormat()
1280-
{
1281-
assertQuery("SELECT * FROM nation_text");
1282-
}
1283-
12841342
private void dropTableIfExists(String tableName)
12851343
{
12861344
// An ugly workaround for the lack of getExpectedQueryRunner()

presto-native-execution/src/test/java/com/facebook/presto/nativeworker/AbstractTestNativeTpcdsQueries.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
public abstract class AbstractTestNativeTpcdsQueries
3333
extends AbstractTestQueryFramework
3434
{
35-
String storageFormat = "DWRF";
35+
protected String storageFormat = "DWRF";
3636
Session session;
3737
String[] tpcdsTableNames = {"call_center", "catalog_page", "catalog_returns", "catalog_sales",
3838
"customer", "customer_address", "customer_demographics", "date_dim", "household_demographics",
@@ -90,6 +90,7 @@ private static void createTpcdsCallCenter(QueryRunner queryRunner, Session sessi
9090
switch (storageFormat) {
9191
case "PARQUET":
9292
case "ORC":
93+
case "TEXTFILE":
9394
queryRunner.execute(session, "CREATE TABLE call_center AS " +
9495
"SELECT * FROM tpcds.tiny.call_center");
9596
break;
@@ -158,6 +159,7 @@ private static void createTpcdsDateDim(QueryRunner queryRunner, Session session,
158159
switch (storageFormat) {
159160
case "PARQUET":
160161
case "ORC":
162+
case "TEXTFILE":
161163
queryRunner.execute(session, "CREATE TABLE date_dim AS " +
162164
"SELECT * FROM tpcds.tiny.date_dim");
163165
break;
@@ -202,6 +204,7 @@ private static void createTpcdsItem(QueryRunner queryRunner, Session session, St
202204
switch (storageFormat) {
203205
case "PARQUET":
204206
case "ORC":
207+
case "TEXTFILE":
205208
queryRunner.execute(session, "CREATE TABLE item AS " +
206209
"SELECT * FROM tpcds.tiny.item");
207210
break;
@@ -246,6 +249,7 @@ private static void createTpcdsStore(QueryRunner queryRunner, Session session, S
246249
switch (storageFormat) {
247250
case "PARQUET":
248251
case "ORC":
252+
case "TEXTFILE":
249253
queryRunner.execute(session, "CREATE TABLE store AS " +
250254
"SELECT * FROM tpcds.tiny.store");
251255
break;
@@ -300,6 +304,7 @@ private static void createTpcdsWebPage(QueryRunner queryRunner, Session session,
300304
switch (storageFormat) {
301305
case "PARQUET":
302306
case "ORC":
307+
case "TEXTFILE":
303308
queryRunner.execute(session, "CREATE TABLE web_page AS " +
304309
"SELECT * FROM tpcds.tiny.web_page");
305310
break;
@@ -337,6 +342,7 @@ private static void createTpcdsWebSite(QueryRunner queryRunner, Session session,
337342
switch (storageFormat) {
338343
case "PARQUET":
339344
case "ORC":
345+
case "TEXTFILE":
340346
queryRunner.execute(session, "CREATE TABLE web_site AS " +
341347
"SELECT * FROM tpcds.tiny.web_site");
342348
break;

presto-native-execution/src/test/java/com/facebook/presto/spark/TestPrestoSparkNativeGeneralQueries.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,6 @@ public void testUnicodeInJson()
8282
@Ignore
8383
public void testDistributedSortSingleNode() {}
8484

85-
// Disable: Text file reader is not supported. This test is also disabled in pom.xml through disabling groups "textfile_reader".
86-
@Override
87-
public void testReadTableWithTextfileFormat() {}
88-
8985
// Disable: Not supporte by POS
9086
@Override
9187
@Ignore

presto-native-tests/pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@
266266
<configuration>
267267
<forkCount>1</forkCount>
268268
<reuseForks>false</reuseForks>
269-
<excludedGroups>remote-function,textfile_reader</excludedGroups>
269+
<excludedGroups>remote-function</excludedGroups>
270270
<systemPropertyVariables>
271271
<PRESTO_SERVER>/root/project/build/debug/presto_cpp/main/presto_server</PRESTO_SERVER>
272272
</systemPropertyVariables>

0 commit comments

Comments
 (0)