Skip to content

Commit a070982

Browse files
authored
fix(analyzer): Materialized view join filtering (#26630)
Summary: When there is a join query that involves materialized view, if the WHERE predicates includes columns of the join table, it throws a semantic exception about column cannot be resolved. Example query: SELECT * FROM mv JOIN table WHERE table_col1>10 The reason is that When Presto tries to optimize partition filtering for materialized views, it analyzes the WHERE clause using only the scope of the materialized view table. This fails when the WHERE clause contains columns from joined tables that haven't been processed yet. This PR fixes it by only filtering out columns that belong to the materialized view during the MV partition filtering optimization. Also it uses analyzeExpression instead of analyzeWhere to not record this analysis, so that it won't miss the join table columns with complete scope. Differential Revision: D87164277 ## Release Notes Please follow [release notes guidelines](https://github.com/prestodb/presto/wiki/Release-Notes-Guidelines) and fill in the release notes below. ``` == NO RELEASE NOTE == ```
1 parent 874b89c commit a070982

File tree

2 files changed

+116
-27
lines changed

2 files changed

+116
-27
lines changed

presto-hive/src/test/java/com/facebook/presto/hive/TestHiveMaterializedViewLogicalPlanner.java

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
import static java.lang.String.format;
8989
import static java.util.Collections.emptyList;
9090
import static java.util.stream.Collectors.joining;
91+
import static java.util.stream.Collectors.toList;
9192
import static org.testng.Assert.assertNotEquals;
9293
import static org.testng.Assert.assertTrue;
9394

@@ -2958,6 +2959,71 @@ public void testAutoRefreshMaterializedViewAfterInsertion()
29582959
}
29592960
}
29602961

2962+
@Test
2963+
public void testMVJoinQueryWithOtherTableColumnFiltering()
2964+
{
2965+
QueryRunner queryRunner = getQueryRunner();
2966+
Session session = getSession();
2967+
2968+
assertUpdate("CREATE TABLE mv_base (mv_col1 int, mv_col2 varchar, mv_col3 varchar) " +
2969+
"WITH (partitioned_by=ARRAY['mv_col3'])");
2970+
assertUpdate("CREATE TABLE join_table (table_col1 int, table_col2 varchar, table_col3 varchar) " +
2971+
" WITH (partitioned_by=ARRAY['table_col3'])");
2972+
2973+
assertUpdate("INSERT INTO mv_base VALUES (1, 'Alice', 'A'), (2, 'Bob', 'B'), (3, 'Charlie', 'C')", 3);
2974+
assertUpdate("INSERT INTO join_table VALUES (1, 'CityA', 'A'), (21, 'CityA', 'B'), (32, 'CityB', 'C')", 3);
2975+
2976+
assertUpdate("CREATE MATERIALIZED VIEW mv " +
2977+
"WITH (partitioned_by=ARRAY['mv_col3']) " +
2978+
"AS SELECT mv_col1, mv_col2, mv_col3 FROM mv_base");
2979+
2980+
assertUpdate("REFRESH MATERIALIZED VIEW mv WHERE mv_col3>='A'", 3);
2981+
2982+
// Query MV with JOIN and WHERE clause on column from joined table (not in MV)
2983+
MaterializedResult result = queryRunner.execute(session,
2984+
"SELECT mv_col2 FROM mv " +
2985+
"JOIN join_table ON mv_col3=table_col3 " +
2986+
"WHERE table_col1>10 ORDER BY mv_col1");
2987+
assertEquals(result.getRowCount(), 2, "Materialized view join produced unexpected row counts");
2988+
2989+
List<Object> expectedResults = List.of("Bob", "Charlie");
2990+
List<Object> actualResults = result.getMaterializedRows().stream()
2991+
.map(row -> row.getField(0))
2992+
.collect(toList());
2993+
assertEquals(actualResults, expectedResults, "Materialized view join returned unexpected row values");
2994+
2995+
// WHERE clause on MV column
2996+
result = queryRunner.execute(session, "SELECT mv_col2 FROM mv JOIN join_table " +
2997+
"ON mv_col3=table_col3 WHERE mv_col2>'Alice' ORDER BY mv_col2");
2998+
assertEquals(result.getRowCount(), 2, "Materialized view join produced unexpected row counts");
2999+
3000+
expectedResults = List.of("Bob", "Charlie");
3001+
actualResults = result.getMaterializedRows().stream()
3002+
.map(row -> row.getField(0))
3003+
.collect(toList());
3004+
assertEquals(actualResults, expectedResults, "Materialized view join returned unexpected row values");
3005+
3006+
// Test with multiple conditions in WHERE clause (non-partition column)
3007+
result = queryRunner.execute(session, "SELECT mv_col1 FROM mv JOIN join_table ON mv_col3=table_col3 " +
3008+
"WHERE table_col1>10 AND table_col3='B' AND mv_col1>1");
3009+
assertEquals(result.getRowCount(), 1, "Materialized view join produced unexpected row counts");
3010+
3011+
expectedResults = List.of(2);
3012+
actualResults = result.getMaterializedRows().stream()
3013+
.map(row -> row.getField(0))
3014+
.collect(toList());
3015+
assertEquals(actualResults, expectedResults, "Materialized view join returned unexpected row values");
3016+
3017+
// Test with multiple conditions in WHERE clause (partition column)
3018+
result = queryRunner.execute(session, "SELECT mv_col1 FROM mv JOIN join_table ON mv_col3=table_col3 " +
3019+
"WHERE table_col1>10 AND table_col3='B' AND mv_col3='C'");
3020+
assertEquals(result.getRowCount(), 0, "Materialized view join produced wrong results");
3021+
3022+
assertUpdate("DROP MATERIALIZED VIEW mv");
3023+
assertUpdate("DROP TABLE join_table");
3024+
assertUpdate("DROP TABLE mv_base");
3025+
}
3026+
29613027
public void testMaterializedViewNotRefreshedInNonLegacyMode()
29623028
{
29633029
Session nonLegacySession = Session.builder(getSession())

presto-main-base/src/main/java/com/facebook/presto/sql/analyzer/StatementAnalyzer.java

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2551,37 +2551,60 @@ private MaterializedViewStatus getMaterializedViewStatus(QualifiedObjectName mat
25512551
Scope sourceScope = getScopeFromTable(table, scope);
25522552
Expression viewQueryWhereClause = currentSubquery.getWhere().get();
25532553

2554-
analyzeWhere(currentSubquery, sourceScope, viewQueryWhereClause);
2555-
2556-
DomainTranslator domainTranslator = new RowExpressionDomainTranslator(metadata);
2557-
RowExpression rowExpression = SqlToRowExpressionTranslator.translate(
2558-
viewQueryWhereClause,
2559-
analysis.getTypes(),
2560-
ImmutableMap.of(),
2561-
metadata.getFunctionAndTypeManager(),
2562-
session);
2563-
2564-
TupleDomain<String> viewQueryDomain = MaterializedViewUtils.getDomainFromFilter(session, domainTranslator, rowExpression);
2565-
2566-
Map<String, Map<SchemaTableName, String>> directColumnMappings = materializedViewDefinition.get().getDirectColumnMappingsAsMap();
2554+
// Extract column names from materialized view scope
2555+
Set<QualifiedName> materializedViewColumns = sourceScope.getRelationType().getAllFields().stream()
2556+
.map(field -> field.getName())
2557+
.filter(Optional::isPresent)
2558+
.map(Optional::get)
2559+
.map(QualifiedName::of)
2560+
.collect(Collectors.toSet());
25672561

2568-
// Get base query domain we have mapped from view query- if there are not direct mappings, don't filter partition count for predicate
2569-
boolean mappedToOneTable = true;
2570-
Map<String, Domain> rewrittenDomain = new HashMap<>();
2562+
// Only proceed with partition filtering if there are conjuncts that reference MV columns
2563+
List<Expression> conjuncts = ExpressionUtils.extractConjuncts(viewQueryWhereClause);
2564+
List<Expression> mvConjuncts = conjuncts.stream()
2565+
.filter(conjunct -> {
2566+
Set<QualifiedName> referencedColumns = VariablesExtractor.extractNames(conjunct, analysis.getColumnReferences());
2567+
return !referencedColumns.isEmpty() && referencedColumns.stream().allMatch(materializedViewColumns::contains);
2568+
})
2569+
.collect(Collectors.toList());
2570+
2571+
if (!mvConjuncts.isEmpty()) {
2572+
Expression filteredWhereClause = ExpressionUtils.combineConjuncts(mvConjuncts);
2573+
2574+
// Analyze the filtered WHERE clause only for type inference, don't record it in analysis
2575+
// to avoid preventing the full WHERE clause from being analyzed later
2576+
ExpressionAnalysis expressionAnalysis = analyzeExpression(filteredWhereClause, sourceScope);
2577+
2578+
DomainTranslator domainTranslator = new RowExpressionDomainTranslator(metadata);
2579+
RowExpression rowExpression = SqlToRowExpressionTranslator.translate(
2580+
filteredWhereClause,
2581+
analysis.getTypes(),
2582+
ImmutableMap.of(),
2583+
metadata.getFunctionAndTypeManager(),
2584+
session);
2585+
2586+
TupleDomain<String> viewQueryDomain = MaterializedViewUtils.getDomainFromFilter(session, domainTranslator, rowExpression);
2587+
2588+
Map<String, Map<SchemaTableName, String>> directColumnMappings = materializedViewDefinition.get().getDirectColumnMappingsAsMap();
2589+
2590+
// Get base query domain we have mapped from view query- if there are not direct mappings, don't filter partition count for predicate
2591+
boolean mappedToOneTable = true;
2592+
Map<String, Domain> rewrittenDomain = new HashMap<>();
2593+
2594+
for (Map.Entry<String, Domain> entry : viewQueryDomain.getDomains().orElse(ImmutableMap.of()).entrySet()) {
2595+
Map<SchemaTableName, String> baseTableMapping = directColumnMappings.get(entry.getKey());
2596+
if (baseTableMapping == null || baseTableMapping.size() != 1) {
2597+
mappedToOneTable = false;
2598+
break;
2599+
}
25712600

2572-
for (Map.Entry<String, Domain> entry : viewQueryDomain.getDomains().orElse(ImmutableMap.of()).entrySet()) {
2573-
Map<SchemaTableName, String> baseTableMapping = directColumnMappings.get(entry.getKey());
2574-
if (baseTableMapping == null || baseTableMapping.size() != 1) {
2575-
mappedToOneTable = false;
2576-
break;
2601+
String baseColumnName = baseTableMapping.entrySet().stream().findAny().get().getValue();
2602+
rewrittenDomain.put(baseColumnName, entry.getValue());
25772603
}
25782604

2579-
String baseColumnName = baseTableMapping.entrySet().stream().findAny().get().getValue();
2580-
rewrittenDomain.put(baseColumnName, entry.getValue());
2581-
}
2582-
2583-
if (mappedToOneTable) {
2584-
baseQueryDomain = TupleDomain.withColumnDomains(rewrittenDomain);
2605+
if (mappedToOneTable) {
2606+
baseQueryDomain = TupleDomain.withColumnDomains(rewrittenDomain);
2607+
}
25852608
}
25862609
}
25872610

0 commit comments

Comments
 (0)