-
Notifications
You must be signed in to change notification settings - Fork 738
Introduce support for Reciprocal Rank Fusion (combining queries) #2489
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
41aedb9
7f1fdb7
8520d04
13e7b27
8039b8f
c2e6001
d9dff35
158b95a
476a887
78c0c29
d06e3cb
4941c04
829c0c0
c5e8395
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -39,7 +39,6 @@ | |
import org.apache.lucene.index.ReaderUtil; | ||
import org.apache.lucene.index.Term; | ||
import org.apache.lucene.search.FieldComparator; | ||
import org.apache.lucene.search.FuzzyTermsEnum; | ||
import org.apache.lucene.search.LeafFieldComparator; | ||
import org.apache.lucene.search.MatchNoDocsQuery; | ||
import org.apache.lucene.search.Pruning; | ||
|
@@ -403,8 +402,9 @@ public void process(ResponseBuilder rb) throws IOException { | |
|
||
req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req)); | ||
|
||
// TODO QueryResult ought not to be created and passed in methods of QueryComponent. | ||
// Should be created ~exclusively in SolrIndexSearcher and returned by it. | ||
QueryResult result = new QueryResult(); | ||
|
||
cmd.setSegmentTerminateEarly( | ||
params.getBool( | ||
CommonParams.SEGMENT_TERMINATE_EARLY, CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT)); | ||
|
@@ -1689,10 +1689,11 @@ private void doProcessUngroupedSearch(ResponseBuilder rb, QueryCommand cmd, Quer | |
|
||
SolrIndexSearcher searcher = req.getSearcher(); | ||
|
||
try { | ||
if (cmd.getQuery() instanceof SelfExecutingQuery) { | ||
// TODO QueryResult ought not to be created and passed in methods of QueryComponent | ||
result = ((SelfExecutingQuery) cmd.getQuery()).search(searcher, cmd); | ||
} else { | ||
Comment on lines
+1692
to
+1695
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here's the minimal essential change to QueryComponent: adding a new abstraction (SelfExecutingQuery) that might be useful for other features, not just "combining". |
||
searcher.search(result, cmd); | ||
} catch (FuzzyTermsEnum.FuzzyTermsException e) { | ||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); | ||
} | ||
rb.setResult(result); | ||
|
||
|
@@ -1764,4 +1765,9 @@ public float score() throws IOException { | |
return score; | ||
} | ||
} | ||
|
||
/** | ||
* A {@link Query} that processes a command to search on its own. | ||
* | ||
* <p>In the ungrouped search path of this component, a command whose query implements this | ||
* interface is handed to {@link #search} and the returned result is used, instead of the command | ||
* being passed to the searcher directly. | ||
*/ | ||
public interface SelfExecutingQuery { | ||
/** | ||
* Executes the given command and returns its result. | ||
* | ||
* @param searcher the searcher to run against | ||
* @param cmd the command being processed | ||
* @return the result of the search | ||
* @throws IOException if an implementation encounters an I/O error while searching | ||
*/ | ||
QueryResult search(SolrIndexSearcher searcher, QueryCommand cmd) throws IOException; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,10 +16,12 @@ | |
*/ | ||
package org.apache.solr.request.json; | ||
|
||
import static org.apache.solr.common.params.CombinerParams.COMBINER_KEYS; | ||
import static org.apache.solr.common.params.CommonParams.JSON; | ||
import static org.apache.solr.common.params.CommonParams.SORT; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.LinkedHashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
@@ -241,13 +243,16 @@ public static void processParams( | |
if (queriesJsonObj instanceof Map && queriesJsonObj != null) { | ||
@SuppressWarnings("unchecked") | ||
final Map<String, Object> queriesAsMap = (Map<String, Object>) queriesJsonObj; | ||
ArrayList<String> queryKeys = new ArrayList<>(); | ||
for (Map.Entry<String, Object> queryJsonProperty : queriesAsMap.entrySet()) { | ||
out = queryJsonProperty.getKey(); | ||
queryKeys.add(out); | ||
arr = true; | ||
isQuery = true; | ||
convertJsonPropertyToLocalParams( | ||
newMap, jsonQueryConverter, queryJsonProperty, out, isQuery, arr); | ||
} | ||
newMap.put(COMBINER_KEYS, queryKeys.toArray(new String[0])); // nocommit a hack | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Making RequestUtils add a param specific to a niche feature is a hack. If we really need this here (?), I think we should use a generic name like "json.queries". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 for a generic name |
||
continue; | ||
} else { | ||
throw new SolrException( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,7 @@ | |
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.search.Explanation; | ||
import org.apache.lucene.search.FieldDoc; | ||
import org.apache.lucene.search.FuzzyTermsEnum; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.LeafCollector; | ||
import org.apache.lucene.search.MatchAllDocsQuery; | ||
|
@@ -706,9 +707,12 @@ public <K, V> boolean regenerateItem( | |
} | ||
} | ||
|
||
public QueryResult search(QueryResult qr, QueryCommand cmd) throws IOException { | ||
getDocListC(qr, cmd); | ||
return qr; | ||
public void search(QueryResult qr, QueryCommand cmd) throws IOException { | ||
try { | ||
getDocListC(qr, cmd); | ||
} catch (FuzzyTermsEnum.FuzzyTermsException e) { // unsure where best to catch this; shrug | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I moved the FuzzyTermsException catch block from QueryComponent to here. I wanted it in a deeper place where it would cover multiple inbound code paths instead of just QC. It still feels like it ought to be deeper, but I didn't look for a better spot. CombineQuery calls into this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems all right to me, but I don't have a strong opinion |
||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); | ||
} | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.solr.search.combining; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import org.apache.lucene.search.BooleanClause.Occur; | ||
import org.apache.lucene.search.MatchNoDocsQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.QueryVisitor; | ||
import org.apache.solr.common.params.CombinerParams; | ||
import org.apache.solr.common.params.SolrParams; | ||
import org.apache.solr.common.util.NamedList; | ||
import org.apache.solr.common.util.SimpleOrderedMap; | ||
import org.apache.solr.handler.component.QueryComponent; | ||
import org.apache.solr.request.SolrQueryRequest; | ||
import org.apache.solr.search.ExtendedQueryBase; | ||
import org.apache.solr.search.QParser; | ||
import org.apache.solr.search.QParserPlugin; | ||
import org.apache.solr.search.QueryCommand; | ||
import org.apache.solr.search.QueryParsing; | ||
import org.apache.solr.search.QueryResult; | ||
import org.apache.solr.search.SolrIndexSearcher; | ||
import org.apache.solr.search.SyntaxError; | ||
|
||
public class CombineQParserPlugin extends QParserPlugin { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This source file captures the essential aspect of my proposal. Needs another iteration of refinement at least (e.g. javadocs, and param naming / defaulting). Maybe more. |
||
public static final String NAME = "combine"; | ||
|
||
/** | ||
* Creates the parser for the {@code combine} query type. All four arguments are handed unchanged | ||
* to the {@link CombineQParser} constructor. | ||
*/ | ||
@Override | ||
public QParser createParser( | ||
String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { | ||
final QParser combineParser = new CombineQParser(qstr, localParams, params, req); | ||
return combineParser; | ||
} | ||
|
||
static class CombineQParser extends QParser { | ||
|
||
private List<String> unparsedQueries; | ||
private QueriesCombiner queriesCombiningStrategy; | ||
private List<QParser> queryParsers; | ||
private List<Query> queries; | ||
|
||
/** | ||
* Creates a parser for a combine query; all arguments are forwarded unchanged to the {@link | ||
* QParser} constructor. | ||
* | ||
* @param qstr the query string handed to this parser | ||
* @param localParams the local params of this query | ||
* @param params the request params | ||
* @param req the current request | ||
*/ | ||
public CombineQParser( | ||
String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { | ||
super(qstr, localParams, params, req); | ||
} | ||
|
||
@Override | ||
public Query parse() throws SyntaxError { | ||
|
||
var queriesToCombineKeys = localParams.getParams("keys"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not a huge fan of var, as in my opinion it reduces readability, but if it's OK as a standard in Solr, I have no strong opinion |
||
if (queriesToCombineKeys == null) { | ||
queriesToCombineKeys = params.getParams(CombinerParams.COMBINER_KEYS); | ||
} | ||
|
||
unparsedQueries = new ArrayList<>(queriesToCombineKeys.length); | ||
queryParsers = new ArrayList<>(queriesToCombineKeys.length); | ||
queries = new ArrayList<>(queriesToCombineKeys.length); | ||
|
||
// nocommit blend localParams and params without needless suffix in local | ||
var blendParams = SolrParams.wrapDefaults(localParams, params); | ||
queriesCombiningStrategy = QueriesCombiner.getImplementation(blendParams); | ||
|
||
String defType = blendParams.get(QueryParsing.DEFTYPE, DEFAULT_QTYPE); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe could support different |
||
|
||
for (String queryKey : queriesToCombineKeys) { | ||
final var unparsedQuery = blendParams.get(queryKey); | ||
final var parser = QParser.getParser(unparsedQuery, defType, req); | ||
var query = parser.getQuery(); | ||
if (query == null) { // sad this can happen | ||
query = new MatchNoDocsQuery(); | ||
} | ||
unparsedQueries.add(unparsedQuery); | ||
queryParsers.add(parser); | ||
queries.add(query); | ||
} | ||
|
||
return new CombineQuery(queriesCombiningStrategy, queries); | ||
} | ||
|
||
@Override | ||
public void addDebugInfo(NamedList<Object> dbg) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe 'addQueryDebugInfo' ? |
||
super.addDebugInfo(dbg); | ||
|
||
NamedList<NamedList<Object>> queriesDebug = new SimpleOrderedMap<>(); | ||
String[] queryKeys = req.getParams().getParams(CombinerParams.COMBINER_KEYS); | ||
for (int i = 0; i < queries.size(); i++) { | ||
NamedList<Object> singleQueryDebug = new SimpleOrderedMap<>(); | ||
singleQueryDebug.add("querystring", unparsedQueries.get(i)); | ||
singleQueryDebug.add("queryparser", queryParsers.get(i).getClass().getSimpleName()); | ||
singleQueryDebug.add("parsedquery", QueryParsing.toString(queries.get(i), req.getSchema())); | ||
singleQueryDebug.add("parsedquery_toString", queries.get(i).toString()); | ||
queriesDebug.add(queryKeys[i], singleQueryDebug); | ||
} | ||
dbg.add("queriesToCombine", queriesDebug); | ||
} | ||
} | ||
|
||
static final class CombineQuery extends ExtendedQueryBase | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A "SelfExecutingQuery" isn't normal; Lucene isn't going to work with this thing since it doesn't implement getWeight nor can it. But users don't need to know/care. |
||
implements QueryComponent.SelfExecutingQuery { | ||
|
||
private final QueriesCombiner combiner; | ||
private final List<Query> queries; | ||
|
||
/** | ||
* Creates a query that runs each of the given queries and merges their results. | ||
* | ||
* @param combiner the strategy used to merge the per-query results | ||
* @param queries the queries to execute; the list is stored as given, without copying | ||
*/ | ||
public CombineQuery(QueriesCombiner combiner, List<Query> queries) { | ||
this.queries = queries; | ||
this.combiner = combiner; | ||
} | ||
|
||
/** Returns the superclass rendering followed by the local-params tag of this parser. */ | ||
@Override | ||
public String toString(String field) { | ||
// TODO others | ||
final String parserTag = "{!" + NAME + "}"; | ||
return super.toString(field) + parserTag; | ||
} | ||
|
||
/** | ||
* Two combine queries are equal when both their combiner and their list of queries are equal. | ||
*/ | ||
@Override | ||
public boolean equals(Object o) { | ||
if (o == this) { | ||
return true; | ||
} | ||
if (o instanceof CombineQuery) { | ||
final CombineQuery other = (CombineQuery) o; | ||
final boolean sameCombiner = Objects.equals(combiner, other.combiner); | ||
return sameCombiner && Objects.equals(queries, other.queries); | ||
} | ||
return false; | ||
} | ||
|
||
/** | ||
* Hashes the combiner and the list of queries, the same two fields compared by {@code equals}. | ||
*/ | ||
@Override | ||
public int hashCode() { | ||
// Same 31-based accumulation that Objects.hash(combiner, queries) performs. | ||
final int combinerPart = 31 + Objects.hashCode(combiner); | ||
return 31 * combinerPart + Objects.hashCode(queries); | ||
} | ||
|
||
/** | ||
* Visits every wrapped query. A sub-visitor is requested from {@code visitor} for each wrapped | ||
* query, with {@link Occur#MUST} and this query as the parent. | ||
*/ | ||
@Override | ||
public void visit(QueryVisitor visitor) { | ||
queries.forEach(subQuery -> subQuery.visit(visitor.getSubVisitor(Occur.MUST, this))); | ||
} | ||
|
||
@Override | ||
public QueryResult search(SolrIndexSearcher searcher, QueryCommand cmd) throws IOException { | ||
QueryResult[] results = new QueryResult[queries.size()]; | ||
// TODO do in multiple threads? | ||
for (int i = 0; i < queries.size(); i++) { | ||
cmd.setQuery(queries.get(i)); | ||
QueryResult qr = new QueryResult(); | ||
searcher.search(qr, cmd); | ||
results[i] = qr; | ||
} | ||
|
||
// nocommit but how is the docSet (e.g. for faceting) or maybe other things supported? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems like a fundamental limitation in the whole feature honestly. What Hossman said about re-rank query is interesting... not sure if it addresses this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I have probably lost track, among the comments, of why the way I created the docSet was incorrect. Can you elaborate? (A nice occasion to learn how that docSet part works, as I haven't had time yet.) |
||
|
||
return combiner.combine(results); | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.solr.search.combining; | ||
|
||
import static org.apache.solr.common.params.CombinerParams.RECIPROCAl_RANK_FUSION; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import org.apache.lucene.search.Explanation; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.solr.common.SolrException; | ||
import org.apache.solr.common.params.CombinerParams; | ||
import org.apache.solr.common.params.SolrParams; | ||
import org.apache.solr.common.util.NamedList; | ||
import org.apache.solr.schema.IndexSchema; | ||
import org.apache.solr.search.DocList; | ||
import org.apache.solr.search.QueryResult; | ||
import org.apache.solr.search.SolrIndexSearcher; | ||
|
||
/** | ||
* Combining considers two or more query rankedLists: resultA, resultB ...<br> | ||
* For a given query, each query result is a ranked list of documents La = (a1,a2,...), Lb = (b1, | ||
* b2, ...)...<br> | ||
* A combining algorithm creates a unique ranked list I = (i1, i2, ...).<br> | ||
* This list is created by combining elements from the lists La and Lb as described by the | ||
* implementation algorithm.<br> | ||
* Since version 9.7. Used by {@link org.apache.solr.handler.component.QueryComponent}. | ||
*/ | ||
public abstract class QueriesCombiner { | ||
dsmiley marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
protected int upTo; | ||
|
||
public QueriesCombiner(SolrParams requestParams) { | ||
this.upTo = | ||
requestParams.getInt(CombinerParams.COMBINER_UP_TO, CombinerParams.COMBINER_UP_TO_DEFAULT); | ||
} | ||
|
||
/** | ||
* Combines the given ranked lists into a single result, as defined by the implementing | ||
* algorithm. | ||
* | ||
* @param rankedLists the results to combine, one per query | ||
* @return the combined result | ||
*/ | ||
public abstract QueryResult combine(QueryResult[] rankedLists); | ||
|
||
protected QueryResult initCombinedResult(QueryResult[] rankedLists) { | ||
QueryResult combinedRankedList = new QueryResult(); | ||
boolean partialResults = false; | ||
for (QueryResult result : rankedLists) { | ||
partialResults |= result.isPartialResults(); | ||
} | ||
combinedRankedList.setPartialResults(partialResults); | ||
|
||
boolean segmentTerminatedEarly = false; | ||
for (QueryResult result : rankedLists) { | ||
if (result.getSegmentTerminatedEarly() != null) { | ||
segmentTerminatedEarly |= result.getSegmentTerminatedEarly(); | ||
} | ||
} | ||
combinedRankedList.setSegmentTerminatedEarly(segmentTerminatedEarly); | ||
|
||
combinedRankedList.setNextCursorMark(rankedLists[0].getNextCursorMark()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not sure about picking the first list here, and/or what the use of a cursor mark means when combining queries. An alternative might be to just disallow cursor marks when combining queries. |
||
return combinedRankedList; | ||
} | ||
|
||
/** | ||
* Produces the explanations for a set of combined queries. | ||
* | ||
* <p>NOTE(review): the exact contract is set by the implementations, which are not visible here. | ||
* Presumably {@code queryKeys}, {@code queries} and {@code resultsPerQuery} are parallel, with one | ||
* entry per combined query; confirm against the implementations. | ||
* | ||
* @param queryKeys the keys identifying the combined queries | ||
* @param queries the combined queries | ||
* @param resultsPerQuery the result list of each query | ||
* @param searcher the searcher of the current request | ||
* @param schema the index schema | ||
* @return the explanations, as a named list | ||
* @throws IOException if an implementation encounters an I/O error | ||
*/ | ||
public abstract NamedList<Explanation> getExplanations( | ||
String[] queryKeys, | ||
List<Query> queries, | ||
List<DocList> resultsPerQuery, | ||
SolrIndexSearcher searcher, | ||
IndexSchema schema) | ||
throws IOException; | ||
|
||
/** | ||
* Selects the combiner implementation named by the {@link CombinerParams#COMBINER_ALGORITHM} | ||
* param, using reciprocal rank fusion when the param is absent. | ||
* | ||
* @param requestParams the params naming the algorithm; also handed to the created combiner | ||
* @return a new combiner for the requested algorithm | ||
* @throws SolrException with {@code BAD_REQUEST} when the algorithm name is not recognised | ||
*/ | ||
public static QueriesCombiner getImplementation(SolrParams requestParams) { | ||
final String algorithm = | ||
requestParams.get(CombinerParams.COMBINER_ALGORITHM, RECIPROCAl_RANK_FUSION); | ||
if (algorithm.equals(RECIPROCAl_RANK_FUSION)) { | ||
return new ReciprocalRankFusion(requestParams); | ||
} | ||
throw new SolrException( | ||
SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " + algorithm); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I tend not to be a huge fan of comments in the code; what does this mean? Is it just for your convenience in the draft?
If that's the case, ignore my comment