Skip to content

Introduce support for Reciprocal Rank Fusion (combining queries) #2489

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FuzzyTermsEnum;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Pruning;
Expand Down Expand Up @@ -403,8 +402,9 @@ public void process(ResponseBuilder rb) throws IOException {

req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));

// TODO QueryResult ought not to be created and passed in methods of QueryComponent.
// Should be created ~exclusively in SolrIndexSearcher and returned by it.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tend not to be a huge fan of comments in the code. What does this one mean? Is it just for your convenience in the draft?
If that's the case, ignore my comment.

QueryResult result = new QueryResult();

cmd.setSegmentTerminateEarly(
params.getBool(
CommonParams.SEGMENT_TERMINATE_EARLY, CommonParams.SEGMENT_TERMINATE_EARLY_DEFAULT));
Expand Down Expand Up @@ -1689,10 +1689,11 @@ private void doProcessUngroupedSearch(ResponseBuilder rb, QueryCommand cmd, Quer

SolrIndexSearcher searcher = req.getSearcher();

try {
if (cmd.getQuery() instanceof SelfExecutingQuery) {
// TODO QueryResult ought not to be created and passed in methods of QueryComponent
result = ((SelfExecutingQuery) cmd.getQuery()).search(searcher, cmd);
} else {
Comment on lines +1692 to +1695
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here's the minimal essential change to QueryComponent: adding a new abstraction (SelfExecutingQuery) that might be useful for other features, not just "combining".

searcher.search(result, cmd);
} catch (FuzzyTermsEnum.FuzzyTermsException e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
rb.setResult(result);

Expand Down Expand Up @@ -1764,4 +1765,9 @@ public float score() throws IOException {
return score;
}
}

/**
 * A {@link Query} that processes a command to search on its own.
 *
 * <p>In the ungrouped search path of {@code QueryComponent}, a command whose query implements
 * this interface is executed by calling {@link #search} on that query; otherwise the command is
 * handed to the {@link SolrIndexSearcher} directly.
 */
public interface SelfExecutingQuery {
/**
 * Executes the given command and returns its result.
 *
 * @param searcher the searcher to run against
 * @param cmd the command to execute; in the caller visible here, its query is this object
 * @return the result of executing the command
 * @throws IOException if the underlying search fails with an I/O error
 */
QueryResult search(SolrIndexSearcher searcher, QueryCommand cmd) throws IOException;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@
*/
package org.apache.solr.request.json;

import static org.apache.solr.common.params.CombinerParams.COMBINER_KEYS;
import static org.apache.solr.common.params.CommonParams.JSON;
import static org.apache.solr.common.params.CommonParams.SORT;

import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -241,13 +243,16 @@ public static void processParams(
if (queriesJsonObj instanceof Map && queriesJsonObj != null) {
@SuppressWarnings("unchecked")
final Map<String, Object> queriesAsMap = (Map<String, Object>) queriesJsonObj;
ArrayList<String> queryKeys = new ArrayList<>();
for (Map.Entry<String, Object> queryJsonProperty : queriesAsMap.entrySet()) {
out = queryJsonProperty.getKey();
queryKeys.add(out);
arr = true;
isQuery = true;
convertJsonPropertyToLocalParams(
newMap, jsonQueryConverter, queryJsonProperty, out, isQuery, arr);
}
newMap.put(COMBINER_KEYS, queryKeys.toArray(new String[0])); // nocommit a hack
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making RequestUtils add a param specific to a niche feature is a hack. If we really need this here (?), I think we should use a generic name like "json.queries".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1 for a generic name
json.queries looks all right to me

continue;
} else {
throw new SolrException(
Expand Down
2 changes: 2 additions & 0 deletions solr/core/src/java/org/apache/solr/search/QParserPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrMetricsContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.combining.CombineQParserPlugin;
import org.apache.solr.search.join.BlockJoinChildQParserPlugin;
import org.apache.solr.search.join.BlockJoinParentQParserPlugin;
import org.apache.solr.search.join.FiltersQParserPlugin;
Expand Down Expand Up @@ -89,6 +90,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
map.put(HashRangeQParserPlugin.NAME, new HashRangeQParserPlugin());
map.put(RankQParserPlugin.NAME, new RankQParserPlugin());
map.put(KnnQParserPlugin.NAME, new KnnQParserPlugin());
map.put(CombineQParserPlugin.NAME, new CombineQParserPlugin());

standardPlugins = Collections.unmodifiableMap(map);
}
Expand Down
10 changes: 7 additions & 3 deletions solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.FuzzyTermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
Expand Down Expand Up @@ -706,9 +707,12 @@ public <K, V> boolean regenerateItem(
}
}

public QueryResult search(QueryResult qr, QueryCommand cmd) throws IOException {
getDocListC(qr, cmd);
return qr;
public void search(QueryResult qr, QueryCommand cmd) throws IOException {
try {
getDocListC(qr, cmd);
} catch (FuzzyTermsEnum.FuzzyTermsException e) { // unsure where best to catch this; shrug
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the FuzzyTermsException catch block from QueryComponent to here. I wanted it in a deeper place where it would cover multiple inward code paths instead of just QC. It still feels like it ought to be deeper, but I didn't look for a better spot. CombineQuery calls into this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems all right to me, but I don't have a strong opinion

throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.search.combining;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.solr.common.params.CombinerParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.QueryComponent;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.QueryCommand;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.QueryResult;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;

public class CombineQParserPlugin extends QParserPlugin {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This source file captures the essential aspect of my proposal. Needs another iteration of refinement at least (e.g. javadocs, and param naming / defaulting). Maybe more.

public static final String NAME = "combine";

/** Builds the parser that handles the {@code combine} query type for the given request. */
@Override
public QParser createParser(
    String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  final QParser combineParser = new CombineQParser(qstr, localParams, params, req);
  return combineParser;
}

static class CombineQParser extends QParser {

// Raw query strings, one per combined key; filled by parse() and reported by addDebugInfo().
private List<String> unparsedQueries;
// Combining strategy obtained from QueriesCombiner.getImplementation(...) during parse().
private QueriesCombiner queriesCombiningStrategy;
// The sub-parser used for each key, in the same order as unparsedQueries.
private List<QParser> queryParsers;
// The parsed sub-queries, in the same order as unparsedQueries.
private List<Query> queries;

/** Creates the parser; all arguments are passed unchanged to the {@link QParser} constructor. */
public CombineQParser(
String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
super(qstr, localParams, params, req);
}

/**
 * Parses every query referenced by the combiner keys and wraps them in a {@link CombineQuery}.
 *
 * <p>The keys are read from the {@code keys} local param and, when that is absent, from the
 * {@link CombinerParams#COMBINER_KEYS} request param. Each key names a param (looked up in the
 * local params first, then in the request params) whose value is the query string to parse.
 *
 * @return a {@link CombineQuery} over the parsed queries, in key order
 * @throws SyntaxError if no keys are supplied, or if one of the referenced queries fails to parse
 */
@Override
public Query parse() throws SyntaxError {

  // Guard against a null localParams rather than dereferencing it blindly.
  var queriesToCombineKeys = localParams == null ? null : localParams.getParams("keys");
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not a huge fan of var as in my opinion it reduces readability, but if it's ok as standard in Solr, no strong opinion

  if (queriesToCombineKeys == null) {
    queriesToCombineKeys = params.getParams(CombinerParams.COMBINER_KEYS);
  }
  // Without keys there is nothing to combine; fail with a parse error instead of an NPE below.
  if (queriesToCombineKeys == null || queriesToCombineKeys.length == 0) {
    throw new SyntaxError(
        "No queries to combine: specify the 'keys' local param or the '"
            + CombinerParams.COMBINER_KEYS
            + "' request param");
  }

  unparsedQueries = new ArrayList<>(queriesToCombineKeys.length);
  queryParsers = new ArrayList<>(queriesToCombineKeys.length);
  queries = new ArrayList<>(queriesToCombineKeys.length);

  // nocommit blend localParams and params without needless suffix in local
  var blendParams = SolrParams.wrapDefaults(localParams, params);
  queriesCombiningStrategy = QueriesCombiner.getImplementation(blendParams);

  String defType = blendParams.get(QueryParsing.DEFTYPE, DEFAULT_QTYPE);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe could support different defType for different keys, though that's perhaps a bit of a niche thing.


  for (String queryKey : queriesToCombineKeys) {
    final var unparsedQuery = blendParams.get(queryKey);
    final var parser = QParser.getParser(unparsedQuery, defType, req);
    var query = parser.getQuery();
    if (query == null) { // sad this can happen
      query = new MatchNoDocsQuery();
    }
    unparsedQueries.add(unparsedQuery);
    queryParsers.add(parser);
    queries.add(query);
  }

  return new CombineQuery(queriesCombiningStrategy, queries);
}

/**
 * Adds, under {@code queriesToCombine}, one debug entry per combined query: its raw string, the
 * parser class used, and the parsed query in both schema-aware and plain {@code toString} form.
 *
 * @param dbg the debug structure to append to
 */
@Override
public void addDebugInfo(NamedList<Object> dbg) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe 'addQueryDebugInfo' ?

  super.addDebugInfo(dbg);

  // The per-query lists are only populated by parse(); without them there is nothing to report.
  if (queries == null) {
    return;
  }

  // Resolve the keys exactly as parse() does (local "keys" first, then the request param) so the
  // labels line up with the queries that were actually parsed.
  String[] queryKeys = localParams == null ? null : localParams.getParams("keys");
  if (queryKeys == null) {
    queryKeys = params.getParams(CombinerParams.COMBINER_KEYS);
  }

  NamedList<NamedList<Object>> queriesDebug = new SimpleOrderedMap<>();
  for (int i = 0; i < queries.size(); i++) {
    NamedList<Object> singleQueryDebug = new SimpleOrderedMap<>();
    singleQueryDebug.add("querystring", unparsedQueries.get(i));
    singleQueryDebug.add("queryparser", queryParsers.get(i).getClass().getSimpleName());
    singleQueryDebug.add("parsedquery", QueryParsing.toString(queries.get(i), req.getSchema()));
    singleQueryDebug.add("parsedquery_toString", queries.get(i).toString());
    // Fall back to the position if a key is unexpectedly unavailable.
    String label = queryKeys != null && i < queryKeys.length ? queryKeys[i] : String.valueOf(i);
    queriesDebug.add(label, singleQueryDebug);
  }
  dbg.add("queriesToCombine", queriesDebug);
}
}

static final class CombineQuery extends ExtendedQueryBase
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A "SelfExecutingQuery" isn't normal; Lucene isn't going to work with this thing since it doesn't implement getWeight nor can it. But users don't need to know/care.

implements QueryComponent.SelfExecutingQuery {

// Strategy that merges the per-query results into one result.
private final QueriesCombiner combiner;
// Immutable snapshot of the sub-queries; it feeds equals/hashCode and must not change.
private final List<Query> queries;

/**
 * Creates a query that runs each of {@code queries} and merges the results with {@code
 * combiner}.
 *
 * @param combiner the combining strategy; must not be null
 * @param queries the sub-queries to run, in order; must not be null or contain null
 * @throws NullPointerException if an argument (or an element of {@code queries}) is null
 */
public CombineQuery(QueriesCombiner combiner, List<Query> queries) {
  this.combiner = Objects.requireNonNull(combiner, "combiner");
  // Copy defensively so later changes to the caller's list cannot alter this query's identity.
  this.queries = List.copyOf(Objects.requireNonNull(queries, "queries"));
}

/** Returns the superclass rendering followed by the {@code {!combine}} marker. */
@Override
public String toString(String field) {
  final StringBuilder text = new StringBuilder();
  text.append(super.toString(field));
  text.append("{!").append(NAME).append("}"); // TODO others
  return text.toString();
}

/** Two combine queries are equal when both their combiner and their sub-query lists are equal. */
@Override
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (o instanceof CombineQuery) {
    final CombineQuery other = (CombineQuery) o;
    if (!Objects.equals(combiner, other.combiner)) {
      return false;
    }
    return Objects.equals(queries, other.queries);
  }
  return false;
}

/** Hash over the combiner and the sub-query list, consistent with {@link #equals(Object)}. */
@Override
public int hashCode() {
  // Same arithmetic as Objects.hash(combiner, queries), written out.
  int hash = 31 + Objects.hashCode(combiner);
  hash = 31 * hash + Objects.hashCode(queries);
  return hash;
}

/** Visits every sub-query through a {@code MUST} sub-visitor obtained from {@code visitor}. */
@Override
public void visit(QueryVisitor visitor) {
  queries.forEach(subQuery -> subQuery.visit(visitor.getSubVisitor(Occur.MUST, this)));
}

/**
 * Runs every sub-query with the given command and combines the individual results.
 *
 * <p>The command is reused for each sub-query by temporarily replacing its query; the query it
 * carried on entry is restored before this method returns, including when a search fails.
 *
 * @param searcher the searcher to execute each sub-query against
 * @param cmd the command whose settings (other than the query) apply to every sub-query
 * @return the result produced by the combiner from the per-query results
 * @throws IOException if one of the searches fails
 */
@Override
public QueryResult search(SolrIndexSearcher searcher, QueryCommand cmd) throws IOException {
  // Remember the caller's query so the command is not left pointing at the last sub-query.
  final Query originalQuery = cmd.getQuery();
  QueryResult[] results = new QueryResult[queries.size()];
  try {
    // TODO do in multiple threads?
    for (int i = 0; i < queries.size(); i++) {
      cmd.setQuery(queries.get(i));
      QueryResult qr = new QueryResult();
      searcher.search(qr, cmd);
      results[i] = qr;
    }
  } finally {
    cmd.setQuery(originalQuery);
  }

  // nocommit but how is the docSet (e.g. for faceting) or maybe other things supported?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a fundamental limitation in the whole feature honestly. What Hossman said about re-rank query is interesting... not sure if it addresses this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mmm probably I have lost in the comments why the way I created the docSet was incorrect. Can you elaborate? (a nice occasion to learn how that docSet part works, as I didn't have time yet)


  return combiner.combine(results);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.search.combining;

import static org.apache.solr.common.params.CombinerParams.RECIPROCAl_RANK_FUSION;

import java.io.IOException;
import java.util.List;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CombinerParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.DocList;
import org.apache.solr.search.QueryResult;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * Combining considers two or more ranked lists of query results: resultA, resultB, ...<br>
 * Each query result is a ranked list of documents: La = (a1, a2, ...), Lb = (b1, b2, ...), ...<br>
 * A combining algorithm creates a single ranked list I = (i1, i2, ...).<br>
 * This list is created by combining elements from the lists La and Lb as described by the
 * implementation algorithm.<br>
 * Used by {@link org.apache.solr.handler.component.QueryComponent}.
 *
 * @since 9.7
 */
public abstract class QueriesCombiner {

/** Value of the {@link CombinerParams#COMBINER_UP_TO} param, or its default when unset. */
protected int upTo;

/**
 * Reads the combiner settings from the request.
 *
 * @param requestParams the params to read {@link CombinerParams#COMBINER_UP_TO} from
 */
public QueriesCombiner(SolrParams requestParams) {
  final int fallbackUpTo = CombinerParams.COMBINER_UP_TO_DEFAULT;
  upTo = requestParams.getInt(CombinerParams.COMBINER_UP_TO, fallbackUpTo);
}

/**
 * Combines the given ranked lists into a single result.
 *
 * @param rankedLists the individual query results to combine
 * @return the combined result
 */
public abstract QueryResult combine(QueryResult[] rankedLists);

/**
 * Creates the result object for a combination and carries over the status flags of the inputs.
 *
 * <ul>
 *   <li>{@code partialResults} is set if any input has partial results.
 *   <li>{@code segmentTerminatedEarly} is the logical OR of the inputs that report a value; when
 *       no input reports one, it is left unset rather than forced to {@code false}.
 *   <li>The next cursor mark is taken from the first input, if there is one.
 * </ul>
 *
 * @param rankedLists the individual query results; may be empty
 * @return a new result holding only the merged flags and cursor mark
 */
protected QueryResult initCombinedResult(QueryResult[] rankedLists) {
  QueryResult combinedRankedList = new QueryResult();

  boolean partialResults = false;
  // Stays null unless at least one input reports a value, preserving the "not reported" state.
  Boolean segmentTerminatedEarly = null;
  for (QueryResult result : rankedLists) {
    partialResults |= result.isPartialResults();
    Boolean terminatedEarly = result.getSegmentTerminatedEarly();
    if (terminatedEarly != null) {
      segmentTerminatedEarly = terminatedEarly || Boolean.TRUE.equals(segmentTerminatedEarly);
    }
  }
  combinedRankedList.setPartialResults(partialResults);
  if (segmentTerminatedEarly != null) {
    combinedRankedList.setSegmentTerminatedEarly(segmentTerminatedEarly);
  }

  // An empty input has no first list to take a cursor mark from.
  if (rankedLists.length > 0) {
    combinedRankedList.setNextCursorMark(rankedLists[0].getNextCursorMark());
  }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure about picking the first list here and/or what does use of a cursor mark mean when combining queries. Alternative might be to just disallow cursor marks when combining queries.

  return combinedRankedList;
}

/**
 * Produces explanations for the combined queries.
 *
 * <p>NOTE(review): no implementation or caller is visible here, so the parameter descriptions
 * below are inferred from names and types only and should be confirmed.
 *
 * @param queryKeys presumably the keys identifying each combined query
 * @param queries presumably the parsed queries, parallel to {@code queryKeys}
 * @param resultsPerQuery presumably the document list returned by each query
 * @param searcher the searcher available for computing explanations
 * @param schema the index schema
 * @return a named list of explanations
 * @throws IOException if computing an explanation fails with an I/O error
 */
public abstract NamedList<Explanation> getExplanations(
String[] queryKeys,
List<Query> queries,
List<DocList> resultsPerQuery,
SolrIndexSearcher searcher,
IndexSchema schema)
throws IOException;

/**
 * Selects the combiner named by {@link CombinerParams#COMBINER_ALGORITHM}, defaulting to
 * reciprocal rank fusion when the param is not set.
 *
 * @param requestParams the params to read the algorithm (and combiner settings) from
 * @return the combiner implementation for the requested algorithm
 * @throws SolrException with {@code BAD_REQUEST} if the algorithm name is not recognised
 */
public static QueriesCombiner getImplementation(SolrParams requestParams) {
  final String algorithm =
      requestParams.get(CombinerParams.COMBINER_ALGORITHM, RECIPROCAl_RANK_FUSION);
  if (algorithm.equals(RECIPROCAl_RANK_FUSION)) {
    return new ReciprocalRankFusion(requestParams);
  }
  throw new SolrException(
      SolrException.ErrorCode.BAD_REQUEST, "Unknown Combining algorithm: " + algorithm);
}
}
Loading
Loading