apache
diff --git a/‎changelog/unreleased/SOLR-17319-added_combined_query_RRF.yml‎
Lines changed: 8 additions & 0 deletions b/‎changelog/unreleased/SOLR-17319-added_combined_query_RRF.yml‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java‎
Lines changed: 622 additions & 0 deletions b/‎solr/core/src/java/org/apache/solr/handler/component/CombinedQueryComponent.java‎
Lines changed: 622 additions & 0 deletions
diff --git a/‎solr/core/src/java/org/apache/solr/handler/component/CombinedQueryResponseBuilder.java‎
Lines changed: 55 additions & 0 deletions b/‎solr/core/src/java/org/apache/solr/handler/component/CombinedQueryResponseBuilder.java‎
Lines changed: 55 additions & 0 deletions
diff --git a/‎solr/core/src/java/org/apache/solr/handler/component/CombinedQuerySearchHandler.java‎
Lines changed: 64 additions & 0 deletions b/‎solr/core/src/java/org/apache/solr/handler/component/CombinedQuerySearchHandler.java‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java‎
Lines changed: 92 additions & 52 deletions b/‎solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java‎
Lines changed: 92 additions & 52 deletions
@@ -0,0 +1,8 @@
+title: New CombinedQuerySearchHandler and CombinedQueryComponent for implementing hybrid search with reciprocal rank fusion (RRF).
+type: added
+authors:
+  - name: Sonu Sharma
+  - name: David Smiley
+links:
+  - name: SOLR-17319
+    url: https://issues.apache.org/jira/browse/SOLR-17319
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.component;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+/**
+ * A {@link ResponseBuilder} that additionally keeps a list of child {@link ResponseBuilder}s in
+ * {@link #responseBuilders} and can copy its own component flags onto each of them via {@link
+ * #propagate()}.
+ *
+ * <p>The child list starts out empty and is not populated by this class; presumably {@link
+ * CombinedQueryComponent} adds the children (to be confirmed against that class).
+ */
+class CombinedQueryResponseBuilder extends ResponseBuilder {
+
+  /** Child builders that receive this builder's flags when {@link #propagate()} is called. */
+  final List<ResponseBuilder> responseBuilders = new ArrayList<>();
+
+  CombinedQueryResponseBuilder(
+      SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components) {
+    super(req, rsp, components);
+  }
+
+  /**
+   * Copies this builder's settings onto every child builder: the doc-set and doc-list
+   * requirements, the facet, highlight, expand, terms and stats switches, and the
+   * distributed-stats-disabled setting. Meant to be invoked once the components have been
+   * prepared, so that values set on this builder during prepare reach the children.
+   */
+  final void propagate() {
+    for (ResponseBuilder child : responseBuilders) {
+      // what the searcher has to produce
+      child.setNeedDocSet(isNeedDocSet());
+      child.setNeedDocList(isNeedDocList());
+
+      // which optional components are active
+      child.doFacets = doFacets;
+      child.doHighlights = doHighlights;
+      child.doExpand = doExpand;
+      child.doTerms = doTerms;
+      child.doStats = doStats;
+
+      child.setDistribStatsDisabled(isDistribStatsDisabled());
+    }
+  }
+}
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.component;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.solr.common.params.CombinerParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+
+/**
+ * A {@link SearchHandler} that combines/fuses multiple queries (e.g. RRF) when the boolean {@link
+ * CombinerParams#COMBINER} request param is true. When the param is absent or false, requests are
+ * handled with the regular {@link ResponseBuilder} of {@link SearchHandler}.
+ *
+ * @see CombinedQueryComponent
+ */
+public class CombinedQuerySearchHandler extends SearchHandler {
+
+  /**
+   * Returns a {@link CombinedQueryResponseBuilder} forced into distributed mode when the combiner
+   * param is true; otherwise defers to the superclass.
+   */
+  @Override
+  protected ResponseBuilder newResponseBuilder(
+      SolrQueryRequest req, SolrQueryResponse rsp, List<SearchComponent> components) {
+    final boolean combinerEnabled = req.getParams().getBool(CombinerParams.COMBINER, false);
+    if (!combinerEnabled) {
+      return super.newResponseBuilder(req, rsp, components);
+    }
+
+    final CombinedQueryResponseBuilder combinedRb =
+        new CombinedQueryResponseBuilder(req, rsp, components);
+    // CombinedQueryComponent is only designed to work with distributed search.
+    combinedRb.setForcedDistrib(true);
+    return combinedRb;
+  }
+
+  /**
+   * After the superclass hook has run, pushes the state of a {@link CombinedQueryResponseBuilder}
+   * down to its child builders. Any other kind of builder is left alone.
+   */
+  @Override
+  protected void postPrepareComponents(ResponseBuilder rb) {
+    super.postPrepareComponents(rb);
+    if (!(rb instanceof CombinedQueryResponseBuilder)) {
+      return;
+    }
+    // prepare is done, so the flags set on the parent builder can now reach the sub-builders
+    ((CombinedQueryResponseBuilder) rb).propagate();
+  }
+
+  /**
+   * Returns a copy of the superclass' default component names in which the first entry is swapped
+   * for {@link CombinedQueryComponent#COMPONENT_NAME}. The swapped-out entry is expected to be
+   * {@link QueryComponent#COMPONENT_NAME}; this is only verified when assertions are enabled.
+   */
+  @Override
+  protected List<String> getDefaultComponents() {
+    final List<String> componentNames = new ArrayList<>(super.getDefaultComponents());
+    final String previousFirst = componentNames.set(0, CombinedQueryComponent.COMPONENT_NAME);
+    assert previousFirst.equals(QueryComponent.COMPONENT_NAME);
+    return componentNames;
+  }
+}
@@ -902,6 +902,67 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded)
     return true;
   }
 
+  /**
+   * Collects {@link ShardDoc}s while shard responses are merged and afterwards hands back the
+   * documents that belong in the response. Instances are created by {@code newShardDocQueue}, which
+   * subclasses may override to supply a different merging strategy.
+   */
+  protected abstract static class ShardDocQueue {
+    /**
+     * Offers a document to the queue.
+     *
+     * @param shardDoc the document to add
+     * @return {@code false} if the document was rejected (the default implementation rejects a
+     *     document whose id was already pushed); {@code mergeIds} decrements its {@code numFound}
+     *     for every rejected document
+     */
+    public abstract boolean push(ShardDoc shardDoc);
+
+    /**
+     * Produces the documents to return, skipping the first {@code offset} entries.
+     *
+     * @param offset number of leading documents to leave out of the result
+     * @return the selected documents keyed by id; the default implementation keys them by {@code
+     *     id.toString()} and sets {@link ShardDoc#positionInResponse} on each
+     */
+    public abstract Map<Object, ShardDoc> resultIds(int offset);
+  }
+
+  /**
+   * Creates the default {@link ShardDocQueue}: a {@link ShardFieldSortedHitQueue} combined with
+   * duplicate detection on the document id.
+   *
+   * @param searcher handed to the underlying {@link ShardFieldSortedHitQueue}
+   * @param sortFields sort criteria handed to the underlying hit queue
+   * @param size capacity handed to the underlying hit queue ({@code mergeIds} passes offset +
+   *     count)
+   * @return a new, empty queue
+   */
+  protected ShardDocQueue newShardDocQueue(
+      SolrIndexSearcher searcher, SortField[] sortFields, Integer size) {
+    return new ShardDocQueue() {
+
+      // Shard recorded per document id; only consulted to spot accidental duplicates.
+      private final Map<Object, String> shardById = new HashMap<>();
+
+      private final ShardFieldSortedHitQueue hitQueue =
+          new ShardFieldSortedHitQueue(sortFields, size, searcher);
+
+      @Override
+      public boolean push(ShardDoc shardDoc) {
+        // A non-null previous mapping means this id was already pushed. The first document seen
+        // always wins because an entry already in the priority queue cannot currently be removed.
+        // TODO: choosing the winner deterministically (e.g. by comparing shard names) would
+        // require removing the earlier entry from the priority queue.
+        final boolean firstOccurrence = shardById.put(shardDoc.id, shardDoc.shard) == null;
+        if (firstOccurrence) {
+          hitQueue.insertWithOverflow(shardDoc);
+        }
+        return firstOccurrence;
+      }
+
+      @Override
+      public Map<Object, ShardDoc> resultIds(int offset) {
+        // The queue holds at most start + rows docs. Only those past the offset are wanted, so
+        // pop that many off the queue; there may be none in range at all.
+        final int wanted = Math.max(0, hitQueue.size() - offset);
+
+        final Map<Object, ShardDoc> docsById = new HashMap<>();
+        // Docs are popped in reverse response order, so positions are assigned from the back.
+        for (int remaining = wanted; remaining > 0; remaining--) {
+          final ShardDoc popped = hitQueue.pop();
+          popped.positionInResponse = remaining - 1;
+          // Keyed by toString() so the ids correlate with other lists whose keys must be
+          // strings (highlighting, explain, etc).
+          docsById.put(popped.id.toString(), popped);
+        }
+        return docsById;
+      }
+    };
+  }
+
   protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
     List<MergeStrategy> mergeStrategies = rb.getMergeStrategies();
     if (mergeStrategies != null) {
@@ -944,14 +1005,10 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
     IndexSchema schema = rb.req.getSchema();
     SchemaField uniqueKeyField = schema.getUniqueKeyField();
 
-    // id to shard mapping, to eliminate any accidental dups
-    HashMap<Object, String> uniqueDoc = new HashMap<>();
-
     // Merge the docs via a priority queue so we don't have to sort *all* of the
     // documents... we only need to order the top (rows+start)
-    final ShardFieldSortedHitQueue queue =
-        new ShardFieldSortedHitQueue(
-            sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());
+    final ShardDocQueue shardDocQueue =
+        newShardDocQueue(rb.req.getSearcher(), sortFields, ss.getOffset() + ss.getCount());
 
     NamedList<Object> shardInfo = null;
     if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
@@ -1122,23 +1179,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
       for (int i = 0; i < docs.size(); i++) {
         SolrDocument doc = docs.get(i);
         Object id = doc.getFieldValue(uniqueKeyField.getName());
-
-        String prevShard = uniqueDoc.put(id, srsp.getShard());
-        if (prevShard != null) {
-          // duplicate detected
-          numFound--;
-
-          // For now, just always use the first encountered since we can't currently
-          // remove the previous one added to the priority queue.  If we switched
-          // to the Java5 PriorityQueue, this would be easier.
-          continue;
-          // make which duplicate is used deterministic based on shard
-          // if (prevShard.compareTo(srsp.shard) >= 0) {
-          //  TODO: remove previous from priority queue
-          //  continue;
-          // }
-        }
-
         ShardDoc shardDoc = new ShardDoc();
         shardDoc.id = id;
         shardDoc.shard = srsp.getShard();
@@ -1157,42 +1197,18 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
 
         shardDoc.sortFieldValues = unmarshalledSortFieldValues;
 
-        queue.insertWithOverflow(shardDoc);
+        if (!shardDocQueue.push(shardDoc)) {
+          numFound--;
+        }
       } // end for-each-doc-in-response
     } // end for-each-response
 
-    // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
-    // So we want to pop the last documents off the queue to get
-    // the docs offset -> queuesize
-    int resultSize = queue.size() - ss.getOffset();
-    resultSize = Math.max(0, resultSize); // there may not be any docs in range
-
-    Map<Object, ShardDoc> resultIds = new HashMap<>();
-    for (int i = resultSize - 1; i >= 0; i--) {
-      ShardDoc shardDoc = queue.pop();
-      shardDoc.positionInResponse = i;
-      // Need the toString() for correlation with other lists that must
-      // be strings (like keys in highlighting, explain, etc)
-      resultIds.put(shardDoc.id.toString(), shardDoc);
-    }
-
     // Add hits for distributed requests
     // https://issues.apache.org/jira/browse/SOLR-3518
     rb.rsp.addToLog("hits", numFound);
 
-    SolrDocumentList responseDocs = new SolrDocumentList();
-    if (maxScore != null) responseDocs.setMaxScore(maxScore);
-    responseDocs.setNumFound(numFound);
-    responseDocs.setNumFoundExact(hitCountIsExact);
-    responseDocs.setStart(ss.getOffset());
-    // size appropriately
-    for (int i = 0; i < resultSize; i++) responseDocs.add(null);
-
-    // save these results in a private area so we can access them
-    // again when retrieving stored fields.
-    // TODO: use ResponseBuilder (w/ comments) or the request context?
-    rb.resultIds = resultIds;
-    rb.setResponseDocs(responseDocs);
+    setResultIdsAndResponseDocs(
+        rb, shardDocQueue, maxScore, numFound, hitCountIsExact, ss.getOffset());
 
     populateNextCursorMarkFromMergedShards(rb);
 
@@ -1238,6 +1254,30 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
     }
   }
 
+  /**
+   * Asks the queue for the merged result ids and stores them on the {@link ResponseBuilder},
+   * together with a {@link SolrDocumentList} that has one {@code null} placeholder per result id.
+   *
+   * @param rb receives {@code resultIds} and the response docs
+   * @param shardDocQueue queue holding the merged shard documents
+   * @param maxScore max score to set on the response docs; skipped when {@code null}
+   * @param numFound value for the response docs' numFound
+   * @param hitCountIsExact value for the response docs' numFoundExact
+   * @param offset passed to {@link ShardDocQueue#resultIds(int)} and used as the response start
+   */
+  protected void setResultIdsAndResponseDocs(
+      ResponseBuilder rb,
+      ShardDocQueue shardDocQueue,
+      Float maxScore,
+      long numFound,
+      boolean hitCountIsExact,
+      int offset) {
+    final Map<Object, ShardDoc> mergedIds = shardDocQueue.resultIds(offset);
+
+    final SolrDocumentList placeholderDocs = new SolrDocumentList();
+    if (maxScore != null) {
+      placeholderDocs.setMaxScore(maxScore);
+    }
+    placeholderDocs.setNumFound(numFound);
+    placeholderDocs.setNumFoundExact(hitCountIsExact);
+    placeholderDocs.setStart(offset);
+
+    // size appropriately: one empty slot per merged id
+    final int slots = mergedIds.size();
+    for (int slot = 0; slot < slots; slot++) {
+      placeholderDocs.add(null);
+    }
+
+    // save these results in a private area so we can access them
+    // again when retrieving stored fields.
+    // TODO: use ResponseBuilder (w/ comments) or the request context?
+    rb.resultIds = mergedIds;
+    rb.setResponseDocs(placeholderDocs);
+  }
+
   /**
    * Inspects the state of the {@link ResponseBuilder} and populates the next {@link
    * ResponseBuilder#setNextCursorMark} as appropriate based on the merged sort values from