@@ -902,6 +902,67 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded)
902902 return true ;
903903 }
904904
905+ protected abstract static class ShardDocQueue {
906+ public abstract boolean push (ShardDoc shardDoc );
907+
908+ public abstract Map <Object , ShardDoc > resultIds (int offset );
909+ }
910+ ;
911+
912+ protected ShardDocQueue newShardDocQueue (
913+ SolrIndexSearcher searcher , SortField [] sortFields , Integer size ) {
914+ return new ShardDocQueue () {
915+
916+ // id to shard mapping, to eliminate any accidental dups
917+ private final HashMap <Object , String > uniqueDoc = new HashMap <>();
918+
919+ private final ShardFieldSortedHitQueue queue =
920+ new ShardFieldSortedHitQueue (sortFields , size , searcher );
921+
922+ @ Override
923+ public boolean push (ShardDoc shardDoc ) {
924+ final String prevShard = uniqueDoc .put (shardDoc .id , shardDoc .shard );
925+ if (prevShard != null ) {
926+ // duplicate detected
927+
928+ // For now, just always use the first encountered since we can't currently
929+ // remove the previous one added to the priority queue. If we switched
930+ // to the Java5 PriorityQueue, this would be easier.
931+ return false ;
932+ // make which duplicate is used deterministic based on shard
933+ // if (prevShard.compareTo(shardDoc.shard) >= 0) {
934+ // TODO: remove previous from priority queue
935+ // return false;
936+ // }
937+ }
938+
939+ queue .insertWithOverflow (shardDoc );
940+ return true ;
941+ }
942+
943+ @ Override
944+ public Map <Object , ShardDoc > resultIds (int offset ) {
945+ final Map <Object , ShardDoc > resultIds = new HashMap <>();
946+
947+ // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
948+ // So we want to pop the last documents off the queue to get
949+ // the docs offset -> queuesize
950+ int resultSize = queue .size () - offset ;
951+ resultSize = Math .max (0 , resultSize ); // there may not be any docs in range
952+
953+ for (int i = resultSize - 1 ; i >= 0 ; i --) {
954+ ShardDoc shardDoc = queue .pop ();
955+ shardDoc .positionInResponse = i ;
956+ // Need the toString() for correlation with other lists that must
957+ // be strings (like keys in highlighting, explain, etc)
958+ resultIds .put (shardDoc .id .toString (), shardDoc );
959+ }
960+
961+ return resultIds ;
962+ }
963+ };
964+ }
965+
905966 protected void mergeIds (ResponseBuilder rb , ShardRequest sreq ) {
906967 List <MergeStrategy > mergeStrategies = rb .getMergeStrategies ();
907968 if (mergeStrategies != null ) {
@@ -944,14 +1005,10 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
9441005 IndexSchema schema = rb .req .getSchema ();
9451006 SchemaField uniqueKeyField = schema .getUniqueKeyField ();
9461007
947- // id to shard mapping, to eliminate any accidental dups
948- HashMap <Object , String > uniqueDoc = new HashMap <>();
949-
9501008 // Merge the docs via a priority queue so we don't have to sort *all* of the
9511009 // documents... we only need to order the top (rows+start)
952- final ShardFieldSortedHitQueue queue =
953- new ShardFieldSortedHitQueue (
954- sortFields , ss .getOffset () + ss .getCount (), rb .req .getSearcher ());
1010+ final ShardDocQueue shardDocQueue =
1011+ newShardDocQueue (rb .req .getSearcher (), sortFields , ss .getOffset () + ss .getCount ());
9551012
9561013 NamedList <Object > shardInfo = null ;
9571014 if (rb .req .getParams ().getBool (ShardParams .SHARDS_INFO , false )) {
@@ -1122,23 +1179,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
11221179 for (int i = 0 ; i < docs .size (); i ++) {
11231180 SolrDocument doc = docs .get (i );
11241181 Object id = doc .getFieldValue (uniqueKeyField .getName ());
1125-
1126- String prevShard = uniqueDoc .put (id , srsp .getShard ());
1127- if (prevShard != null ) {
1128- // duplicate detected
1129- numFound --;
1130-
1131- // For now, just always use the first encountered since we can't currently
1132- // remove the previous one added to the priority queue. If we switched
1133- // to the Java5 PriorityQueue, this would be easier.
1134- continue ;
1135- // make which duplicate is used deterministic based on shard
1136- // if (prevShard.compareTo(srsp.shard) >= 0) {
1137- // TODO: remove previous from priority queue
1138- // continue;
1139- // }
1140- }
1141-
11421182 ShardDoc shardDoc = new ShardDoc ();
11431183 shardDoc .id = id ;
11441184 shardDoc .shard = srsp .getShard ();
@@ -1157,42 +1197,18 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
11571197
11581198 shardDoc .sortFieldValues = unmarshalledSortFieldValues ;
11591199
1160- queue .insertWithOverflow (shardDoc );
1200+ if (!shardDocQueue .push (shardDoc )) {
1201+ numFound --;
1202+ }
11611203 } // end for-each-doc-in-response
11621204 } // end for-each-response
11631205
1164- // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
1165- // So we want to pop the last documents off the queue to get
1166- // the docs offset -> queuesize
1167- int resultSize = queue .size () - ss .getOffset ();
1168- resultSize = Math .max (0 , resultSize ); // there may not be any docs in range
1169-
1170- Map <Object , ShardDoc > resultIds = new HashMap <>();
1171- for (int i = resultSize - 1 ; i >= 0 ; i --) {
1172- ShardDoc shardDoc = queue .pop ();
1173- shardDoc .positionInResponse = i ;
1174- // Need the toString() for correlation with other lists that must
1175- // be strings (like keys in highlighting, explain, etc)
1176- resultIds .put (shardDoc .id .toString (), shardDoc );
1177- }
1178-
11791206 // Add hits for distributed requests
11801207 // https://issues.apache.org/jira/browse/SOLR-3518
11811208 rb .rsp .addToLog ("hits" , numFound );
11821209
1183- SolrDocumentList responseDocs = new SolrDocumentList ();
1184- if (maxScore != null ) responseDocs .setMaxScore (maxScore );
1185- responseDocs .setNumFound (numFound );
1186- responseDocs .setNumFoundExact (hitCountIsExact );
1187- responseDocs .setStart (ss .getOffset ());
1188- // size appropriately
1189- for (int i = 0 ; i < resultSize ; i ++) responseDocs .add (null );
1190-
1191- // save these results in a private area so we can access them
1192- // again when retrieving stored fields.
1193- // TODO: use ResponseBuilder (w/ comments) or the request context?
1194- rb .resultIds = resultIds ;
1195- rb .setResponseDocs (responseDocs );
1210+ setResultIdsAndResponseDocs (
1211+ rb , shardDocQueue , maxScore , numFound , hitCountIsExact , ss .getOffset ());
11961212
11971213 populateNextCursorMarkFromMergedShards (rb );
11981214
@@ -1238,6 +1254,30 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
12381254 }
12391255 }
12401256
1257+ protected void setResultIdsAndResponseDocs (
1258+ ResponseBuilder rb ,
1259+ ShardDocQueue shardDocQueue ,
1260+ Float maxScore ,
1261+ long numFound ,
1262+ boolean hitCountIsExact ,
1263+ int offset ) {
1264+ final Map <Object , ShardDoc > resultIds = shardDocQueue .resultIds (offset );
1265+
1266+ final SolrDocumentList responseDocs = new SolrDocumentList ();
1267+ if (maxScore != null ) responseDocs .setMaxScore (maxScore );
1268+ responseDocs .setNumFound (numFound );
1269+ responseDocs .setNumFoundExact (hitCountIsExact );
1270+ responseDocs .setStart (offset );
1271+ // size appropriately
1272+ for (int i = 0 ; i < resultIds .size (); i ++) responseDocs .add (null );
1273+
1274+ // save these results in a private area so we can access them
1275+ // again when retrieving stored fields.
1276+ // TODO: use ResponseBuilder (w/ comments) or the request context?
1277+ rb .resultIds = resultIds ;
1278+ rb .setResponseDocs (responseDocs );
1279+ }
1280+
12411281 /**
12421282 * Inspects the state of the {@link ResponseBuilder} and populates the next {@link
12431283 * ResponseBuilder#setNextCursorMark} as appropriate based on the merged sort values from
0 commit comments