@@ -907,6 +907,67 @@ protected boolean addFL(StringBuilder fl, String field, boolean additionalAdded)
907907 return true ;
908908 }
909909
910+ protected abstract static class ShardDocQueue {
911+ public abstract boolean push (ShardDoc shardDoc );
912+
913+ public abstract Map <Object , ShardDoc > resultIds (int offset );
914+ }
915+ ;
916+
917+ protected ShardDocQueue newShardDocQueue (
918+ SolrIndexSearcher searcher , SortField [] sortFields , Integer size ) {
919+ return new ShardDocQueue () {
920+
921+ // id to shard mapping, to eliminate any accidental dups
922+ private final HashMap <Object , String > uniqueDoc = new HashMap <>();
923+
924+ private final ShardFieldSortedHitQueue queue =
925+ new ShardFieldSortedHitQueue (sortFields , size , searcher );
926+
927+ @ Override
928+ public boolean push (ShardDoc shardDoc ) {
929+ final String prevShard = uniqueDoc .put (shardDoc .id , shardDoc .shard );
930+ if (prevShard != null ) {
931+ // duplicate detected
932+
933+ // For now, just always use the first encountered since we can't currently
934+ // remove the previous one added to the priority queue. If we switched
935+ // to the Java5 PriorityQueue, this would be easier.
936+ return false ;
937+ // make which duplicate is used deterministic based on shard
938+ // if (prevShard.compareTo(shardDoc.shard) >= 0) {
939+ // TODO: remove previous from priority queue
940+ // return false;
941+ // }
942+ }
943+
944+ queue .insertWithOverflow (shardDoc );
945+ return true ;
946+ }
947+
948+ @ Override
949+ public Map <Object , ShardDoc > resultIds (int offset ) {
950+ final Map <Object , ShardDoc > resultIds = new HashMap <>();
951+
952+ // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
953+ // So we want to pop the last documents off the queue to get
954+ // the docs offset -> queuesize
955+ int resultSize = queue .size () - offset ;
956+ resultSize = Math .max (0 , resultSize ); // there may not be any docs in range
957+
958+ for (int i = resultSize - 1 ; i >= 0 ; i --) {
959+ ShardDoc shardDoc = queue .pop ();
960+ shardDoc .positionInResponse = i ;
961+ // Need the toString() for correlation with other lists that must
962+ // be strings (like keys in highlighting, explain, etc)
963+ resultIds .put (shardDoc .id .toString (), shardDoc );
964+ }
965+
966+ return resultIds ;
967+ }
968+ };
969+ }
970+
910971 protected void mergeIds (ResponseBuilder rb , ShardRequest sreq ) {
911972 List <MergeStrategy > mergeStrategies = rb .getMergeStrategies ();
912973 if (mergeStrategies != null ) {
@@ -949,14 +1010,10 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
9491010 IndexSchema schema = rb .req .getSchema ();
9501011 SchemaField uniqueKeyField = schema .getUniqueKeyField ();
9511012
952- // id to shard mapping, to eliminate any accidental dups
953- HashMap <Object , String > uniqueDoc = new HashMap <>();
954-
9551013 // Merge the docs via a priority queue so we don't have to sort *all* of the
9561014 // documents... we only need to order the top (rows+start)
957- final ShardFieldSortedHitQueue queue =
958- new ShardFieldSortedHitQueue (
959- sortFields , ss .getOffset () + ss .getCount (), rb .req .getSearcher ());
1015+ final ShardDocQueue shardDocQueue =
1016+ newShardDocQueue (rb .req .getSearcher (), sortFields , ss .getOffset () + ss .getCount ());
9601017
9611018 NamedList <Object > shardInfo = null ;
9621019 if (rb .req .getParams ().getBool (ShardParams .SHARDS_INFO , false )) {
@@ -1127,23 +1184,6 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
11271184 for (int i = 0 ; i < docs .size (); i ++) {
11281185 SolrDocument doc = docs .get (i );
11291186 Object id = doc .getFieldValue (uniqueKeyField .getName ());
1130-
1131- String prevShard = uniqueDoc .put (id , srsp .getShard ());
1132- if (prevShard != null ) {
1133- // duplicate detected
1134- numFound --;
1135-
1136- // For now, just always use the first encountered since we can't currently
1137- // remove the previous one added to the priority queue. If we switched
1138- // to the Java5 PriorityQueue, this would be easier.
1139- continue ;
1140- // make which duplicate is used deterministic based on shard
1141- // if (prevShard.compareTo(srsp.shard) >= 0) {
1142- // TODO: remove previous from priority queue
1143- // continue;
1144- // }
1145- }
1146-
11471187 ShardDoc shardDoc = new ShardDoc ();
11481188 shardDoc .id = id ;
11491189 shardDoc .shard = srsp .getShard ();
@@ -1162,42 +1202,18 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
11621202
11631203 shardDoc .sortFieldValues = unmarshalledSortFieldValues ;
11641204
1165- queue .insertWithOverflow (shardDoc );
1205+ if (!shardDocQueue .push (shardDoc )) {
1206+ numFound --;
1207+ }
11661208 } // end for-each-doc-in-response
11671209 } // end for-each-response
11681210
1169- // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
1170- // So we want to pop the last documents off the queue to get
1171- // the docs offset -> queuesize
1172- int resultSize = queue .size () - ss .getOffset ();
1173- resultSize = Math .max (0 , resultSize ); // there may not be any docs in range
1174-
1175- Map <Object , ShardDoc > resultIds = new HashMap <>();
1176- for (int i = resultSize - 1 ; i >= 0 ; i --) {
1177- ShardDoc shardDoc = queue .pop ();
1178- shardDoc .positionInResponse = i ;
1179- // Need the toString() for correlation with other lists that must
1180- // be strings (like keys in highlighting, explain, etc)
1181- resultIds .put (shardDoc .id .toString (), shardDoc );
1182- }
1183-
11841211 // Add hits for distributed requests
11851212 // https://issues.apache.org/jira/browse/SOLR-3518
11861213 rb .rsp .addToLog ("hits" , numFound );
11871214
1188- SolrDocumentList responseDocs = new SolrDocumentList ();
1189- if (maxScore != null ) responseDocs .setMaxScore (maxScore );
1190- responseDocs .setNumFound (numFound );
1191- responseDocs .setNumFoundExact (hitCountIsExact );
1192- responseDocs .setStart (ss .getOffset ());
1193- // size appropriately
1194- for (int i = 0 ; i < resultSize ; i ++) responseDocs .add (null );
1195-
1196- // save these results in a private area so we can access them
1197- // again when retrieving stored fields.
1198- // TODO: use ResponseBuilder (w/ comments) or the request context?
1199- rb .resultIds = resultIds ;
1200- rb .setResponseDocs (responseDocs );
1215+ setResultIdsAndResponseDocs (
1216+ rb , shardDocQueue , maxScore , numFound , hitCountIsExact , ss .getOffset ());
12011217
12021218 populateNextCursorMarkFromMergedShards (rb );
12031219
@@ -1243,6 +1259,30 @@ protected void mergeIds(ResponseBuilder rb, ShardRequest sreq) {
12431259 }
12441260 }
12451261
1262+ protected void setResultIdsAndResponseDocs (
1263+ ResponseBuilder rb ,
1264+ ShardDocQueue shardDocQueue ,
1265+ Float maxScore ,
1266+ long numFound ,
1267+ boolean hitCountIsExact ,
1268+ int offset ) {
1269+ final Map <Object , ShardDoc > resultIds = shardDocQueue .resultIds (offset );
1270+
1271+ final SolrDocumentList responseDocs = new SolrDocumentList ();
1272+ if (maxScore != null ) responseDocs .setMaxScore (maxScore );
1273+ responseDocs .setNumFound (numFound );
1274+ responseDocs .setNumFoundExact (hitCountIsExact );
1275+ responseDocs .setStart (offset );
1276+ // size appropriately
1277+ for (int i = 0 ; i < resultIds .size (); i ++) responseDocs .add (null );
1278+
1279+ // save these results in a private area so we can access them
1280+ // again when retrieving stored fields.
1281+ // TODO: use ResponseBuilder (w/ comments) or the request context?
1282+ rb .resultIds = resultIds ;
1283+ rb .setResponseDocs (responseDocs );
1284+ }
1285+
12461286 /**
12471287 * Inspects the state of the {@link ResponseBuilder} and populates the next {@link
12481288 * ResponseBuilder#setNextCursorMark} as appropriate based on the merged sort values from
0 commit comments