@@ -105,65 +105,112 @@ def test_valid_score_modifiers_format(self):
105
105
106
106
@mark.fixed
class TestScoreModifierWithRerankCountSearch(MarqoTestCase):
    """Hybrid search tests for root-level ``score_modifiers`` used with ``rerank_depth``.

    The structured and unstructured index variants are separate test methods;
    both delegate their search/assertion steps to one shared private helper so
    the expected behaviour is stated in exactly one place.
    """

    def setUp(self) -> None:
        """Run the base-class setup, then define the documents shared by all tests."""
        super().setUp()
        self.docs_list = [
            {"_id": "both1", "text_field_1": "dogs", "int_field_1": -1},  # HIGH tensor, LOW lexical
            {"_id": "tensor1", "text_field_1": "puppies", "int_field_1": 2},  # MID tensor
            {"_id": "tensor2", "text_field_1": "random words", "int_field_1": 3},  # LOW tensor
        ]

    def _assert_rrf_score_modifiers_with_rerank_depth(self, test_index_name) -> None:
        """Check score modification at rerank depths 3 and 1 on an already-populated index.

        The caller must have added ``self.docs_list`` to ``test_index_name``
        before calling this helper. The name deliberately does not start with
        ``test`` so that it is not collected as a test on its own.

        Args:
            test_index_name: Name of the index to search, as returned by
                ``self.get_test_index_name``.
        """
        # Get unmodified scores
        # Unmodified result order should be: both1, tensor1, tensor2
        unmodified_results = self.client.index(test_index_name).search(
            q="dogs", search_method="HYBRID", limit=3
        )
        unmodified_scores = {hit["_id"]: hit["_score"] for hit in unmodified_results["hits"]}
        self.assertEqual(
            ["both1", "tensor1", "tensor2"],
            [hit["_id"] for hit in unmodified_results["hits"]],
        )

        # Both modifiers use int_field_1 with weight 1, so a rescored hit is
        # expected to end up at: int_field_1 * unmodified_score + int_field_1
        score_modifiers = {
            "multiply_score_by": [
                {"field_name": "int_field_1", "weight": 1}
            ],
            "add_to_score": [
                {"field_name": "int_field_1", "weight": 1}
            ],
        }

        # Get modified scores (rank all 3)
        # Modified result order should be: tensor2, tensor1, both1
        modified_results = self.client.index(test_index_name).search(
            q="dogs", search_method="HYBRID",
            limit=3, rerank_depth=3, score_modifiers=score_modifiers
        )
        modified_hits = modified_results["hits"]
        self.assertEqual(["tensor2", "tensor1", "both1"], [hit["_id"] for hit in modified_hits])
        self.assertAlmostEqual(modified_hits[0]["_score"], 3 * unmodified_scores["tensor2"] + 3)
        self.assertAlmostEqual(modified_hits[1]["_score"], 2 * unmodified_scores["tensor1"] + 2)
        self.assertAlmostEqual(modified_hits[2]["_score"], -1 * unmodified_scores["both1"] - 1)

        # Get modified scores (rank only 1). Only both1 should be rescored (goes to the bottom)
        # Modified result order should be: tensor1, tensor2, both1
        modified_results = self.client.index(test_index_name).search(
            q="dogs", search_method="HYBRID",
            limit=3, rerank_depth=1, score_modifiers=score_modifiers
        )
        modified_hits = modified_results["hits"]
        self.assertEqual(["tensor1", "tensor2", "both1"], [hit["_id"] for hit in modified_hits])
        self.assertAlmostEqual(modified_hits[0]["_score"], unmodified_scores["tensor1"])  # unmodified
        self.assertAlmostEqual(modified_hits[1]["_score"], unmodified_scores["tensor2"])  # unmodified
        self.assertAlmostEqual(modified_hits[2]["_score"], -1 * unmodified_scores["both1"] - 1)  # modified

    def test_hybrid_search_structured_rrf_score_modifiers_with_rerank_depth(self):
        """
        Test that hybrid search with RRF can use root level score_modifiers and rerank_depth
        For structured indexes
        """
        test_index_name = self.get_test_index_name(
            cloud_test_index_to_use=CloudTestIndex.structured_text,
            open_source_test_index_name=self.structured_index_name
        )
        # No tensor_fields argument is passed for the structured index.
        self.client.index(test_index_name).add_documents(self.docs_list)

        self._assert_rrf_score_modifiers_with_rerank_depth(test_index_name)

    def test_hybrid_search_unstructured_rrf_score_modifiers_with_rerank_depth(self):
        """
        Test that hybrid search with RRF can use root level score_modifiers and rerank_depth
        For unstructured indexes
        """
        test_index_name = self.get_test_index_name(
            cloud_test_index_to_use=CloudTestIndex.unstructured_text,
            open_source_test_index_name=self.unstructured_index_name
        )
        # The unstructured index is given its tensor fields at add-documents time.
        self.client.index(test_index_name).add_documents(self.docs_list, tensor_fields=["text_field_1"])

        self._assert_rrf_score_modifiers_with_rerank_depth(test_index_name)
0 commit comments