Skip to content

Commit b8659b1

Browse files
authored
separate rerank count tests (#277)
1 parent d590620 commit b8659b1

File tree

1 file changed

+104
-57
lines changed

1 file changed

+104
-57
lines changed

tests/v2_tests/test_score_modifier_search.py

+104-57
Original file line number | Diff line number | Diff line change
@@ -105,65 +105,112 @@ def test_valid_score_modifiers_format(self):
105105

106106
@mark.fixed
107107
class TestScoreModifierWithRerankCountSearch(MarqoTestCase):
108-
def test_hybrid_search_rrf_score_modifiers_with_rerank_depth(self):
108+
def setUp(self) -> None:
109+
super().setUp()
110+
self.docs_list = [
111+
{"_id": "both1", "text_field_1": "dogs", "int_field_1": -1}, # HIGH tensor, LOW lexical
112+
{"_id": "tensor1", "text_field_1": "puppies", "int_field_1": 2}, # MID tensor
113+
{"_id": "tensor2", "text_field_1": "random words", "int_field_1": 3}, # LOW tensor
114+
]
115+
116+
def test_hybrid_search_structured_rrf_score_modifiers_with_rerank_depth(self):
109117
"""
110118
Test that hybrid search with RRF can use root level score_modifiers and rerank_depth
119+
For structured indexes
111120
"""
112-
test_cases = [
113-
(CloudTestIndex.unstructured_text, self.unstructured_index_name),
114-
(CloudTestIndex.structured_text, self.structured_index_name)
115-
]
116121

117-
docs_list = [
118-
{"_id": "both1", "text_field_1": "dogs", "int_field_1": -1}, # HIGH tensor, LOW lexical
119-
{"_id": "tensor1", "text_field_1": "puppies", "int_field_1": 2}, # MID tensor
120-
{"_id": "tensor2", "text_field_1": "random words", "int_field_1": 3}, # LOW tensor
121-
]
122+
cloud_test_index_to_use = CloudTestIndex.structured_text
123+
open_source_test_index_name = self.structured_index_name
124+
125+
test_index_name = self.get_test_index_name(
126+
cloud_test_index_to_use=cloud_test_index_to_use,
127+
open_source_test_index_name=open_source_test_index_name
128+
)
129+
self.client.index(test_index_name).add_documents(self.docs_list)
130+
131+
# Get unmodified scores
132+
# Unmodified result order should be: both1, tensor1, tensor2
133+
unmodified_results = self.client.index(test_index_name).search(q="dogs", search_method="HYBRID",limit=3)
134+
unmodified_scores = {hit["_id"]: hit["_score"] for hit in unmodified_results["hits"]}
135+
self.assertEqual(["both1", "tensor1", "tensor2"], [hit["_id"] for hit in unmodified_results["hits"]])
136+
137+
# Get modified scores (rank all 3)
138+
# Modified result order should be: tensor2, tensor1, both1
139+
score_modifiers = {
140+
"multiply_score_by": [
141+
{"field_name": "int_field_1", "weight": 1}
142+
],
143+
"add_to_score": [
144+
{"field_name": "int_field_1", "weight": 1}
145+
]
146+
}
147+
modified_results = self.client.index(test_index_name).search(
148+
q="dogs", search_method="HYBRID",
149+
limit=3, rerank_depth=3, score_modifiers=score_modifiers
150+
)
151+
self.assertEqual(["tensor2", "tensor1", "both1"], [hit["_id"] for hit in modified_results["hits"]])
152+
self.assertAlmostEqual(modified_results["hits"][0]["_score"], 3*unmodified_scores["tensor2"] + 3)
153+
self.assertAlmostEqual(modified_results["hits"][1]["_score"], 2*unmodified_scores["tensor1"] + 2)
154+
self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1*unmodified_scores["both1"] - 1)
155+
156+
# Get modified scores (rank only 1). Only both1 should be rescored (goes to the bottom)
157+
# Modified result order should be: tensor1, tensor2, both1
158+
modified_results = self.client.index(test_index_name).search(
159+
q="dogs", search_method="HYBRID",
160+
limit=3, rerank_depth=1, score_modifiers=score_modifiers
161+
)
162+
self.assertEqual(["tensor1", "tensor2", "both1"], [hit["_id"] for hit in modified_results["hits"]])
163+
self.assertAlmostEqual(modified_results["hits"][0]["_score"], unmodified_scores["tensor1"]) # unmodified
164+
self.assertAlmostEqual(modified_results["hits"][1]["_score"], unmodified_scores["tensor2"]) # unmodified
165+
self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1*unmodified_scores["both1"] - 1) # modified
166+
167+
def test_hybrid_search_unstructured_rrf_score_modifiers_with_rerank_depth(self):
168+
"""
169+
Test that hybrid search with RRF can use root level score_modifiers and rerank_depth
170+
For unstructured indexes
171+
"""
172+
173+
cloud_test_index_to_use = CloudTestIndex.unstructured_text
174+
open_source_test_index_name = self.unstructured_index_name
122175

123-
for cloud_test_index_to_use, open_source_test_index_name in test_cases:
124-
with self.subTest(cloud_test_index_to_use=cloud_test_index_to_use,
125-
open_source_test_index_name=open_source_test_index_name):
126-
test_index_name = self.get_test_index_name(
127-
cloud_test_index_to_use=cloud_test_index_to_use,
128-
open_source_test_index_name=open_source_test_index_name
129-
)
130-
self.client.index(test_index_name).add_documents(
131-
docs_list,
132-
tensor_fields=["text_field_1"] if "unstr" in cloud_test_index_to_use or
133-
"unstr" in open_source_test_index_name else None)
134-
135-
# Get unmodified scores
136-
# Unmodified result order should be: both1, tensor1, tensor2
137-
unmodified_results = self.client.index(test_index_name).search(q="dogs", search_method="HYBRID",limit=3)
138-
unmodified_scores = {hit["_id"]: hit["_score"] for hit in unmodified_results["hits"]}
139-
self.assertEqual(["both1", "tensor1", "tensor2"], [hit["_id"] for hit in unmodified_results["hits"]])
140-
141-
# Get modified scores (rank all 3)
142-
# Modified result order should be: tensor2, tensor1, both1
143-
score_modifiers = {
144-
"multiply_score_by": [
145-
{"field_name": "int_field_1", "weight": 1}
146-
],
147-
"add_to_score": [
148-
{"field_name": "int_field_1", "weight": 1}
149-
]
150-
}
151-
modified_results = self.client.index(test_index_name).search(
152-
q="dogs", search_method="HYBRID",
153-
limit=3, rerank_depth=3, score_modifiers=score_modifiers
154-
)
155-
self.assertEqual(["tensor2", "tensor1", "both1"], [hit["_id"] for hit in modified_results["hits"]])
156-
self.assertAlmostEqual(modified_results["hits"][0]["_score"], 3*unmodified_scores["tensor2"] + 3)
157-
self.assertAlmostEqual(modified_results["hits"][1]["_score"], 2*unmodified_scores["tensor1"] + 2)
158-
self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1*unmodified_scores["both1"] - 1)
159-
160-
# Get modified scores (rank only 1). Only both1 should be rescored (goes to the bottom)
161-
# Modified result order should be: tensor1, tensor2, both1
162-
modified_results = self.client.index(test_index_name).search(
163-
q="dogs", search_method="HYBRID",
164-
limit=3, rerank_depth=1, score_modifiers=score_modifiers
165-
)
166-
self.assertEqual(["tensor1", "tensor2", "both1"], [hit["_id"] for hit in modified_results["hits"]])
167-
self.assertAlmostEqual(modified_results["hits"][0]["_score"], unmodified_scores["tensor1"]) # unmodified
168-
self.assertAlmostEqual(modified_results["hits"][1]["_score"], unmodified_scores["tensor2"]) # unmodified
169-
self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1*unmodified_scores["both1"] - 1) # modified
176+
test_index_name = self.get_test_index_name(
177+
cloud_test_index_to_use=cloud_test_index_to_use,
178+
open_source_test_index_name=open_source_test_index_name
179+
)
180+
self.client.index(test_index_name).add_documents(self.docs_list, tensor_fields=["text_field_1"])
181+
182+
# Get unmodified scores
183+
# Unmodified result order should be: both1, tensor1, tensor2
184+
unmodified_results = self.client.index(test_index_name).search(q="dogs", search_method="HYBRID", limit=3)
185+
unmodified_scores = {hit["_id"]: hit["_score"] for hit in unmodified_results["hits"]}
186+
self.assertEqual(["both1", "tensor1", "tensor2"], [hit["_id"] for hit in unmodified_results["hits"]])
187+
188+
# Get modified scores (rank all 3)
189+
# Modified result order should be: tensor2, tensor1, both1
190+
score_modifiers = {
191+
"multiply_score_by": [
192+
{"field_name": "int_field_1", "weight": 1}
193+
],
194+
"add_to_score": [
195+
{"field_name": "int_field_1", "weight": 1}
196+
]
197+
}
198+
modified_results = self.client.index(test_index_name).search(
199+
q="dogs", search_method="HYBRID",
200+
limit=3, rerank_depth=3, score_modifiers=score_modifiers
201+
)
202+
self.assertEqual(["tensor2", "tensor1", "both1"], [hit["_id"] for hit in modified_results["hits"]])
203+
self.assertAlmostEqual(modified_results["hits"][0]["_score"], 3 * unmodified_scores["tensor2"] + 3)
204+
self.assertAlmostEqual(modified_results["hits"][1]["_score"], 2 * unmodified_scores["tensor1"] + 2)
205+
self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1 * unmodified_scores["both1"] - 1)
206+
207+
# Get modified scores (rank only 1). Only both1 should be rescored (goes to the bottom)
208+
# Modified result order should be: tensor1, tensor2, both1
209+
modified_results = self.client.index(test_index_name).search(
210+
q="dogs", search_method="HYBRID",
211+
limit=3, rerank_depth=1, score_modifiers=score_modifiers
212+
)
213+
self.assertEqual(["tensor1", "tensor2", "both1"], [hit["_id"] for hit in modified_results["hits"]])
214+
self.assertAlmostEqual(modified_results["hits"][0]["_score"], unmodified_scores["tensor1"]) # unmodified
215+
self.assertAlmostEqual(modified_results["hits"][1]["_score"], unmodified_scores["tensor2"]) # unmodified
216+
self.assertAlmostEqual(modified_results["hits"][2]["_score"], -1 * unmodified_scores["both1"] - 1) # modified

0 commit comments

Comments
 (0)