Skip to content

Commit 15de44c

Browse files
VitusAcabado, papa99do, danyilq
authored
Fix tests for 2.17 (#279)
* remove langbind tests
* bump minimum supported version to 2.17.1-cloud
* switch to supported models
* fix version check
* fix more tests
* rerank depth test fix
* skip image chunking test
* fix recommend doc fields
* Temporarily disable flaky test

---------

Co-authored-by: yihanzhao <[email protected]>
Co-authored-by: danyilq <[email protected]>
1 parent 8842916 commit 15de44c

File tree

10 files changed

+70
-330
lines changed

10 files changed

+70
-330
lines changed

.github/workflows/open-source-unit-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ jobs:
9999
python -m pip install --upgrade pip
100100
pip install -r requirements.txt
101101
export PYTHONPATH=$(pwd):$(pwd)/src:$PYTHONPATH
102-
SUPPORTED_MQ_VERSION=$(python -c 'from marqo import version; print(version.__minimum_supported_marqo_version__)') || exit 1
102+
SUPPORTED_MQ_VERSION=$(python -c 'from marqo import version; print(f"{version.__minimum_supported_marqo_version__}-cloud")') || exit 1
103103
104104
# error out if version is empty:
105105
if [ -z "$SUPPORTED_MQ_VERSION" ]; then exit 1; fi

src/marqo/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__minimum_supported_marqo_version__ = "2.15.0"
1+
__minimum_supported_marqo_version__ = "2.17.1"
22

33
# NOTE: This isn't used anywhere
44
def supported_marqo_version() -> str:

tests/cloud_test_logic/cloud_test_index.py

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ class CloudTestIndex(str, Enum):
1414
3) unstructured_no_model: 512-dimension custom vectors, 1 shard, no replicas, CPU, basic storage.
1515
4) structured_text: Structured text index with hf/e5-base-v2, lexical search, 2 shards, 1 replica, CPU, balanced storage.
1616
5) structured_image: Structured image-text index with open_clip/ViT-B-32, 2 shards, 1 replica, CPU, balanced storage, with image preprocessing.
17-
6) structured_languagebind_model: a structured index using the LanguageBind model for multi-modal support.
1817
For more information on the settings of each index, please refer to index_name_to_settings_mappings.
1918
2019
FOR CLOUD REPLICAS AND SHARDS:
@@ -35,7 +34,6 @@ class CloudTestIndex(str, Enum):
3534
structured_image_custom = "pymarqo_str_img_custom"
3635
structured_text = "pymarqo_str_txt"
3736
structured_image = "pymarqo_str_img"
38-
structured_languagebind_model = "pymarqo_str_langbind_model"
3937

4038

4139
index_name_to_settings_mappings = {
@@ -123,40 +121,4 @@ class CloudTestIndex(str, Enum):
123121
"patchMethod": "simple",
124122
}
125123
},
126-
CloudTestIndex.structured_languagebind_model: {
127-
"type": "structured",
128-
"model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image",
129-
"inferenceType": "marqo.GPU",
130-
"storageClass": "marqo.balanced",
131-
"allFields": [
132-
{"name": "text_field_1", "type": "text"},
133-
{"name": "text_field_2", "type": "text"},
134-
{"name": "text_field_3", "type": "text"},
135-
{"name": "video_field_1", "type": "video_pointer"},
136-
{"name": "video_field_2", "type": "video_pointer"},
137-
{"name": "video_field_3", "type": "video_pointer"},
138-
{"name": "audio_field_1", "type": "audio_pointer"},
139-
{"name": "audio_field_2", "type": "audio_pointer"},
140-
{"name": "image_field_1", "type": "image_pointer"},
141-
{"name": "image_field_2", "type": "image_pointer"},
142-
{
143-
"name": "multimodal_field",
144-
"type": "multimodal_combination",
145-
"dependentFields": {
146-
"text_field_1": 0.1,
147-
"text_field_2": 0.1,
148-
"image_field_1": 0.5,
149-
"video_field_1": 0.1,
150-
"video_field_2": 0.1,
151-
"audio_field_1": 0.1
152-
}
153-
},
154-
{"name": "map_int_score_modifier_field", "type": "map<text, int>", "features": ["score_modifier"]},
155-
{"name": "map_double_score_modifier_field", "type": "map<text, double>", "features": ["score_modifier"]},
156-
{"name": "map_float_score_modifier_field", "type": "map<text, float>", "features": ["score_modifier"]},
157-
{"name": "map_long_score_modifier_field", "type": "map<text, long>", "features": ["score_modifier"]},
158-
],
159-
"tensorFields": ["multimodal_field", "text_field_3", "video_field_3", "audio_field_2", "image_field_2"],
160-
"normalizeEmbeddings": True,
161-
},
162124
}

tests/marqo_test.py

Lines changed: 2 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -209,15 +209,11 @@ def setUpClass(cls) -> None:
209209
cls.unstructured_no_model_index_name = "unstructured_no_model_index"
210210
cls.structured_image_index_name_simple_preprocessing_method = \
211211
"structured_image_index_simple_preprocessing_method"
212-
cls.structured_languagebind_index_name = "structured_languagebind_index"
213212

214213
# TODO: include structured when boolean_field bug for structured is fixed
215214
cls.test_cases = [
216215
(CloudTestIndex.unstructured_image, cls.unstructured_index_name),
217216
]
218-
cls.test_cases_multimodal = [
219-
(CloudTestIndex.structured_languagebind_model, cls.structured_languagebind_index_name)
220-
]
221217

222218
# class property to indicate if test is being run on multi
223219
cls.IS_MULTI_INSTANCE = (True if os.environ.get("IS_MULTI_INSTANCE", False) in ["True", "TRUE", "true", True] else False)
@@ -245,7 +241,7 @@ def setUpClass(cls) -> None:
245241
"indexName": cls.unstructured_image_index_name,
246242
"type": "unstructured",
247243
"treatUrlsAndPointersAsImages": True,
248-
"model": "ViT-B/32",
244+
"model": "open_clip/ViT-B-32/laion400m_e32",
249245
},
250246
{
251247
"indexName": cls.structured_image_index_name,
@@ -256,7 +252,7 @@ def setUpClass(cls) -> None:
256252
{"name": "image_field_1", "type": "image_pointer"},
257253
],
258254
"tensorFields": ["text_field_1", "text_field_2", "text_field_3", "image_field_1"],
259-
"model": "ViT-B/32",
255+
"model": "open_clip/ViT-B-32/laion400m_e32",
260256
},
261257
{
262258
"indexName": cls.unstructured_no_model_index_name,
@@ -268,36 +264,6 @@ def setUpClass(cls) -> None:
268264
"dimensions": 512
269265
}
270266
},
271-
{
272-
"indexName": cls.structured_languagebind_index_name,
273-
"type": "structured",
274-
"model": "LanguageBind/Video_V1.5_FT_Audio_FT_Image",
275-
"allFields": [
276-
{"name": "text_field_1", "type": "text"},
277-
{"name": "text_field_2", "type": "text"},
278-
{"name": "text_field_3", "type": "text"},
279-
{"name": "video_field_1", "type": "video_pointer"},
280-
{"name": "video_field_2", "type": "video_pointer"},
281-
{"name": "video_field_3", "type": "video_pointer"},
282-
{"name": "audio_field_1", "type": "audio_pointer"},
283-
{"name": "audio_field_2", "type": "audio_pointer"},
284-
{"name": "image_field_1", "type": "image_pointer"},
285-
{"name": "image_field_2", "type": "image_pointer"},
286-
{
287-
"name": "multimodal_field",
288-
"type": "multimodal_combination",
289-
"dependentFields": {
290-
"text_field_1": 0.1,
291-
"text_field_2": 0.1,
292-
"image_field_1": 0.5,
293-
"video_field_1": 0.1,
294-
"video_field_2": 0.1,
295-
"audio_field_1": 0.1
296-
}
297-
},
298-
],
299-
"tensorFields": ["multimodal_field", "text_field_3", "video_field_3", "audio_field_2", "image_field_2"]
300-
}
301267
])
302268
except Exception as e:
303269
print("Error creating indexes: ", e)

tests/v2_tests/test_add_documents.py

Lines changed: 1 addition & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -588,9 +588,6 @@ def test_no_model_custom_vector_doc(self):
588588
Note: `no_model` is not yet supported on Cloud.
589589
"""
590590
self.test_cases = [(CloudTestIndex.unstructured_no_model, self.unstructured_no_model_index_name)]
591-
self.test_cases_multimodal = [
592-
(CloudTestIndex.structured_languagebind_model, self.structured_languagebind_index_name)
593-
]
594591

595592
for cloud_test_index_to_use, open_source_test_index_name in self.test_cases:
596593
test_index_name = self.get_test_index_name(
@@ -707,103 +704,6 @@ def test_add_empty_docs_batched(self):
707704
tensor_fields="field a")
708705
assert res == []
709706

710-
def test_add_multimodal_single_documents(self):
711-
documents = [
712-
{
713-
"video_field_3": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4",
714-
"_id": "1"
715-
},
716-
{
717-
"audio_field_2": "https://marqo-ecs-50-audio-test-dataset.s3.amazonaws.com/audios/marqo-audio-test.mp3",
718-
"_id": "2"
719-
},
720-
{
721-
"image_field_2": "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png",
722-
"_id": "3"
723-
},
724-
{
725-
"text_field_3": "hello there padawan. Today you will begin your training to be a Jedi",
726-
"_id": "4"
727-
},
728-
]
729-
for cloud_test_index_to_use, open_source_test_index_name in self.test_cases_multimodal:
730-
if "languagebind" not in str(cloud_test_index_to_use):
731-
continue
732-
test_index_name = self.get_test_index_name(
733-
cloud_test_index_to_use=cloud_test_index_to_use,
734-
open_source_test_index_name=open_source_test_index_name
735-
)
736-
with self.subTest(test_index_name):
737-
tensor_fields = ["text_field_3", "image_field_2", "video_field_3", "audio_field_2"] if "unstructured" in test_index_name else None
738-
res = self.client.index(test_index_name).add_documents(documents, tensor_fields=tensor_fields)
739-
print(res)
740-
741-
for item in res['items']:
742-
self.assertEqual(200, item['status'])
743-
744-
get_res = self.client.index(test_index_name).get_documents(
745-
document_ids=["1", "2", "3", "4"],
746-
expose_facets=True
747-
)
748-
print(get_res)
749-
750-
for i, doc in enumerate(get_res['results']):
751-
i += 1
752-
tensor_facets = doc['_tensor_facets']
753-
self.assertIn('_embedding', tensor_facets[0])
754-
self.assertEqual(len(tensor_facets[0]['_embedding']), 768)
755-
756-
def test_add_multimodal_field_document(self):
757-
multimodal_document = [{
758-
"_id": "1_multimodal",
759-
"text_field_1": "New York",
760-
"text_field_2": "Los Angeles",
761-
"image_field_1": "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png",
762-
"video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4",
763-
"video_field_2": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4",
764-
"audio_field_1": "https://marqo-ecs-50-audio-test-dataset.s3.amazonaws.com/audios/marqo-audio-test.mp3",
765-
}]
766-
for cloud_test_index_to_use, open_source_test_index_name in self.test_cases_multimodal:
767-
if "languagebind" not in str(cloud_test_index_to_use):
768-
continue
769-
test_index_name = self.get_test_index_name(
770-
cloud_test_index_to_use=cloud_test_index_to_use,
771-
open_source_test_index_name=open_source_test_index_name
772-
)
773-
with self.subTest(test_index_name):
774-
mappings = {
775-
"multimodal_field": {
776-
"type": "multimodal_combination",
777-
"weights": {
778-
"text_field_1": 0.1,
779-
"text_field_2": 0.1,
780-
"image_field_1": 0.5,
781-
"video_field_1": 0.1,
782-
"video_field_2": 0.1,
783-
"audio_field_1": 0.1
784-
},
785-
}
786-
} if "unstructured" in test_index_name else None
787-
tensor_fields = ["multimodal_field"] if "unstructured" in test_index_name else None
788-
res = self.client.index(test_index_name).add_documents(
789-
multimodal_document,
790-
tensor_fields=tensor_fields,
791-
mappings=mappings
792-
)
793-
print(res)
794-
795-
for item in res['items']:
796-
self.assertEqual(200, item['status'])
797-
798-
doc = self.client.index(test_index_name).get_documents(
799-
document_ids=["1_multimodal"],
800-
expose_facets=True
801-
)
802-
print(doc)
803-
804-
self.assertIn('_tensor_facets', doc['results'][0])
805-
self.assertIn('_embedding', doc['results'][0]['_tensor_facets'][0])
806-
self.assertEqual(len(doc['results'][0]['_tensor_facets'][0]['_embedding']), 768)
807707

808708
def test_media_download_headers_is_not_included(self):
809709
"""Ensure newly added attributes mediaDownloadHeaders is not included in the request body."""
@@ -835,4 +735,4 @@ def run():
835735
self.assertEqual({"key": "value-2"}, kwargs["body"]["imageDownloadHeaders"])
836736
self.assertEqual({"key": "value-1"}, kwargs["body"]["mediaDownloadHeaders"])
837737
return True
838-
run()
738+
run()

0 commit comments

Comments
 (0)