3
3
import importlib
4
4
import os
5
5
import glob
6
+ import logging
6
7
from enum import Enum
7
8
from itertools import product
8
9
from typing import Any , Dict , List , Optional , Union
@@ -21,18 +22,19 @@ class Definition:
21
22
query_argument_groups : List [List [Any ]]
22
23
disabled : bool
23
24
25
+
24
26
def instantiate_algorithm (definition : Definition ) -> BaseANN :
25
27
"""
26
28
Create a `BaseANN` from a definition.
27
-
29
+
28
30
Args:
29
31
definition (Definition): An object containing information about the algorithm.
30
32
31
33
Returns:
32
34
BaseANN: Instantiated algorithm
33
35
34
36
Note:
35
- The constructors for the algorithm definition are generally located at
37
+ The constructors for the algorithm definition are generally located at
36
38
ann_benchmarks/algorithms/*/module.py.
37
39
"""
38
40
print (f"Trying to instantiate { definition .module } .{ definition .constructor } ({ definition .arguments } )" )
@@ -52,7 +54,7 @@ def algorithm_status(definition: Definition) -> InstantiationStatus:
52
54
"""
53
55
Determine the instantiation status of the algorithm based on its python module and constructor.
54
56
55
- Attempts to find the Python class constructor based on the definition's module path and
57
+ Attempts to find the Python class constructor based on the definition's module path and
56
58
constructor name.
57
59
58
60
Args:
@@ -68,6 +70,8 @@ def algorithm_status(definition: Definition) -> InstantiationStatus:
68
70
else :
69
71
return InstantiationStatus .NO_CONSTRUCTOR
70
72
except ImportError :
73
+ logging .exception ("Could not import algorithm module for %s" ,
74
+ definition .module )
71
75
return InstantiationStatus .NO_MODULE
72
76
73
77
@@ -103,7 +107,7 @@ def _generate_combinations(args: Union[List[Any], Dict[Any, Any]]) -> List[Union
103
107
def _substitute_variables (arg : Any , vs : Dict [str , Any ]) -> Any :
104
108
"""
105
109
Substitutes any string variables present in the argument structure with provided values.
106
-
110
+
107
111
Support for nested substitution in the case `arg` is a List or Dict.
108
112
109
113
Args:
@@ -160,8 +164,8 @@ def _get_definitions(base_dir: str = "ann_benchmarks/algorithms") -> List[Dict[s
160
164
161
165
def _get_algorithm_definitions (point_type : str , distance_metric : str , base_dir : str = "ann_benchmarks/algorithms" ) -> Dict [str , Dict [str , Any ]]:
162
166
"""Get algorithm definitions for a specific point type and distance metric.
163
-
164
- A specific algorithm folder can have multiple algorithm definitions for a given point type and
167
+
168
+ A specific algorithm folder can have multiple algorithm definitions for a given point type and
165
169
metric. For example, `ann_benchmarks.algorithms.nmslib` has two definitions for euclidean float
166
170
data: specifically `SW-graph(nmslib)` and `hnsw(nmslib)`, even though the module is named nmslib.
167
171
@@ -176,7 +180,7 @@ def _get_algorithm_definitions(point_type: str, distance_metric: str, base_dir:
176
180
"disabled": false,
177
181
"docker_tag": ann-benchmarks-nmslib,
178
182
...
179
- },
183
+ },
180
184
'SW-graph(nmslib)': {
181
185
"base_args": ['@metric', sw-graph],
182
186
"constructor": NmslibReuseIndex,
@@ -205,9 +209,9 @@ def _get_algorithm_definitions(point_type: str, distance_metric: str, base_dir:
205
209
def list_algorithms (base_dir : str = "ann_benchmarks/algorithms" ) -> None :
206
210
"""
207
211
Output (to stdout), a list of all algorithms, with their supported point types and metrics.
208
-
212
+
209
213
Args:
210
- base_dir (str, optional): The base directory where the algorithms are stored.
214
+ base_dir (str, optional): The base directory where the algorithms are stored.
211
215
Defaults to "ann_benchmarks/algorithms".
212
216
"""
213
217
all_configs = _get_definitions (base_dir )
@@ -236,7 +240,7 @@ def list_algorithms(base_dir: str = "ann_benchmarks/algorithms") -> None:
236
240
237
241
def generate_arg_combinations (run_group : Dict [str , Any ], arg_type : str ) -> List :
238
242
"""Generate combinations of arguments from a run group for a specific argument type.
239
-
243
+
240
244
Args:
241
245
run_group (Dict[str, Any]): The run group containing argument definitions.
242
246
arg_type (str): The type of argument group to generate combinations for.
@@ -262,10 +266,10 @@ def generate_arg_combinations(run_group: Dict[str, Any], arg_type: str) -> List:
262
266
263
267
264
268
def prepare_args (run_group : Dict [str , Any ]) -> List :
265
- """For an Algorithm's run group, prepare arguments.
266
-
269
+ """For an Algorithm's run group, prepare arguments.
270
+
267
271
An `arg_groups` key takes precedence over an `args` key.
268
-
272
+
269
273
Args:
270
274
run_group (Dict[str, Any]): The run group containing argument definitions.
271
275
@@ -283,7 +287,7 @@ def prepare_args(run_group: Dict[str, Any]) -> List:
283
287
284
288
def prepare_query_args (run_group : Dict [str , Any ]) -> List :
285
289
"""For an algorithm's run group, prepare query args/ query arg groups.
286
-
290
+
287
291
Args:
288
292
run_group (Dict[str, Any]): The run group containing argument definitions.
289
293
@@ -299,28 +303,28 @@ def prepare_query_args(run_group: Dict[str, Any]) -> List:
299
303
def create_definitions_from_algorithm (name : str , algo : Dict [str , Any ], dimension : int , distance_metric : str = "euclidean" , count : int = 10 ) -> List [Definition ]:
300
304
"""
301
305
Create definitions from an individual algorithm. An algorithm (e.g. annoy) can have multiple
302
- definitions based on various run groups (see config.ymls for clear examples).
303
-
306
+ definitions based on various run groups (see config.ymls for clear examples).
307
+
304
308
Args:
305
309
name (str): Name of the algorithm.
306
310
algo (Dict[str, Any]): Dictionary with algorithm parameters.
307
311
dimension (int): Dimension of the algorithm.
308
312
distance_metric (str, optional): Distance metric used by the algorithm. Defaults to "euclidean".
309
313
count (int, optional): Count of the definitions to be created. Defaults to 10.
310
-
314
+
311
315
Raises:
312
316
ValueError: If the algorithm does not define "docker_tag", "module" or "constructor" properties.
313
-
317
+
314
318
Returns:
315
319
List[Definition]: A list of definitions created from the algorithm.
316
320
"""
317
321
required_properties = ["docker_tag" , "module" , "constructor" ]
318
322
missing_properties = [prop for prop in required_properties if prop not in algo ]
319
323
if missing_properties :
320
324
raise ValueError (f"Algorithm { name } is missing the following properties: { ', ' .join (missing_properties )} " )
321
-
325
+
322
326
base_args = algo .get ("base_args" , [])
323
-
327
+
324
328
definitions = []
325
329
for run_group in algo ["run_groups" ].values ():
326
330
args = prepare_args (run_group )
@@ -336,7 +340,7 @@ def create_definitions_from_algorithm(name: str, algo: Dict[str, Any], dimension
336
340
337
341
vs = {"@count" : count , "@metric" : distance_metric , "@dimension" : dimension }
338
342
current_args = [_substitute_variables (arg , vs ) for arg in current_args ]
339
-
343
+
340
344
definitions .append (
341
345
Definition (
342
346
algorithm = name ,
@@ -369,6 +373,6 @@ def get_definitions(
369
373
definitions .extend (
370
374
create_definitions_from_algorithm (name , algo , dimension , distance_metric , count )
371
375
)
372
-
376
+
373
377
374
378
return definitions
0 commit comments