-
Notifications
You must be signed in to change notification settings - Fork 4
/
small-scale-experiments.py
64 lines (50 loc) · 1.77 KB
/
small-scale-experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import os
import subprocess
# Directory that holds the dataset files; joined with the dataset name below.
data_path = "data/"

# (dataset name, metric) pairs to run experiments on.
datasets = [
    # ("sift", "L2"),  # disabled
    ("glove", "mips"),  # actually angular, but this is equivalent if normalized
]

# Names of the partitioning methods handed to the external binaries.
partitioning_methods = [
    "GP",
    "RKM",
    # "KMeans",  # disabled
    "BalancedKMeans",
]

# Shard counts to sweep over.
num_shards_vals = [16]

# Neighbor count passed on the SmallScaleQueries command line.
num_neighbors = 10

# Metric name -> build folder whose binaries are invoked for that metric.
build_folders = {
    "L2": "release_l2",
    "mips": "release_mips",
}
def compute_partition(dataset, metric, part_method, num_shards):
    """Invoke the ``Partition`` binary for one configuration.

    The command line is printed before it is launched. The exit status of
    the child process is not inspected.

    Args:
        dataset: Dataset name; ``<data_path>/<dataset>.fbin`` is the input
            and ``<data_path>/<dataset>.partition`` is passed as the
            output path/prefix.
        metric: Key into ``build_folders`` selecting which build to run.
        part_method: Partitioning method name forwarded to the binary.
        num_shards: Number of shards; converted to a string for the
            command line.

    Raises:
        KeyError: If ``metric`` is not a key of ``build_folders``.
    """
    executable = build_folders[metric] + '/Partition'
    base = os.path.join(data_path, dataset)
    command = [
        executable,
        base + '.fbin',
        base + '.partition',
        str(num_shards),
        part_method,
        # Final positional argument; its meaning is defined by the
        # Partition binary (not visible from this script).
        'strong',
    ]
    print(command)
    subprocess.call(command)
def compute_all_partitions():
    """Run ``compute_partition`` for every configured combination.

    Iterates datasets (outermost), then partitioning methods, then shard
    counts (innermost), using the module-level configuration lists.
    """
    configurations = (
        (name, metric, method, shard_count)
        for name, metric in datasets
        for method in partitioning_methods
        for shard_count in num_shards_vals
    )
    for configuration in configurations:
        compute_partition(*configuration)
def run_query_set(dataset, metric, part_method, num_shards):
    """Invoke the ``SmallScaleQueries`` binary for one configuration.

    The command line is printed before it is launched. The exit status of
    the child process is not inspected.

    Args:
        dataset: Dataset name; used as the file-name stem under
            ``data_path`` and in the output CSV name.
        metric: Key into ``build_folders`` selecting which build to run.
        part_method: Partitioning method name; part of the partition file
            name and also passed as its own argument.
        num_shards: Number of shards; appears as ``k=<num_shards>`` in the
            partition and output file names.

    Raises:
        KeyError: If ``metric`` is not a key of ``build_folders``.
    """
    base = os.path.join(data_path, dataset)
    partition_suffix = ''  # optional trailing suffix on the partition file name
    executable = build_folders[metric] + '/SmallScaleQueries'
    neighbors = str(num_neighbors)
    shards = str(num_shards)

    # presumably the file written by the Partition binary — verify naming
    partition_file = (
        base + '.partition.k=' + shards + '.' + part_method + partition_suffix
    )
    # NOTE(review): nothing here creates exp_outputs/ — confirm it exists
    # or that the binary creates it.
    output_csv = "exp_outputs/" + ".".join(
        [dataset, part_method, "k=" + shards, "csv"]
    )

    command = [
        executable,
        base + '.fbin',
        base + '.query.fbin',
        base + '.ground_truth.bin',
        neighbors,
        partition_file,
        part_method,
        output_csv,
    ]
    print(command)
    subprocess.call(command)
def run_queries_on_all_datasets():
    """Run ``run_query_set`` for every configured combination.

    Iterates datasets (outermost), then partitioning methods, then shard
    counts (innermost), using the module-level configuration lists.
    """
    configurations = (
        (name, metric, method, shard_count)
        for name, metric in datasets
        for method in partitioning_methods
        for shard_count in num_shards_vals
    )
    for configuration in configurations:
        run_query_set(*configuration)
# Partition computation is currently disabled; uncomment the next line to
# run it before the queries.
#compute_all_partitions()
# Script entry: run the query experiments for every configured combination.
run_queries_on_all_datasets()