Skip to content

Commit ce4fe07

Browse files
committed
Update kgn
1 parent 3668191 commit ce4fe07

5 files changed

Lines changed: 45 additions & 83 deletions

File tree

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,17 @@
11
FROM ann-benchmarks
22

33
RUN apt update
4-
RUN apt install -y git cmake g++ python3 python3-setuptools python3-pip libblas-dev liblapack-dev
5-
RUN pip3 install wheel pybind11 faiss-cpu
4+
RUN apt install -y git cmake g++ python3 python3-setuptools python3-pip libblas-dev liblapack-dev
5+
# RUN echo "ulimit -s unlimited" >> /etc/profile
6+
RUN ulimit -s unlimited
7+
8+
RUN pip3 install --upgrade pip
9+
RUN pip3 install wheel pybind11 faiss-cpu numpy==1.24.2
610

711
WORKDIR /home/app
12+
813
RUN git clone https://github.com/Henry-yan/kgn.git
9-
RUN pip3 install kgn/pykgn-1.0.0-cp310-cp310-linux_x86_64.whl
14+
RUN pip3 install kgn/pykgn-1.0.0-cp310-cp310-linux_x86_64.whl
15+
16+
17+
CMD ["sh", "-c", "ulimit -s unlimited"]

ann_benchmarks/algorithms/kgn/config.yml

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -9,15 +9,9 @@ float:
99
run_groups:
1010
Kgn:
1111
args:
12-
L: 100
13-
R: 50
14-
index_type : "KGN"
15-
optimize : true
16-
batch : false
17-
kmeans_ep: 0
18-
kmeans_type: 0
19-
level: [1,2]
20-
query_args: [[5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115]]
12+
R: 160
13+
level: 2
14+
query_args: [[10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 120, 140, 150, 160, 180, 200, 300, 500]]
2115
angular:
2216
- base_args: ['@metric','@dimension']
2317
constructor: Kgn
@@ -28,12 +22,6 @@ float:
2822
run_groups:
2923
Kgn:
3024
args:
31-
L: 500
32-
R: 96
33-
index_type : "NSG"
34-
optimize : true
35-
batch : false
36-
kmeans_ep: 0
37-
kmeans_type: 0
25+
R: [128,160]
3826
level: [1,2]
3927
query_args: [[10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100, 105, 110, 115, 200, 300, 400, 500]]
Lines changed: 30 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,20 @@
1-
import psutil
1+
# import psutil
22
import os
3+
import multiprocessing
34
from time import time
4-
from sklearn import preprocessing
55

6-
import pykgn as kgn
76
import numpy as np
87
import faiss
98
from faiss import Kmeans
9+
from sklearn import preprocessing
10+
11+
import pykgn as kgn
1012

1113
from ..base.module import BaseANN
1214

1315

16+
17+
1418
class EPSearcher:
1519
def __init__(self, data: np.ndarray, cur_ep: int) -> None:
1620
self.data = data
@@ -74,72 +78,39 @@ def metric_mapping(metric):
7478
class Kgn(BaseANN):
7579
def __init__(self, metric, dim, method_param):
7680
self.metric = metric_mapping(metric)
77-
self.R = method_param['R']
78-
self.L = method_param['L']
79-
self.index_type = method_param['index_type']
80-
self.optimize = method_param['optimize']
81-
self.batch = method_param['batch']
82-
self.kmeans_ep = method_param['kmeans_ep']
83-
self.kmeans_type = method_param['kmeans_type']
84-
self.level = method_param['level']
8581
self.name = 'kgn_(%s)' % (method_param)
8682
self.dir = 'indices'
87-
self.path = f'{metric}_{dim}_{self.index_type}_R_{self.R}_L_{self.L}.kgn'
88-
83+
self.path = f'{metric}_{dim}.kgn'
84+
self.R = method_param['R'] # [128, 160]
85+
self.level = method_param['level'] # [1, 2]
86+
87+
def build(self, X):
88+
Index = kgn.Index(nb=self.n, dim=self.d, base=X, topK=10, metric=self.metric, level=self.level, R=self.R)
89+
full_path = os.path.join(self.dir, self.path)
90+
Index.build(full_path)
91+
8992
def fit(self, X):
90-
print(self.name, self.level, self.metric)
9193
if self.metric == "IP":
9294
X = preprocessing.normalize(X, "l2", axis=1)
9395
self.d = X.shape[1]
96+
self.n = X.shape[0]
9497
if not os.path.exists(self.dir):
9598
os.mkdir(self.dir)
9699
if self.path not in os.listdir(self.dir):
97-
print("build Index")
98-
p = kgn.Index(self.index_type, dim=self.d,
99-
metric=self.metric, R=self.R, L=self.L)
100-
g = p.build(X,20)
101-
g.save(os.path.join(self.dir, self.path))
102-
del p
103-
del g
104-
105-
# find kmeans centers -- RI
106-
if(self.kmeans_type==0):
107-
RI = np.array([])
108-
elif(self.kmeans_type==2):
109-
t = time()
110-
kmeans_ep_searcher = EPSearcherKmeans_re(X, 0, self.kmeans_ep, self.metric)
111-
T = time() - t
112-
print("Time of bi_kmeans = ", T, " k=", self.kmeans_ep)
113-
RI = kmeans_ep_searcher.get_cent()
114-
else:
115-
print("Error: no such kmeans algorithm in main_opt.py")
116-
print("kmeans_ep", self.kmeans_ep)
117-
g = kgn.Graph()
118-
g.load(os.path.join(self.dir, self.path))
119-
if self.level == 1:
120-
self.searcher = kgn.Searcher(g, X, self.metric, "SQ8U",20)
121-
elif self.level == 2:
122-
self.searcher = kgn.Searcher(g, X, self.metric, "SQ4U",20)
123-
print("Make Searcher")
124-
125-
if self.optimize:
126-
if self.batch:
127-
if self.level <= 4:
128-
self.searcher.optimize()
129-
else:
130-
print(self.level, "no needs optimized")
131-
pass
100+
full_path = os.path.join(self.dir, self.path)
101+
self.Index = kgn.Index(nb=self.n, dim=self.d, base=X, topK=10, metric=self.metric, level=self.level, R=self.R)
102+
if os.path.exists(full_path) and os.path.isfile(full_path):
103+
print("load Index")
104+
self.Index.load(full_path)
132105
else:
133-
if self.level <= 4:
134-
self.searcher.optimize(1)
135-
else:
136-
print(self.level, "no needs optimized")
137-
pass
138-
print("Optimize Parameters")
139-
106+
print("build Index")
107+
p = multiprocessing.Process(target=self.build, args=(X, ))
108+
p.start()
109+
p.join()
110+
self.Index.load(full_path)
111+
140112

141113
def set_query_arguments(self, ef):
142-
self.searcher.set_ef(ef)
143114
self.ef = ef
144115

145116
def prepare_query(self, q, n):
@@ -149,15 +120,10 @@ def prepare_query(self, q, n):
149120
self.n = n
150121

151122
def run_prepared_query(self):
152-
if self.level <= 3:
153-
self.res = self.searcher.search(
154-
self.q, self.n)
155-
else:
156-
self.res = self.searcher.search(
157-
self.q, self.n)
123+
self.res = self.Index.search(self.ef, self.q)
158124

159125
def get_prepared_query_results(self):
160126
return self.res
161127

162128
def freeIndex(self):
163-
del self.searcher
129+
del self.Index

results/random-xs-20-angular.png

56.5 KB
Loading

results/random-xs-20-euclidean.png

66.3 KB
Loading

0 commit comments

Comments
 (0)