
Commit f9ad2fa
Committed Apr 29, 2021
running version of framework
1 parent cd3ba30 commit f9ad2fa

17 files changed: +969 -15 lines
 

algos.yaml (+21)
@@ -0,0 +1,21 @@
float:
  any:
    faiss-ivf:
      docker-tag: billion-scale-benchmark-faiss
      module: benchmark.algorithms.faiss_inmem
      constructor: FaissIVF
      base-args: ["@metric"]
      run-groups:
        base:
          args: [[1024, 2048, 4096, 8192]]
          query-args: [[1, 5, 10, 50, 100, 200]]
  euclidean:
    faiss-ivf:
      docker-tag: billion-scale-benchmark-faiss
      module: benchmark.algorithms.faiss_inmem
      constructor: FaissIVF
      base-args: ["euclidean"]
      run-groups:
        args: [[1024]]
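Each run-group expands into a Cartesian product of its argument lists. A rough sketch (not part of the commit) of how the base group above expands, mirroring _generate_combinations in benchmark/algorithms/definitions.py below:

# Each element of `args` is one positional constructor slot; an inner list
# enumerates the choices for that slot.
from itertools import product

args = [[1024, 2048, 4096, 8192]]             # one slot, four choices
print([list(x) for x in product(*args)])      # [[1024], [2048], [4096], [8192]]

# With base-args ["@metric"] substituted, each combination becomes a call
# such as FaissIVF("euclidean", 1024); every query-args value (1, 5, ..., 200)
# is later applied to that instance via set_query_arguments.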

benchmark/__init__.py (+1)
@@ -0,0 +1 @@
from __future__ import absolute_import

benchmark/algorithms/base.py (+19 -5)
@@ -1,18 +1,25 @@
 from __future__ import absolute_import
+import psutil

 class BaseANN(object):
     def done(self):
         pass

-    def fit(self, X, name):
+    def fit(self, dataset):
         """
-        Build the index for the data points given as X.
-        Pass name as well to store index.
+        Build the index for the data points given by the dataset name.
+        Assumes that after fitting, the index is loaded in memory.
         """
         pass

-    def load_index(self, name):
-        """Load the index from name."""
+    def load_index(self, dataset):
+        """
+        Load the index for dataset. Returns False if the index
+        is not available, True otherwise.
+
+        Checking the index usually involves the dataset name
+        and the index build parameters passed during construction.
+        """
         pass

     def query(self, X, k):
@@ -40,3 +47,10 @@ def get_additional(self):

     def __str__(self):
         return self.name
+
+    def get_memory_usage(self):
+        """Return the current memory usage of this algorithm instance
+        (in kilobytes), or None if this information is not available."""
+        # return in kB for backwards compatibility
+        return psutil.Process().memory_info().rss / 1024
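To illustrate the contract, here is a minimal, hypothetical subclass (not part of the commit): an exact-search baseline implementing fit, query, and get_results the way the runner expects. The class name and brute-force logic are illustrative assumptions.

import numpy
from benchmark.algorithms.base import BaseANN
from benchmark.datasets import DATASETS


class BruteForce(BaseANN):
    """Hypothetical exact-search baseline; illustrates the interface only."""

    def __init__(self, metric):
        self._metric = metric
        self.name = "BruteForce"

    def fit(self, dataset):
        # The "index" is simply the raw points held in memory.
        self._data = DATASETS[dataset].get_dataset()

    def load_index(self, dataset):
        return False  # nothing is persisted, so always rebuild

    def query(self, X, k):
        # Squared euclidean distances between every query and every point,
        # then the k smallest per query row.
        d = ((X[:, None, :] - self._data[None, :, :]) ** 2).sum(-1)
        self.res = numpy.argsort(d, axis=1)[:, :k]

    def get_results(self):
        return self.res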

benchmark/algorithms/definitions.py (+173)
@@ -0,0 +1,173 @@
from __future__ import absolute_import
from os import sep as pathsep
import collections
import importlib
import os
import sys
import traceback
import yaml
from enum import Enum
from itertools import product


Definition = collections.namedtuple(
    'Definition',
    ['algorithm', 'constructor', 'module', 'docker_tag',
     'arguments', 'query_argument_groups', 'disabled'])


def instantiate_algorithm(definition):
    print('Trying to instantiate %s.%s(%s)' %
          (definition.module, definition.constructor, definition.arguments))
    module = importlib.import_module(definition.module)
    constructor = getattr(module, definition.constructor)
    return constructor(*definition.arguments)


class InstantiationStatus(Enum):
    AVAILABLE = 0
    NO_CONSTRUCTOR = 1
    NO_MODULE = 2


def algorithm_status(definition):
    try:
        module = importlib.import_module(definition.module)
        if hasattr(module, definition.constructor):
            return InstantiationStatus.AVAILABLE
        else:
            return InstantiationStatus.NO_CONSTRUCTOR
    except ImportError:
        return InstantiationStatus.NO_MODULE


def _generate_combinations(args):
    if isinstance(args, list):
        args = [el if isinstance(el, list) else [el] for el in args]
        return [list(x) for x in product(*args)]
    elif isinstance(args, dict):
        flat = []
        for k, v in args.items():
            if isinstance(v, list):
                flat.append([(k, el) for el in v])
            else:
                flat.append([(k, v)])
        return [dict(x) for x in product(*flat)]
    else:
        raise TypeError("No args handling exists for %s" % type(args).__name__)


def _substitute_variables(arg, vs):
    if isinstance(arg, dict):
        return dict([(k, _substitute_variables(v, vs))
                     for k, v in arg.items()])
    elif isinstance(arg, list):
        return [_substitute_variables(a, vs) for a in arg]
    elif isinstance(arg, str) and arg in vs:
        return vs[arg]
    else:
        return arg


def _get_definitions(definition_file):
    with open(definition_file, "r") as f:
        return yaml.load(f, yaml.SafeLoader)


def list_algorithms(definition_file):
    definitions = _get_definitions(definition_file)

    print('The following algorithms are supported...')
    for point in definitions:
        print('\t... for the point type "%s"...' % point)
        for metric in definitions[point]:
            print('\t\t... and the distance metric "%s":' % metric)
            for algorithm in definitions[point][metric]:
                print('\t\t\t%s' % algorithm)


def get_unique_algorithms(definition_file):
    definitions = _get_definitions(definition_file)
    algos = set()
    for point in definitions:
        for metric in definitions[point]:
            for algorithm in definitions[point][metric]:
                algos.add(algorithm)
    return list(sorted(algos))


def get_definitions(definition_file, dimension, point_type="float",
                    distance_metric="euclidean", count=10):
    definitions = _get_definitions(definition_file)

    algorithm_definitions = {}
    if "any" in definitions[point_type]:
        algorithm_definitions.update(definitions[point_type]["any"])
    algorithm_definitions.update(definitions[point_type][distance_metric])

    definitions = []
    for (name, algo) in algorithm_definitions.items():
        for k in ['docker-tag', 'module', 'constructor']:
            if k not in algo:
                raise Exception(
                    'algorithm %s does not define a "%s" property' % (name, k))

        base_args = []
        if "base-args" in algo:
            base_args = algo["base-args"]

        for run_group in algo["run-groups"].values():
            if "arg-groups" in run_group:
                groups = []
                for arg_group in run_group["arg-groups"]:
                    if isinstance(arg_group, dict):
                        # Dictionaries need to be expanded into lists in order
                        # for the subsequent call to _generate_combinations to
                        # do the right thing
                        groups.append(_generate_combinations(arg_group))
                    else:
                        groups.append(arg_group)
                args = _generate_combinations(groups)
            elif "args" in run_group:
                args = _generate_combinations(run_group["args"])
            else:
                assert False, "? what? %s" % run_group

            if "query-arg-groups" in run_group:
                groups = []
                for arg_group in run_group["query-arg-groups"]:
                    if isinstance(arg_group, dict):
                        groups.append(_generate_combinations(arg_group))
                    else:
                        groups.append(arg_group)
                query_args = _generate_combinations(groups)
            elif "query-args" in run_group:
                query_args = _generate_combinations(run_group["query-args"])
            else:
                query_args = []

            for arg_group in args:
                aargs = []
                aargs.extend(base_args)
                if isinstance(arg_group, list):
                    aargs.extend(arg_group)
                else:
                    aargs.append(arg_group)

                vs = {
                    "@count": count,
                    "@metric": distance_metric,
                    "@dimension": dimension
                }
                aargs = [_substitute_variables(arg, vs) for arg in aargs]
                definitions.append(Definition(
                    algorithm=name,
                    docker_tag=algo['docker-tag'],
                    module=algo['module'],
                    constructor=algo['constructor'],
                    arguments=aargs,
                    query_argument_groups=query_args,
                    disabled=algo.get('disabled', False)
                ))

    return definitions
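A quick, hypothetical REPL session showing what these helpers produce (outputs derived directly from the code above):

from benchmark.algorithms.definitions import (_generate_combinations,
                                              _substitute_variables)

# Lists enumerate choices per positional slot:
print(_generate_combinations([[1024, 2048], [1, 5]]))
# -> [[1024, 1], [1024, 5], [2048, 1], [2048, 5]]

# Dicts enumerate choices per keyword:
print(_generate_combinations({"n_list": [1024, 2048], "seed": 7}))
# -> [{'n_list': 1024, 'seed': 7}, {'n_list': 2048, 'seed': 7}]

# "@metric"/"@count"/"@dimension" placeholders are filled in last:
print(_substitute_variables(["@metric", 1024], {"@metric": "euclidean"}))
# -> ['euclidean', 1024]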

benchmark/algorithms/faiss.py

Whitespace-only changes.

benchmark/algorithms/faiss_inmem.py (+75)
@@ -0,0 +1,75 @@
from __future__ import absolute_import
#import sys
#sys.path.append("install/lib-faiss")  # noqa
import numpy
import sklearn.preprocessing
import ctypes
import faiss
import os
from benchmark.algorithms.base import BaseANN
from benchmark.datasets import DATASETS


class Faiss(BaseANN):
    def query(self, X, n):
        if self._metric == 'angular':
            X /= numpy.linalg.norm(X)
        self.res = self.index.search(X.astype(numpy.float32), n)

    def get_results(self):
        D, I = self.res
        return I
        # res = []
        # for i in range(len(D)):
        #     r = []
        #     for l, d in zip(L[i], D[i]):
        #         if l != -1:
        #             r.append(l)
        #     res.append(r)
        # return res


class FaissIVF(Faiss):
    def __init__(self, metric, n_list):
        self._n_list = n_list
        self._metric = metric

    def index_name(self, name):
        return f"data/ivf_{name}_{self._n_list}_{self._metric}"

    def fit(self, dataset):
        X = DATASETS[dataset].get_dataset()  # assumes it fits into memory

        if self._metric == 'angular':
            X = sklearn.preprocessing.normalize(X, axis=1, norm='l2')

        if X.dtype != numpy.float32:
            X = X.astype(numpy.float32)

        self.quantizer = faiss.IndexFlatL2(X.shape[1])
        index = faiss.IndexIVFFlat(
            self.quantizer, X.shape[1], self._n_list, faiss.METRIC_L2)
        index.train(X)
        index.add(X)
        faiss.write_index(index, self.index_name(dataset))
        self.index = index

    def load_index(self, dataset):
        if not os.path.exists(self.index_name(dataset)):
            return False

        self.index = faiss.read_index(self.index_name(dataset))
        return True

    def set_query_arguments(self, n_probe):
        faiss.cvar.indexIVF_stats.reset()
        self._n_probe = n_probe
        self.index.nprobe = self._n_probe

    def get_additional(self):
        return {"dist_comps": faiss.cvar.indexIVF_stats.ndis +  # noqa
                faiss.cvar.indexIVF_stats.nq * self._n_list}

    def __str__(self):
        return 'FaissIVF(n_list=%d, n_probe=%d)' % (self._n_list,
                                                    self._n_probe)
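A hedged sketch of driving this class by hand, outside the Docker harness; the dataset key and parameter values are assumptions:

from benchmark.algorithms.faiss_inmem import FaissIVF
from benchmark.datasets import DATASETS

algo = FaissIVF("euclidean", 1024)       # n_list=1024, as in algos.yaml
if not algo.load_index("sift-1M"):       # reuse a previously written index...
    algo.fit("sift-1M")                  # ...or train, add, and persist one
algo.set_query_arguments(50)             # n_probe=50
algo.query(DATASETS["sift-1M"].get_queries(), 10)
print(algo.get_results().shape)          # (num_queries, 10) neighbor ids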

benchmark/datasets.py (+6 -7)
@@ -1,4 +1,3 @@
-import h5py
 import numpy
 import os
 import random
@@ -67,7 +66,7 @@ def get_groundtruth(self, k=None):
         for each query."""
         pass

-    def query_type(self):
+    def search_type(self):
         """
         "knn" or "range"
         """
@@ -156,9 +155,9 @@ def get_groundtruth(self, k=None):
         return gt

     def distance(self):
-        return "Euclidean"
+        return "euclidean"

-    def query_type(self):
+    def search_type(self):
         return "knn"

     def __str__(self):
@@ -204,11 +203,11 @@ def get_groundtruth(self, k=None):
         gt = gt[:, :k]
         return gt

-    def query_type(self):
+    def search_type(self):
         return "knn"

     def distance(self):
-        return "Euclidean"
+        return "euclidean"

     def __str__(self):
         return f"Deep1B"
@@ -226,7 +225,7 @@ def __init__(self, nb_M=1000):
         self.base_url = "https://storage.yandexcloud.net/yandex-research/ann-datasets/T2I/"

     def distance(self):
-        return "IP"
+        return "ip"

     def __str__(self):
         return f"TextToImage"

benchmark/distances.py (+16)
@@ -0,0 +1,16 @@
from scipy.spatial.distance import pdist as scipy_pdist
import itertools
import numpy as np

def pdist(a, b, metric):
    return scipy_pdist([a, b], metric=metric)[0]

metrics = {
    'euclidean': {
        'distance': lambda a, b: pdist(a, b, "euclidean"),
    },
    'angular': {
        'distance': lambda a, b: pdist(a, b, "cosine"),
    }
}
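The wrapper reduces scipy's pairwise interface to a two-point distance; for example (values follow scipy's definitions):

from benchmark.distances import metrics

print(metrics['euclidean']['distance']([0, 0], [3, 4]))  # 5.0
print(metrics['angular']['distance']([1, 0], [0, 1]))    # 1.0 (cosine distance)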

benchmark/main.py (+185)
@@ -0,0 +1,185 @@
from __future__ import absolute_import
import argparse
import logging
import logging.config

import docker
import multiprocessing.pool
import os
import psutil
import random
import shutil
import sys
import traceback

from benchmark.datasets import DATASETS
from benchmark.algorithms.definitions import (get_definitions,
                                              list_algorithms,
                                              algorithm_status,
                                              InstantiationStatus)
from benchmark.results import get_result_filename
from benchmark.runner import run, run_docker


def positive_int(s):
    i = None
    try:
        i = int(s)
    except ValueError:
        pass
    if not i or i < 1:
        raise argparse.ArgumentTypeError("%r is not a positive integer" % s)
    return i


def run_worker(args, queue):
    while not queue.empty():
        definition = queue.get()
        memory_margin = 500e6  # reserve some extra memory for misc stuff
        mem_limit = int((psutil.virtual_memory().available - memory_margin))
        #mem_limit = 128e9  # 128gb for competition
        cpu_limit = "0-%d" % (multiprocessing.cpu_count() - 1)

        run_docker(definition, args.dataset, args.count,
                   args.runs, args.timeout, cpu_limit, mem_limit)


def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--dataset',
        metavar='NAME',
        help='the dataset to load training points from',
        default='sift-1M',
        choices=DATASETS.keys())
    parser.add_argument(
        "-k", "--count",
        default=10,
        type=positive_int,
        help="the number of near neighbours to search for")
    parser.add_argument(
        '--definitions',
        metavar='FILE',
        help='load algorithm definitions from FILE',
        default='algos.yaml')
    parser.add_argument(
        '--algorithm',
        metavar='NAME',
        help='run only the named algorithm',
        default=None)
    parser.add_argument(
        '--docker-tag',
        metavar='NAME',
        help='run only algorithms in a particular docker image',
        default=None)
    parser.add_argument(
        '--list-algorithms',
        help='print the names of all known algorithms and exit',
        action='store_true')
    parser.add_argument(
        '--force',
        help='re-run algorithms even if their results already exist',
        action='store_true')
    parser.add_argument(
        '--runs',
        metavar='COUNT',
        type=positive_int,
        help='run each algorithm instance %(metavar)s times and use only'
             ' the best result',
        default=5)
    parser.add_argument(
        '--timeout',
        type=int,
        help='Timeout (in seconds) for each individual algorithm run, or -1'
             ' if no timeout should be set',
        default=12 * 3600)
    parser.add_argument(
        '--max-n-algorithms',
        type=int,
        help='Max number of algorithms to run (just used for testing)',
        default=-1)

    args = parser.parse_args()
    if args.timeout == -1:
        args.timeout = None

    if args.list_algorithms:
        list_algorithms(args.definitions)
        sys.exit(0)

    logging.config.fileConfig("logging.conf")
    logger = logging.getLogger("annb")

    dataset = DATASETS[args.dataset]
    dimension = dataset.d
    point_type = 'float'
    distance = dataset.distance()
    definitions = get_definitions(
        args.definitions, dimension, point_type, distance, args.count)

    # Filter out, from the loaded definitions, all those query argument groups
    # that correspond to experiments that have already been run. (This might
    # mean removing a definition altogether, so we can't just use a list
    # comprehension.)
    filtered_definitions = []
    for definition in definitions:
        query_argument_groups = definition.query_argument_groups
        if not query_argument_groups:
            query_argument_groups = [[]]
        not_yet_run = []
        for query_arguments in query_argument_groups:
            fn = get_result_filename(args.dataset,
                                     args.count, definition,
                                     query_arguments)
            if args.force or not os.path.exists(fn):
                not_yet_run.append(query_arguments)
        if not_yet_run:
            if definition.query_argument_groups:
                definition = definition._replace(
                    query_argument_groups=not_yet_run)
            filtered_definitions.append(definition)
    definitions = filtered_definitions

    random.shuffle(definitions)

    if args.algorithm:
        logger.info(f'running only {args.algorithm}')
        definitions = [d for d in definitions if d.algorithm == args.algorithm]

    # See which Docker images we have available
    docker_client = docker.from_env()
    docker_tags = set()
    for image in docker_client.images.list():
        for tag in image.tags:
            tag = tag.split(':')[0]
            docker_tags.add(tag)

    if args.docker_tag:
        logger.info(f'running only {args.docker_tag}')
        definitions = [
            d for d in definitions if d.docker_tag == args.docker_tag]

    if set(d.docker_tag for d in definitions).difference(docker_tags):
        logger.info(f'not all docker images available, only: {set(docker_tags)}')
        logger.info(f'missing docker images: '
                    f'{str(set(d.docker_tag for d in definitions).difference(docker_tags))}')
        definitions = [
            d for d in definitions if d.docker_tag in docker_tags]

    if args.max_n_algorithms >= 0:
        definitions = definitions[:args.max_n_algorithms]

    if len(definitions) == 0:
        raise Exception('Nothing to run')
    else:
        logger.info(f'Order: {definitions}')

    queue = multiprocessing.Queue()
    for definition in definitions:
        queue.put(definition)
    #run_worker(args, queue)
    workers = [multiprocessing.Process(target=run_worker, args=(args, queue))
               for i in range(1)]
    [worker.start() for worker in workers]
    [worker.join() for worker in workers]
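The harness is normally launched through run.py (shown further below), which calls main(). A hedged sketch of an equivalent programmatic launch; the argument values are assumptions:

import sys
from benchmark.main import main

# Equivalent to: python run.py --dataset sift-1M --algorithm faiss-ivf --count 10
sys.argv = ["run.py", "--dataset", "sift-1M",
            "--algorithm", "faiss-ivf", "--count", "10"]
main()  # queues the matching definitions and runs each in its Docker image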

benchmark/results.py (+56)
@@ -0,0 +1,56 @@
from __future__ import absolute_import

import h5py
import json
import os
import re
import traceback


def get_result_filename(dataset=None, count=None, definition=None,
                        query_arguments=None):
    d = ['results']
    if dataset:
        d.append(dataset)
    if count:
        d.append(str(count))
    if definition:
        d.append(definition.algorithm)
        data = definition.arguments + query_arguments
        d.append(re.sub(r'\W+', '_', json.dumps(data, sort_keys=True))
                 .strip('_'))
    return os.path.join(*d)


def store_results(dataset, count, definition, query_arguments, attrs, results):
    fn = get_result_filename(
        dataset, count, definition, query_arguments) + '.hdf5'
    head, tail = os.path.split(fn)
    if not os.path.isdir(head):
        os.makedirs(head)
    f = h5py.File(fn, 'w')
    for k, v in attrs.items():
        f.attrs[k] = v
    neighbors = f.create_dataset('neighbors', (len(results), count), 'i')
    for i, idxs in enumerate(results):
        neighbors[i] = idxs
    f.close()


def load_all_results(dataset=None, count=None):
    for root, _, files in os.walk(get_result_filename(dataset, count)):
        for fn in files:
            if os.path.splitext(fn)[-1] != '.hdf5':
                continue
            try:
                f = h5py.File(os.path.join(root, fn), 'r+')
                properties = dict(f.attrs)
                yield properties, f
                f.close()
            except:
                print('Was unable to read', fn)
                traceback.print_exc()


def get_unique_algorithms():
    return set(properties['algo'] for properties, _ in load_all_results())
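An illustration of the resulting path layout; the Definition below is hypothetical but mirrors the faiss-ivf entry in algos.yaml:

from benchmark.algorithms.definitions import Definition
from benchmark.results import get_result_filename

d = Definition(algorithm="faiss-ivf", constructor="FaissIVF",
               module="benchmark.algorithms.faiss_inmem",
               docker_tag="billion-scale-benchmark-faiss",
               arguments=["euclidean", 1024],
               query_argument_groups=[[50]], disabled=False)

# Build and query arguments are JSON-encoded, then runs of non-word
# characters collapse to underscores:
print(get_result_filename("sift-1M", 10, d, [50]))
# -> results/sift-1M/10/faiss-ivf/euclidean_1024_50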

benchmark/runner.py (+216)
@@ -0,0 +1,216 @@
import argparse
import json
import logging
import os
import threading
import time
import traceback

import colors
import docker
import numpy
import psutil

from benchmark.algorithms.definitions import (Definition,
                                              instantiate_algorithm)
from benchmark.datasets import DATASETS
from benchmark.results import store_results


def run_individual_query(algo, X, distance, count, run_count, search_type):
    best_search_time = float('inf')
    for i in range(run_count):
        print('Run %d/%d...' % (i + 1, run_count))

        start = time.time()
        if search_type == "knn":
            algo.query(X, count)
        else:
            algo.range_query(X, count)
        total = (time.time() - start)

        results = algo.get_results()

        search_time = total
        best_search_time = min(best_search_time, search_time)

    attrs = {
        "best_search_time": best_search_time,
        "name": str(algo),
        "run_count": run_count,
        "distance": distance,
        "type": search_type,
        "count": int(count)
    }
    additional = algo.get_additional()
    for k in additional:
        attrs[k] = additional[k]
    return (attrs, results)


def run(definition, dataset, count, run_count):
    algo = instantiate_algorithm(definition)
    assert not definition.query_argument_groups \
        or hasattr(algo, "set_query_arguments"), """\
error: query argument groups have been specified for %s.%s(%s), but the \
algorithm instantiated from it does not implement the set_query_arguments \
function""" % (definition.module, definition.constructor, definition.arguments)

    ds = DATASETS[dataset]
    #X_train = numpy.array(D['train'])
    X = ds.get_queries()
    distance = ds.distance()
    search_type = ds.search_type()
    print(f"Running {definition.algorithm} on {dataset}")
    print(f"Got {len(X)} queries")

    try:
        # Try loading the index from the file
        memory_usage_before = algo.get_memory_usage()
        if not algo.load_index(dataset):
            # Build the index if it is not available
            t0 = time.time()
            algo.fit(dataset)
            build_time = time.time() - t0
            print('Built index in', build_time)

        index_size = algo.get_memory_usage() - memory_usage_before
        print('Index size: ', index_size)

        query_argument_groups = definition.query_argument_groups
        # Make sure that algorithms with no query argument groups still get run
        # once by providing them with a single, empty, harmless group
        if not query_argument_groups:
            query_argument_groups = [[]]

        for pos, query_arguments in enumerate(query_argument_groups, 1):
            print("Running query argument group %d of %d..." %
                  (pos, len(query_argument_groups)))
            if query_arguments:
                algo.set_query_arguments(*query_arguments)
            descriptor, results = run_individual_query(
                algo, X, distance, count, run_count, search_type)
            # A bit unclear how to set this correctly if we usually load from file
            #descriptor["build_time"] = build_time
            descriptor["index_size"] = index_size
            descriptor["algo"] = definition.algorithm
            descriptor["dataset"] = dataset
            store_results(dataset, count, definition,
                          query_arguments, descriptor, results)
    finally:
        algo.done()


def run_from_cmdline():
    parser = argparse.ArgumentParser('''

            NOTICE: You probably want to run.py rather than this script.

''')
    parser.add_argument(
        '--dataset',
        choices=DATASETS.keys(),
        help='Dataset to benchmark on.',
        required=True)
    parser.add_argument(
        '--algorithm',
        help='Name of algorithm for saving the results.',
        required=True)
    parser.add_argument(
        '--module',
        help='Python module containing algorithm. E.g. "ann_benchmarks.algorithms.annoy"',
        required=True)
    parser.add_argument(
        '--constructor',
        help='Constructor to load from module. E.g. "Annoy"',
        required=True)
    parser.add_argument(
        '--count',
        help='k: Number of nearest neighbours for the algorithm to return.',
        required=True,
        type=int)
    parser.add_argument(
        '--runs',
        help='Number of times to run the algorithm. Will use the fastest run-time over the bunch.',
        required=True,
        type=int)
    parser.add_argument(
        'build',
        help='JSON of arguments to pass to the constructor. E.g. ["angular", 100]'
    )
    parser.add_argument(
        'queries',
        help='JSON of arguments to pass to the queries. E.g. [100]',
        nargs='*',
        default=[])
    args = parser.parse_args()
    algo_args = json.loads(args.build)
    print(algo_args)
    query_args = [json.loads(q) for q in args.queries]

    definition = Definition(
        algorithm=args.algorithm,
        docker_tag=None,  # not needed
        module=args.module,
        constructor=args.constructor,
        arguments=algo_args,
        query_argument_groups=query_args,
        disabled=False
    )
    run(definition, args.dataset, args.count, args.runs)


def run_docker(definition, dataset, count, runs, timeout, cpu_limit,
               mem_limit=None):
    cmd = ['--dataset', dataset,
           '--algorithm', definition.algorithm,
           '--module', definition.module,
           '--constructor', definition.constructor,
           '--runs', str(runs),
           '--count', str(count)]
    cmd.append(json.dumps(definition.arguments))
    cmd += [json.dumps(qag) for qag in definition.query_argument_groups]

    client = docker.from_env()
    if mem_limit is None:
        mem_limit = psutil.virtual_memory().available

    container = client.containers.run(
        definition.docker_tag,
        cmd,
        volumes={
            os.path.abspath('benchmark'):
                {'bind': '/home/app/benchmark', 'mode': 'ro'},
            os.path.abspath('data'):
                {'bind': '/home/app/data', 'mode': 'rw'},
            os.path.abspath('results'):
                {'bind': '/home/app/results', 'mode': 'rw'},
        },
        cpuset_cpus=cpu_limit,
        mem_limit=mem_limit,
        detach=True)
    logger = logging.getLogger(f"annb.{container.short_id}")

    logger.info('Created container %s: CPU limit %s, mem limit %s, timeout %d, command %s' %
                (container.short_id, cpu_limit, mem_limit, timeout, cmd))

    def stream_logs():
        for line in container.logs(stream=True):
            logger.info(colors.color(line.decode().rstrip(), fg='blue'))

    t = threading.Thread(target=stream_logs, daemon=True)
    t.start()

    try:
        exit_code = container.wait(timeout=timeout)

        # Report the container's logs if it exited with an error code
        if exit_code not in [0, None]:
            logger.error(colors.color(container.logs().decode(), fg='red'))
            logger.error('Child process for container %s raised exception %d' % (container.short_id, exit_code))
    except:
        logger.error('Container.wait for container %s failed with exception' % container.short_id)
        logger.error('Invoked with %s' % cmd)
        traceback.print_exc()
    finally:
        container.remove(force=True)

install/Dockerfile (+3 -2)
@@ -5,6 +5,7 @@ RUN apt-get install -y python3-numpy python3-scipy python3-pip build-essential g
 RUN pip3 install -U pip

 WORKDIR /home/app
-#COPY requirements.txt run_algorithm.py ./
-COPY requirements.txt ./
+COPY requirements.txt run_algorithm.py ./
 RUN pip3 install -rrequirements.txt
+
+ENTRYPOINT ["python3", "run_algorithm.py"]

install/Dockerfile.faiss (+1 -1)
@@ -1,4 +1,4 @@
-FROM ann-benchmarks
+FROM billion-scale-benchmark

 RUN apt-get update && apt-get install -y libopenblas-base libopenblas-dev libpython3-dev swig python3-dev libssl-dev wget
 RUN wget https://github.com/Kitware/CMake/releases/download/v3.18.3/cmake-3.18.3-Linux-x86_64.sh && mkdir cmake && sh cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=cmake && rm cmake-3.18.3-Linux-x86_64.sh

logging.conf (+34)
@@ -0,0 +1,34 @@
[loggers]
keys=root,annb

[handlers]
keys=consoleHandler,fileHandler

[formatters]
keys=simpleFormatter

[formatter_simpleFormatter]
format=%(asctime)s - %(name)s - %(levelname)s - %(message)s
datefmt=

[handler_consoleHandler]
class=StreamHandler
level=INFO
formatter=simpleFormatter
args=(sys.stdout,)

[handler_fileHandler]
class=FileHandler
level=INFO
formatter=simpleFormatter
args=('annb.log','w')

[logger_root]
level=WARN
handlers=consoleHandler

[logger_annb]
level=INFO
handlers=consoleHandler,fileHandler
qualname=annb
propagate=0

plot.py (+154)
@@ -0,0 +1,154 @@
import os
import matplotlib as mpl
mpl.use('Agg')  # noqa
import matplotlib.pyplot as plt
import numpy as np
import argparse

from benchmark.datasets import DATASETS
from benchmark.algorithms.definitions import get_definitions
from benchmark.plotting.metrics import all_metrics as metrics
from benchmark.plotting.utils import (get_plot_label, compute_metrics,
                                      create_linestyles, create_pointset)
from benchmark.results import (store_results, load_all_results,
                               get_unique_algorithms)


def create_plot(all_data, raw, x_scale, y_scale, xn, yn, fn_out, linestyles):
    xm, ym = (metrics[xn], metrics[yn])
    # Now generate each plot
    handles = []
    labels = []
    plt.figure(figsize=(12, 9))

    # Sorting by mean y-value helps aligning plots with labels
    def mean_y(algo):
        xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn)
        return -np.log(np.array(ys)).mean()
    # Find range for logit x-scale
    min_x, max_x = 1, 0
    for algo in sorted(all_data.keys(), key=mean_y):
        xs, ys, ls, axs, ays, als = create_pointset(all_data[algo], xn, yn)
        min_x = min([min_x] + [x for x in xs if x > 0])
        max_x = max([max_x] + [x for x in xs if x < 1])
        color, faded, linestyle, marker = linestyles[algo]
        handle, = plt.plot(xs, ys, '-', label=algo, color=color,
                           ms=7, mew=3, lw=3, linestyle=linestyle,
                           marker=marker)
        handles.append(handle)
        if raw:
            handle2, = plt.plot(axs, ays, '-', label=algo, color=faded,
                                ms=5, mew=2, lw=2, linestyle=linestyle,
                                marker=marker)
        labels.append(algo)

    ax = plt.gca()
    ax.set_ylabel(ym['description'])
    ax.set_xlabel(xm['description'])
    # Custom scales of the type --x-scale a3
    if x_scale[0] == 'a':
        alpha = int(x_scale[1:])
        fun = lambda x: 1 - (1 - x) ** (1 / alpha)
        inv_fun = lambda x: 1 - (1 - x) ** alpha
        ax.set_xscale('function', functions=(fun, inv_fun))
        if alpha <= 3:
            ticks = [inv_fun(x) for x in np.arange(0, 1.2, .2)]
            plt.xticks(ticks)
        if alpha > 3:
            from matplotlib import ticker
            ax.xaxis.set_major_formatter(ticker.LogitFormatter())
            #plt.xticks(ticker.LogitLocator().tick_values(min_x, max_x))
            plt.xticks([0, 1/2, 1-1e-1, 1-1e-2, 1-1e-3, 1-1e-4, 1])
    # Other x-scales
    else:
        ax.set_xscale(x_scale)
    ax.set_yscale(y_scale)
    ax.set_title(get_plot_label(xm, ym))
    box = plt.gca().get_position()
    # plt.gca().set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(handles, labels, loc='center left',
              bbox_to_anchor=(1, 0.5), prop={'size': 9})
    plt.grid(b=True, which='major', color='0.65', linestyle='-')
    plt.setp(ax.get_xminorticklabels(), visible=True)

    # Logit scale has to be a subset of (0,1)
    if 'lim' in xm and x_scale != 'logit':
        x0, x1 = xm['lim']
        plt.xlim(max(x0, 0), min(x1, 1))
    elif x_scale == 'logit':
        plt.xlim(min_x, max_x)
    if 'lim' in ym:
        plt.ylim(ym['lim'])

    # Workaround for bug https://github.com/matplotlib/matplotlib/issues/6789
    ax.spines['bottom']._adjust_location()

    plt.savefig(fn_out, bbox_inches='tight')
    plt.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--dataset',
        metavar="DATASET",
        default='sift-1M')
    parser.add_argument(
        '--count',
        default=10)
    parser.add_argument(
        '--definitions',
        metavar='FILE',
        help='load algorithm definitions from FILE',
        default='algos.yaml')
    parser.add_argument(
        '--limit',
        default=-1)
    parser.add_argument(
        '-o', '--output')
    parser.add_argument(
        '-x', '--x-axis',
        help='Which metric to use on the X-axis',
        choices=metrics.keys(),
        default="k-nn")
    parser.add_argument(
        '-y', '--y-axis',
        help='Which metric to use on the Y-axis',
        choices=metrics.keys(),
        default="qps")
    parser.add_argument(
        '-X', '--x-scale',
        help='Scale to use when drawing the X-axis. Typically linear, logit or a2',
        default='linear')
    parser.add_argument(
        '-Y', '--y-scale',
        help='Scale to use when drawing the Y-axis',
        choices=["linear", "log", "symlog", "logit"],
        default='linear')
    parser.add_argument(
        '--raw',
        help='Show raw results (not just Pareto frontier) in faded colours',
        action='store_true')
    parser.add_argument(
        '--recompute',
        help='Clears the cache and recomputes the metrics',
        action='store_true')
    args = parser.parse_args()

    if not args.output:
        args.output = 'results/%s.png' % (args.dataset)
        print('writing output to %s' % args.output)

    dataset = DATASETS[args.dataset]
    count = int(args.count)
    unique_algorithms = get_unique_algorithms()
    results = load_all_results(args.dataset, count)
    linestyles = create_linestyles(sorted(unique_algorithms))
    runs = compute_metrics(dataset.get_groundtruth(k=args.count),
                           results, args.x_axis, args.y_axis, args.recompute)
    if not runs:
        raise Exception('Nothing to plot')

    create_plot(runs, args.raw, args.x_scale,
                args.y_scale, args.x_axis, args.y_axis, args.output,
                linestyles)
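The custom a-scales stretch the high-recall end of the axis. A small sketch of the forward transform used above:

# Axis position is 1 - (1 - x) ** (1 / alpha), so x values near 1 spread out.
alpha = 3
fun = lambda x: 1 - (1 - x) ** (1 / alpha)
for x in (0.5, 0.9, 0.99, 0.999):
    print(x, round(fun(x), 3))
# 0.5 -> 0.206 | 0.9 -> 0.536 | 0.99 -> 0.785 | 0.999 -> 0.9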

run.py (+6)
@@ -0,0 +1,6 @@
from benchmark.main import main
from multiprocessing import freeze_support

if __name__ == "__main__":
    freeze_support()
    main()

run_algorithm.py (+3)
@@ -0,0 +1,3 @@
from benchmark.runner import run_from_cmdline

run_from_cmdline()
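Inside a container, run_algorithm.py receives the argv assembled by run_docker above. A hedged reconstruction for the faiss-ivf definition (values mirror algos.yaml; positional arguments are JSON):

argv = ["--dataset", "sift-1M",
        "--algorithm", "faiss-ivf",
        "--module", "benchmark.algorithms.faiss_inmem",
        "--constructor", "FaissIVF",
        "--runs", "5",
        "--count", "10",
        '["euclidean", 1024]',   # build: constructor arguments
        "[1]", "[5]", "[50]"]    # queries: one group per n_probe setting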
