diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..3978a0f --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +.git +.gitignore diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..6deafc2 --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 120 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f3c231a --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__ +*.py[cod] + +*.pth +*.pb diff --git a/Dockerfile.cpu b/Dockerfile.cpu new file mode 100644 index 0000000..8d54486 --- /dev/null +++ b/Dockerfile.cpu @@ -0,0 +1,67 @@ +FROM ubuntu:18.04 + +WORKDIR /usr/src/app + +ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false" CFLAGS="-mavx2" CXXFLAGS="-mavx2" + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + python3 python3-pip python3-venv \ + wget make g++ ffmpeg python3-dev libblas-dev liblapack-dev swig \ + cmake yasm zlib1g-dev && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . + +RUN python3 -m venv /opt/venv && \ + python3 -m pip install pip==19.2.3 pip-tools==4.0.0 + +# For pytorch and torchvision we need platform specific (cpu vs. gpu) wheels from +# https://download.pytorch.org/whl/cpu/torch_stable.html +# To generate hashes run: python3 -m pip hash *.whl +RUN echo "https://download.pytorch.org/whl/cpu/torch-1.2.0%2Bcpu-cp36-cp36m-manylinux1_x86_64.whl \ + --hash=sha256:7b9b943673d3acb446248ba0d6feed6926bf60ce719ace4707a6559c1f57ced7 \ + \n \ + https://download.pytorch.org/whl/cpu/torchvision-0.4.0%2Bcpu-cp36-cp36m-manylinux1_x86_64.whl \ + --hash=sha256:63f342b858b18839fcf3ff8ad857e44a4ff0fcb8cb8e2bdc2f4ed9afa7cec9e0 \ + \n" >> requirements.txt && cat requirements.txt + +RUN python3 -m piptools sync + +RUN python3 -c "from torchvision.models import resnet50; resnet50(pretrained=True, progress=False)" && \ + python3 -c "from torchvision.models.video import r2plus1d_18; r2plus1d_18(pretrained=True, progress=False)" + +RUN wget -q https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.3.tar.gz -O libjpeg-turbo.tar.gz && \ + echo "a69598bf079463b34d45ca7268462a18b6507fdaa62bb1dfd212f02041499b5d libjpeg-turbo.tar.gz" | sha256sum -c && \ + tar xf libjpeg-turbo.tar.gz && \ + rm libjpeg-turbo.tar.gz && \ + cd libjpeg-turbo* && \ + mkdir build && \ + cd build && \ + cmake -DCMAKE_BUILD_TYPE=Release -DREQUIRE_SIMD=On -DCMAKE_INSTALL_PREFIX=/usr/local .. && \ + make -j $(nproc) && \ + make install && \ + ldconfig && \ + cd ../../ && \ + rm -rf libjpeg-turbo* + +RUN python3 -m pip uninstall -y pillow && \ + python3 -m pip install --no-binary :all: --compile pillow-simd==6.0.0.post0 + +RUN wget -q https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz -O faiss.tar.gz && \ + echo "b24d347b0285d01c2ed663ccc7596cd0ea95071f3dd5ebb573ccfc28f15f043b faiss.tar.gz" | sha256sum -c && \ + tar xf faiss.tar.gz && \ + rm faiss.tar.gz && \ + cd faiss* && \ + ./configure --without-cuda && \ + make -j $(nproc) && \ + make -j $(nproc) -C python && \ + make install && \ + make -C python install && \ + cd .. && \ + rm -rf faiss* + +COPY . . 
+ +EXPOSE 5000 +ENTRYPOINT ["/usr/src/app/bin/sfi"] +CMD ["-h"] diff --git a/Dockerfile.gpu b/Dockerfile.gpu new file mode 100644 index 0000000..1dba111 --- /dev/null +++ b/Dockerfile.gpu @@ -0,0 +1,68 @@ +FROM nvidia/cuda:10.1-cudnn7-devel + +WORKDIR /usr/src/app + +ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false" CFLAGS="-mavx2" CXXFLAGS="-mavx2" + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ + python3 python3-pip python3-venv \ + wget make g++ ffmpeg python3-dev libblas-dev liblapack-dev swig \ + cmake yasm zlib1g-dev && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt . + +RUN python3 -m venv /opt/venv && \ + python3 -m pip install pip==19.2.3 pip-tools==4.0.0 + +# For pytorch and torchvision we need platform specific (cpu vs. gpu) wheels from +# https://download.pytorch.org/whl/cu100/torch_stable.html +# To generate hashes run: python3 -m pip hash *.whl +RUN echo "https://download.pytorch.org/whl/cu100/torch-1.2.0-cp36-cp36m-manylinux1_x86_64.whl \ + --hash=sha256:a13bf6f78a49d844b85c142b8cd62d2e1833a11ed21ea0bc6b1ac73d24c76415 \ + \n \ + https://download.pytorch.org/whl/cu100/torchvision-0.4.0-cp36-cp36m-manylinux1_x86_64.whl \ + --hash=sha256:2f67efdf6edd9ea7f9cd9a3917ae5c63d5684e3bdb5cc9c2b364c15bdfe4456b \ + \n" >> requirements.txt + +RUN python3 -m piptools sync + +RUN python3 -c "from torchvision.models import resnet50; resnet50(pretrained=True, progress=False)" && \ + python3 -c "from torchvision.models.video import r2plus1d_18; r2plus1d_18(pretrained=True, progress=False)" + +RUN wget -q https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.3.tar.gz -O libjpeg-turbo.tar.gz && \ + echo "a69598bf079463b34d45ca7268462a18b6507fdaa62bb1dfd212f02041499b5d libjpeg-turbo.tar.gz" | sha256sum -c && \ + tar xf libjpeg-turbo.tar.gz && \ + rm libjpeg-turbo.tar.gz && \ + cd libjpeg-turbo* && \ + mkdir build && \ + cd build && \ + cmake -DCMAKE_BUILD_TYPE=Release -DREQUIRE_SIMD=On -DCMAKE_INSTALL_PREFIX=/usr/local .. && \ + make -j $(nproc) && \ + make install && \ + ldconfig && \ + cd ../../ && \ + rm -rf libjpeg-turbo* + + +RUN python3 -m pip uninstall -y pillow && \ + python3 -m pip install --no-binary :all: --compile pillow-simd==6.0.0.post0 + +RUN wget -q https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz -O faiss.tar.gz && \ + echo "b24d347b0285d01c2ed663ccc7596cd0ea95071f3dd5ebb573ccfc28f15f043b faiss.tar.gz" | sha256sum -c && \ + tar xf faiss.tar.gz && \ + rm faiss.tar.gz && \ + cd faiss* && \ + ./configure --with-cuda-arch="-gencode=arch=compute_37,code=compute_37 -gencode=arch=compute_70,code=compute_70" --with-cuda="/usr/local/cuda" && \ + make -j $(nproc) && \ + make -j $(nproc) -C python && \ + make install && \ + make -C python install && \ + cd .. && \ + rm -rf faiss* + +COPY . . 
+ +EXPOSE 5000 +ENTRYPOINT ["/usr/src/app/bin/sfi"] +CMD ["-h"] diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..2620140 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 MoabitCoin + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4d72bfc --- /dev/null +++ b/Makefile @@ -0,0 +1,32 @@ +dockerimage ?= moabitcoin/sfi +dockerfile ?= Dockerfile.cpu +srcdir ?= $(shell pwd) +datadir ?= $(shell pwd) + +install: + @docker build -t $(dockerimage) -f $(dockerfile) . + +i: install + + +update: + @docker build -t $(dockerimage) -f $(dockerfile) . --pull --no-cache + +u: update + + +run: + @docker run -it --rm --ipc="host" --network="host" -p 5000:5000 -v $(srcdir)/sfi:/usr/src/app/sfi -v $(datadir):/data --entrypoint=/bin/bash $(dockerimage) + +r: run + + +publish: + @docker image save $(dockerimage) \ + | pv -N "Publish $(dockerimage) to $(sshopts)" -s $(shell docker image inspect $(dockerimage) --format "{{.Size}}") \ + | ssh $(sshopts) "docker image load" + +p: publish + + +.PHONY: install i run r update u publish p diff --git a/README.md b/README.md new file mode 100644 index 0000000..aa4012b --- /dev/null +++ b/README.md @@ -0,0 +1,104 @@ +# Semantic Frame Index + +Fast and efficient queries on video frames by semantic similarity. + + +## Use Case + +We record tens of thousands of hours of drive video data and need to be able to search for semantically similar scenarios. +Similarity could mean similar lighting conditions, similar vehicle types, similar traffic volumes, similar objects on the road, and so on. + + +## Implementation Sketch + +We +- extract key frames using a neural net for frame similarity in feature space +- extract a trained convolutional neural net's high level feature maps for all key frames +- compute Maximum Activations of Convolution (MAC) features from the high-level feature maps (see the sketch below) +- index the feature maps for approximate nearest neighbor searches based on L2 distance +- query the indexed dataset for semantically similar scenarios + + +## Usage + +All tools can be invoked via + + ./bin/sfi <command> + + ./bin/sfi --help + ./bin/sfi <command> --help + + +### stream-index + +Builds an index from a directory of images for fast and efficient approximate nearest neighbor queries based on L2 distance. +The quantizer for the index needs to be trained on a small subset of the feature maps to approximate the dataset's centroids. +We recommend running this step on GPUs.
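To make the MAC step in the implementation sketch above concrete, here is a minimal sketch of how a MAC descriptor can be computed from a ResNet-50 feature map, mirroring `sfi/features.py` and the pooling in `sfi/tools/stream.py`. The dummy input batch and its dimensions are illustrative assumptions only, not part of the tools' interface.

```python
# Minimal MAC sketch (assumes torch, torchvision and einops as pinned in requirements.txt):
# max-pool a ResNet-50 feature map over its spatial locations so that every image
# ends up as a single 2048-d descriptor that can be added to the faiss index.
import torch
import torch.nn as nn
from torchvision.models import resnet50
from einops import rearrange, reduce

net = resnet50(pretrained=True, progress=False)
net.fc = nn.Identity()       # chop off the classification head
net.avgpool = nn.Identity()  # keep spatial information
net.eval()

with torch.no_grad():
    images = torch.rand(2, 3, 224, 224)   # dummy NCHW batch, sides divisible by 32
    c, h, w = 2048, 224 // 32, 224 // 32  # resnet50 downsamples x2 five times
    feats = net(images)                   # flat view over the batch: n x (c*h*w)
    feats = rearrange(feats, "n (c h w) -> n (h w) c", c=c, h=h, w=w)
    macs = reduce(feats, "n s c -> n c", "max")  # Maximum Activations of Convolution
    print(macs.shape)                     # torch.Size([2, 2048])
```

The stream-index tool trains the quantizer on a sample of these descriptors and then adds the remaining ones to the index.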
+ + +### save-feature + +Extracts high level feature maps and computes MACs for image frames from a trained convolutional neural net. + + +### save-frames + +Extracts semantic key frames from videos based on a trained convolutional net for feature similarity between frames. + + +### query-server + +Loads up the index (slow) and keeps it in memory to handle nearest neighbor queries (fast). +Responds to queries by searching the index, aggregating results, and re-ranking them. + + +### query-client + +Sends nearest neighbor requests against the query server and reports results to the user. +The query and results are based on the saved MAC features (see the request sketch below). + + +### model-train + +Trains a binary classification model on a dataset (potentially noisy and obtained from the index). +We recommend running this step on GPUs. + + +### model-infer + +Predicts binary classification labels on a dataset using a trained model. + + +## Development + +Create a self-contained reproducible development environment + + make i + +Get into the development environment + + make r + +The Python source code directory is mounted into the container: changes you make on the host are visible inside the container. + +To make data visible in the container set the `datadir` variable, e.g. to make your `/tmp` directory show up in `/data` inside the container run + + make r datadir=/tmp + +See the `Makefile` for options and more advanced targets. + + +## References + +- [Particular object retrieval with integral max-pooling of CNN activations](https://arxiv.org/abs/1511.05879) +- Product Quantizer (PQ) [part 1](http://mccormickml.com/2017/10/13/product-quantizer-tutorial-part-1/), and [part 2](http://mccormickml.com/2017/10/22/product-quantizer-tutorial-part-2/) +- [Product Quantization for Nearest Neighbor Search](https://hal.inria.fr/file/index/docid/514462/filename/paper_hal.pdf) +- [Billion-scale similarity search with GPUs](https://arxiv.org/pdf/1702.08734.pdf) +- [faiss wiki](https://github.com/facebookresearch/faiss/wiki) + + +## License + +Copyright © 2019 MoabitCoin + +Distributed under the MIT License (MIT).
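For reference, the protocol between query-client and query-server (see `sfi/tools/client.py` and `sfi/tools/server.py` further down in this diff) is a single JSON POST carrying a base64-encoded feature array. Below is a minimal hand-rolled request sketch; the `frame.npy` file, host and port are assumptions for illustration only.

```python
# Hand-rolled query against a running query-server, mirroring sfi/tools/client.py.
# "frame.npy" is a hypothetical MAC descriptor written by the save-feature tool.
import base64
import json

import numpy as np
import requests

feature = np.load("frame.npy", allow_pickle=False)
if feature.ndim == 1:                 # treat a flat (C,) vector as (1, C)
    feature = feature[np.newaxis, :]

payload = {
    "num_results": 10,
    "feature": base64.b64encode(feature.ravel()).decode("utf-8"),
    "shape": list(feature.shape),
    "dtype": str(feature.dtype),
}

res = requests.post("http://127.0.0.1:5000", data=json.dumps(payload))
res.raise_for_status()
print(res.json())  # [{"distance": ..., "path": ...}, ...] sorted by distance
```

The server decodes the buffer with `np.frombuffer`, reshapes it according to `shape`, and answers with a list of `{distance, path}` records.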
diff --git a/bin/sfi b/bin/sfi new file mode 100755 index 0000000..9f648f8 --- /dev/null +++ b/bin/sfi @@ -0,0 +1,3 @@ +#!/bin/bash + +python3 -m sfi.tools "$@" diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..320a6af --- /dev/null +++ b/requirements.in @@ -0,0 +1,7 @@ +numpy +pillow +tqdm +flask +requests +einops +scikit-video diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..49c0fd5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,145 @@ +# +# This file is autogenerated by pip-compile +# To update, run: +# +# pip-compile --generate-hashes +# +certifi==2019.6.16 \ + --hash=sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939 \ + --hash=sha256:945e3ba63a0b9f577b1395204e13c3a231f9bc0223888be653286534e5873695 \ + # via requests +chardet==3.0.4 \ + --hash=sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae \ + --hash=sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691 \ + # via requests +click==7.0 \ + --hash=sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13 \ + --hash=sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7 \ + # via flask +einops==0.1.0 \ + --hash=sha256:4ab512fe059c0841e1a315449ca9d7f35eaa05c8c095a14f2c1b92b2b77684d2 \ + --hash=sha256:4fd64864fcb8159074da3213b9327c242536784416cbf423745ef8579850d30b +flask==1.1.1 \ + --hash=sha256:13f9f196f330c7c2c5d7a5cf91af894110ca0215ac051b5844701f2bfd934d52 \ + --hash=sha256:45eb5a6fd193d6cf7e0cf5d8a5b31f83d5faae0293695626f539a823e93b13f6 +idna==2.8 \ + --hash=sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407 \ + --hash=sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c \ + # via requests +itsdangerous==1.1.0 \ + --hash=sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19 \ + --hash=sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749 \ + # via flask +jinja2==2.10.1 \ + --hash=sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013 \ + --hash=sha256:14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b \ + # via flask +markupsafe==1.1.1 \ + --hash=sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473 \ + --hash=sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161 \ + --hash=sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235 \ + --hash=sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5 \ + --hash=sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff \ + --hash=sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b \ + --hash=sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1 \ + --hash=sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e \ + --hash=sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183 \ + --hash=sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66 \ + --hash=sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1 \ + --hash=sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1 \ + --hash=sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e \ + --hash=sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b \ + --hash=sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905 \ + --hash=sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735 \ + 
--hash=sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d \ + --hash=sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e \ + --hash=sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d \ + --hash=sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c \ + --hash=sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21 \ + --hash=sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2 \ + --hash=sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5 \ + --hash=sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b \ + --hash=sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6 \ + --hash=sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f \ + --hash=sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f \ + --hash=sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7 \ + # via jinja2 +numpy==1.17.0 \ + --hash=sha256:03e311b0a4c9f5755da7d52161280c6a78406c7be5c5cc7facfbcebb641efb7e \ + --hash=sha256:0cdd229a53d2720d21175012ab0599665f8c9588b3b8ffa6095dd7b90f0691dd \ + --hash=sha256:312bb18e95218bedc3563f26fcc9c1c6bfaaf9d453d15942c0839acdd7e4c473 \ + --hash=sha256:464b1c48baf49e8505b1bb754c47a013d2c305c5b14269b5c85ea0625b6a988a \ + --hash=sha256:5adfde7bd3ee4864536e230bcab1c673f866736698724d5d28c11a4d63672658 \ + --hash=sha256:7724e9e31ee72389d522b88c0d4201f24edc34277999701ccd4a5392e7d8af61 \ + --hash=sha256:8d36f7c53ae741e23f54793ffefb2912340b800476eb0a831c6eb602e204c5c4 \ + --hash=sha256:910d2272403c2ea8a52d9159827dc9f7c27fb4b263749dca884e2e4a8af3b302 \ + --hash=sha256:951fefe2fb73f84c620bec4e001e80a80ddaa1b84dce244ded7f1e0cbe0ed34a \ + --hash=sha256:9588c6b4157f493edeb9378788dcd02cb9e6a6aeaa518b511a1c79d06cbd8094 \ + --hash=sha256:9ce8300950f2f1d29d0e49c28ebfff0d2f1e2a7444830fbb0b913c7c08f31511 \ + --hash=sha256:be39cca66cc6806652da97103605c7b65ee4442c638f04ff064a7efd9a81d50a \ + --hash=sha256:c3ab2d835b95ccb59d11dfcd56eb0480daea57cdf95d686d22eff35584bc4554 \ + --hash=sha256:eb0fc4a492cb896346c9e2c7a22eae3e766d407df3eb20f4ce027f23f76e4c54 \ + --hash=sha256:ec0c56eae6cee6299f41e780a0280318a93db519bbb2906103c43f3e2be1206c \ + --hash=sha256:f4e4612de60a4f1c4d06c8c2857cdcb2b8b5289189a12053f37d3f41f06c60d0 +pillow==6.1.0 \ + --hash=sha256:0804f77cb1e9b6dbd37601cee11283bba39a8d44b9ddb053400c58e0c0d7d9de \ + --hash=sha256:0ab7c5b5d04691bcbd570658667dd1e21ca311c62dcfd315ad2255b1cd37f64f \ + --hash=sha256:0b3e6cf3ea1f8cecd625f1420b931c83ce74f00c29a0ff1ce4385f99900ac7c4 \ + --hash=sha256:365c06a45712cd723ec16fa4ceb32ce46ad201eb7bbf6d3c16b063c72b61a3ed \ + --hash=sha256:38301fbc0af865baa4752ddae1bb3cbb24b3d8f221bf2850aad96b243306fa03 \ + --hash=sha256:3aef1af1a91798536bbab35d70d35750bd2884f0832c88aeb2499aa2d1ed4992 \ + --hash=sha256:3fe0ab49537d9330c9bba7f16a5f8b02da615b5c809cdf7124f356a0f182eccd \ + --hash=sha256:45a619d5c1915957449264c81c008934452e3fd3604e36809212300b2a4dab68 \ + --hash=sha256:49f90f147883a0c3778fd29d3eb169d56416f25758d0f66775db9184debc8010 \ + --hash=sha256:571b5a758baf1cb6a04233fb23d6cf1ca60b31f9f641b1700bfaab1194020555 \ + --hash=sha256:5ac381e8b1259925287ccc5a87d9cf6322a2dc88ae28a97fe3e196385288413f \ + --hash=sha256:6153db744a743c0c8c91b8e3b9d40e0b13a5d31dbf8a12748c6d9bfd3ddc01ad \ + --hash=sha256:6fd63afd14a16f5d6b408f623cc2142917a1f92855f0df997e09a49f0341be8a \ + --hash=sha256:70acbcaba2a638923c2d337e0edea210505708d7859b87c2bd81e8f9902ae826 \ + 
--hash=sha256:70b1594d56ed32d56ed21a7fbb2a5c6fd7446cdb7b21e749c9791eac3a64d9e4 \ + --hash=sha256:76638865c83b1bb33bcac2a61ce4d13c17dba2204969dedb9ab60ef62bede686 \ + --hash=sha256:7b2ec162c87fc496aa568258ac88631a2ce0acfe681a9af40842fc55deaedc99 \ + --hash=sha256:7cee2cef07c8d76894ebefc54e4bb707dfc7f258ad155bd61d87f6cd487a70ff \ + --hash=sha256:7d16d4498f8b374fc625c4037742fbdd7f9ac383fd50b06f4df00c81ef60e829 \ + --hash=sha256:b50bc1780681b127e28f0075dfb81d6135c3a293e0c1d0211133c75e2179b6c0 \ + --hash=sha256:bd0582f831ad5bcad6ca001deba4568573a4675437db17c4031939156ff339fa \ + --hash=sha256:cfd40d8a4b59f7567620410f966bb1f32dc555b2b19f82a91b147fac296f645c \ + --hash=sha256:e3ae410089de680e8f84c68b755b42bc42c0ceb8c03dbea88a5099747091d38e \ + --hash=sha256:e9046e559c299b395b39ac7dbf16005308821c2f24a63cae2ab173bd6aa11616 \ + --hash=sha256:ef6be704ae2bc8ad0ebc5cb850ee9139493b0fc4e81abcc240fb392a63ebc808 \ + --hash=sha256:f8dc19d92896558f9c4317ee365729ead9d7bbcf2052a9a19a3ef17abbb8ac5b +requests==2.22.0 \ + --hash=sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4 \ + --hash=sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31 +scikit-video==1.1.11 \ + --hash=sha256:4fc131e509aaeeb0eecb6acb58b92a7ef905be5dbe27ed1d1ae089634b601f23 \ + --hash=sha256:5061d2aeae1892b73a97c89a82942b3e8eebf2fe23e59c60e06ede5f8a24ed1e +scipy==1.3.1 \ + --hash=sha256:0baa64bf42592032f6f6445a07144e355ca876b177f47ad8d0612901c9375bef \ + --hash=sha256:243b04730d7223d2b844bda9500310eecc9eda0cba9ceaf0cde1839f8287dfa8 \ + --hash=sha256:2643cfb46d97b7797d1dbdb6f3c23fe3402904e3c90e6facfe6a9b98d808c1b5 \ + --hash=sha256:396eb4cdad421f846a1498299474f0a3752921229388f91f60dc3eda55a00488 \ + --hash=sha256:3ae3692616975d3c10aca6d574d6b4ff95568768d4525f76222fb60f142075b9 \ + --hash=sha256:435d19f80b4dcf67dc090cc04fde2c5c8a70b3372e64f6a9c58c5b806abfa5a8 \ + --hash=sha256:46a5e55850cfe02332998b3aef481d33f1efee1960fe6cfee0202c7dd6fc21ab \ + --hash=sha256:75b513c462e58eeca82b22fc00f0d1875a37b12913eee9d979233349fce5c8b2 \ + --hash=sha256:7ccfa44a08226825126c4ef0027aa46a38c928a10f0a8a8483c80dd9f9a0ad44 \ + --hash=sha256:89dd6a6d329e3f693d1204d5562dd63af0fd7a17854ced17f9cbc37d5b853c8d \ + --hash=sha256:a81da2fe32f4eab8b60d56ad43e44d93d392da228a77e229e59b51508a00299c \ + --hash=sha256:a9d606d11eb2eec7ef893eb825017fbb6eef1e1d0b98a5b7fc11446ebeb2b9b1 \ + --hash=sha256:ac37eb652248e2d7cbbfd89619dce5ecfd27d657e714ed049d82f19b162e8d45 \ + --hash=sha256:cbc0611699e420774e945f6a4e2830f7ca2b3ee3483fca1aa659100049487dd5 \ + --hash=sha256:d02d813ec9958ed63b390ded463163685af6025cb2e9a226ec2c477df90c6957 \ + --hash=sha256:dd3b52e00f93fd1c86f2d78243dfb0d02743c94dd1d34ffea10055438e63b99d \ + # via scikit-video +tqdm==4.33.0 \ + --hash=sha256:1dc82f87a8726602fa7177a091b5e8691d6523138a8f7acd08e58088f51e389f \ + --hash=sha256:47220a4f2aeebbc74b0ab317584264ea44c745e1fd5ff316b675cd0aff8afad8 +urllib3==1.25.3 \ + --hash=sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1 \ + --hash=sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232 \ + # via requests +werkzeug==0.15.5 \ + --hash=sha256:87ae4e5b5366da2347eb3116c0e6c681a0e939a33b2805e2c0cbd282664932c4 \ + --hash=sha256:a13b74dd3c45f758d4ebdb224be8f1ab8ef58b3c0ffc1783a8c7d9f4f50227e6 \ + # via flask diff --git a/scripts/drawlines.py b/scripts/drawlines.py new file mode 100755 index 0000000..2febc55 --- /dev/null +++ b/scripts/drawlines.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 + +import sys + +from PIL import Image +from PIL import 
ImageDraw + + +if len(sys.argv) != 3: + sys.exit("Usage: {} in.jpg out.jpg".format(sys.argv[0])) + + +infile = sys.argv[1] +outfile = sys.argv[2] + +image = Image.open(infile) +w, h = image.size + +draw = ImageDraw.Draw(image) + +n = 7 + +dx = w // n +dy = h // n + +for i in range(1, n): + draw.line([i * dx, 0, i * dx, h], fill="green", width=1) + +for j in range(1, n): + draw.line([0, j * dy, w, j * dy], fill="green", width=1) + +image.save(outfile, optimize=True) diff --git a/scripts/key-frames-to-video b/scripts/key-frames-to-video new file mode 100755 index 0000000..ac39725 --- /dev/null +++ b/scripts/key-frames-to-video @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ $# -lt 2 ]; then + echo "Usage: $(basename $0) framedir video.mp4 [fps] [resolution]" + exit 1 +fi + +readonly framedir="${1}" +readonly video="${2}" +readonly rate="${3:-2}" +readonly resolution="${4:-320x180}" + +ffmpeg -y -loglevel error -r "${rate}" -vsync 0 -f image2 -pattern_type glob -i "${framedir}/*.jpg" -s "${resolution}" -vcodec libx264 "${video}" diff --git a/scripts/scale-crop-image b/scripts/scale-crop-image new file mode 100755 index 0000000..3a8a5f2 --- /dev/null +++ b/scripts/scale-crop-image @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ $# -lt 2 ]; then + echo "Usage: $(basename $0) in.jpg out.jpg [WxH]" + exit 1 +fi + +readonly resolution="${3:-224x224}" + +convert "${1}" -resize "${resolution}^" -gravity Center -extent "${resolution}" "${2}" diff --git a/scripts/split-image b/scripts/split-image new file mode 100755 index 0000000..7756622 --- /dev/null +++ b/scripts/split-image @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ $# -lt 2 ]; then + echo "Usage: $(basename $0) in.jpg outdir [WxH]" + exit 1 +fi + +readonly resolution="${3:-32x32}" + +convert "${1}" -crop "${resolution}" +repage +adjoin "${2}/split-%d.jpg" diff --git a/scripts/video-to-key-frames b/scripts/video-to-key-frames new file mode 100755 index 0000000..0b369ff --- /dev/null +++ b/scripts/video-to-key-frames @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ $# -ne 2 ]; then + echo "Usage: $(basename $0) video.mp4 outdir" + exit 1 +fi + +ffmpeg -y -loglevel error -skip_frame nokey -i "${1}" -vsync 0 -f image2 "${2}/frame-%d.jpg" diff --git a/scripts/video-to-resampled-frames b/scripts/video-to-resampled-frames new file mode 100755 index 0000000..887d8c9 --- /dev/null +++ b/scripts/video-to-resampled-frames @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +set -o errexit +set -o pipefail +set -o nounset + +if [ $# -ne 3 ]; then + echo "Usage: $(basename $0) video.mp4 outdir fps" + exit 1 +fi + +ffmpeg -y -loglevel error -r "${3}" -i "${1}" -vsync 0 -f image2 "${2}/frame-%d.jpg" diff --git a/sfi/__init__.py b/sfi/__init__.py new file mode 100644 index 0000000..37769da --- /dev/null +++ b/sfi/__init__.py @@ -0,0 +1,4 @@ +"""Semantic Frame Index + + Fast and efficient queries on video frames by semantic similarity. +""" diff --git a/sfi/datasets.py b/sfi/datasets.py new file mode 100644 index 0000000..372dfad --- /dev/null +++ b/sfi/datasets.py @@ -0,0 +1,50 @@ +from PIL import Image + +from torch.utils.data import Dataset + +from sfi.utils import files + +# PyTorch can not transport a Path object through data loaders. +# Serialize Path to str here; users have to encode via Path(path). 
+ + +class ImageDirectory(Dataset): + def __init__(self, root, transform=None): + super().__init__() + + self.paths = files(root) + self.transform = transform + + def __len__(self): + return len(self.paths) + + def __getitem__(self, i): + path = str(self.paths[i]) + image = Image.open(path) + + if self.transform is not None: + image = self.transform(image) + + return image, path + + +class ImageSingleton(Dataset): + def __init__(self, root, transform=None): + super().__init__() + + self.path = root + self.transform = transform + + def __len__(self): + return 1 + + def __getitem__(self, i): + assert i == 0 + + path = str(self.path) + image = Image.open(path) + + if self.transform is not None: + image = self.transform(image) + + return image, path diff --git a/sfi/features.py b/sfi/features.py new file mode 100644 index 0000000..d414310 --- /dev/null +++ b/sfi/features.py @@ -0,0 +1,74 @@ +import sys + +import torch +import torch.nn as nn +from torchvision.models import resnet50 +from torchvision.transforms import Compose, Normalize, ToTensor, Resize + +from einops import rearrange + +from sfi.transforms import ToImageMode, PadToMultiple + + +class FeatureExtractor: + def __init__(self, image_size): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + if torch.cuda.is_available(): + print("Using CUDA, benchmarking implementations", file=sys.stderr) + torch.backends.cudnn.benchmark = True + + # Set up pre-trained resnet in inference mode + resnet = resnet50(pretrained=True, progress=False) + + # Chop off classification head + resnet.fc = nn.Identity() + + # In addition do not pool, keep spatial information if user wants to + resnet.avgpool = nn.Identity() + + for params in resnet.parameters(): + params.requires_grad = False + + resnet = resnet.to(device) + resnet = nn.DataParallel(resnet) + + resnet.eval() + + self.net = resnet + self.device = device + self.image_size = image_size + + @property + def transform(self): + # ImageNet statistics (because we use pre-trained model) + mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] + + return Compose([ + ToImageMode("RGB"), + Resize(self.image_size), + # resnet50 downsamples x2 five times + PadToMultiple(32, fill=0), + ToTensor(), + Normalize(mean=mean, std=std)]) + + # batch of NCHW image tensors to batch of NHWC feature tensors + def __call__(self, images): + n, c, h, w = images.size(0), 2048, images.size(2), images.size(3) + + assert h % 32 == 0, "height divisible by 32 for resnet50" + assert w % 32 == 0, "width divisible by 32 for resnet50" + + with torch.no_grad(): + images = images.to(self.device) + + # resnet50 downsamples x2 five times + h, w = h // 32, w // 32 + + # resnet50 outputs flat view over a batch with 2048 channels, spatial resolution HxW + # https://github.com/pytorch/vision/blob/ac2e995a4352267f65e7cc6d354bde683a4fb402/torchvision/models/resnet.py#L202-L204 + + features = self.net(images) + features = rearrange(features, "n (c h w) -> n (h w) c", n=n, h=h, w=w, c=c) + + return features diff --git a/sfi/index.py b/sfi/index.py new file mode 100644 index 0000000..5d6424c --- /dev/null +++ b/sfi/index.py @@ -0,0 +1,52 @@ +import numpy as np +from einops import rearrange + +from faiss import IndexPQ + +from sfi.io import IndexIO, JsonIO + + +# TODO: benchmark +kNumResultsPerIndex = 512 + + +class IndexQueryError(Exception): + pass + + +class Index: + def __init__(self, path, metadata, features_size, num_probes=1): + self.index = IndexIO.load(path) + self.index.nprobe = num_probes + + # Disable Polysemous
Codes until we know threshold for MACs + # self.index.search_type = IndexPQ.ST_polysemous + # self.index.polysemous_ht = 768 + + self.metadata = JsonIO.load(metadata) + self.features_size = features_size + + def query(self, query, num_results=1): + N, C = query.shape + + if N != self.features_size * self.features_size: + raise IndexQueryError("query feature size does not match index feature size") + + # C-array required for faiss FFI: tensors might not be contiguous + query = np.ascontiguousarray(query) + + dists, idxs = self.index.search(query, kNumResultsPerIndex) + + dists = rearrange(dists, "() n -> n") + idxs = rearrange(idxs, "() n -> n") + + results = list(zip(dists, idxs)) + + _, uniqued = np.unique([i for _, i in results], return_index=True) + results = [results[i] for i in uniqued] + results = sorted(results, key=lambda v: v[0]) + + results = [(round(d.item(), 3), self.metadata[i]) + for d, i in results[:num_results]] + + return results diff --git a/sfi/io.py b/sfi/io.py new file mode 100644 index 0000000..46d236c --- /dev/null +++ b/sfi/io.py @@ -0,0 +1,36 @@ +import json + +import numpy as np +import faiss + + +class ArrayIO: + @staticmethod + def save(path, x): + return np.save(str(path), x, allow_pickle=False) + + @staticmethod + def load(path): + return np.load(str(path), allow_pickle=False) + + +class IndexIO: + @staticmethod + def save(path, x): + return faiss.write_index(x, str(path)) + + @staticmethod + def load(path): + return faiss.read_index(str(path)) + + +class JsonIO: + @staticmethod + def save(path, x): + with path.open("w") as fd: + return json.dump(x, fd) + + @staticmethod + def load(path): + with path.open("r") as fd: + return json.load(fd) diff --git a/sfi/mixup.py b/sfi/mixup.py new file mode 100644 index 0000000..a830605 --- /dev/null +++ b/sfi/mixup.py @@ -0,0 +1,59 @@ +import torch +import torch.nn as nn + +import numpy as np + + +# Mixup for data augmentation +# https://arxiv.org/abs/1710.09412 + +class MixupDataLoaderAdaptor: + def __init__(self, dataloader, alpha=0.4): + self.dataloader = dataloader + self.dataiter = None + self.alpha = alpha + + def __len__(self): + return len(self.dataloader) + + def __iter__(self): + self.dataiter = iter(self.dataloader) + return self + + def __next__(self): + inputs1, labels1 = next(self.dataiter) + + n = inputs1.size(0) + + # draw t from (symmetric) beta distribution + # take from one side to prevent duplicates + + t = np.random.beta(self.alpha, self.alpha, size=n) + t = np.concatenate([t[:, None], 1 - t[:, None]], axis=1).max(axis=1) + t = torch.FloatTensor(t) + t = t.view(n, 1, 1, 1) + + # shuffle the batch inputs and targets to get second batch + + r = np.random.permutation(n) + inputs2, labels2 = inputs1[r], labels1[r] + + # mix up the original batch with the shuffled batch + + inputs = t * inputs1 + (1 - t) * inputs2 + + # With CrossEntropy we do not need the mixed up labels + # labels = t * labels1.float() + (1 - t) * labels2.float() + + return inputs, t, labels1, labels2 + + +class MixupCrossEntropyLossAdaptor(nn.Module): + def __init__(self, *args, **kwargs): + super().__init__() + self.criterion = nn.CrossEntropyLoss(*args, **kwargs) + + def forward(self, outputs, t, labels1, labels2): + lhs = t * self.criterion(outputs, labels1) + rhs = (1 - t) * self.criterion(outputs, labels2) + return (lhs + rhs).mean() diff --git a/sfi/tools/__init__.py b/sfi/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sfi/tools/__main__.py b/sfi/tools/__main__.py new file mode 100644 index 
0000000..bbf2621 --- /dev/null +++ b/sfi/tools/__main__.py @@ -0,0 +1,89 @@ +from pathlib import Path + +import argparse + +import sfi.tools.frames +import sfi.tools.feature +import sfi.tools.feature3d +import sfi.tools.stream +import sfi.tools.server +import sfi.tools.client +import sfi.tools.train +import sfi.tools.infer +import sfi.tools.export + +parser = argparse.ArgumentParser(prog="sficmd") +subcmd = parser.add_subparsers(title="commands", metavar="") +subcmd.required = True + +Fmt = argparse.ArgumentDefaultsHelpFormatter + +frames = subcmd.add_parser("save-frames", help="saves key frames for video", formatter_class=Fmt) +frames.add_argument("--video", type=Path, required=True, help="file load video from") +frames.add_argument("--frames", type=Path, required=True, help="directory to save key frames to") +frames.add_argument("--similarity", type=float, default=0.95, help="similarity key frame threshold") +frames.add_argument("--pool", choices=["mean", "max"], default="mean", help="spatial pooling mode") +frames.add_argument("--image-size", type=int, default=7 * 32, choices=[v * 32 for v in range(1, 15)]) +frames.add_argument("--batch-size", type=int, default=8) +frames.set_defaults(main=sfi.tools.frames.main) + +stream = subcmd.add_parser("stream-index", help="builds an index in streaming mode", formatter_class=Fmt) +stream.add_argument("--index", type=Path, required=True, help="file to save index to") +stream.add_argument("--frames", type=Path, required=True, help="directory to load image frames from") +stream.add_argument("--num-train", type=int, required=True, help="number of samples to train on") +stream.add_argument("--image-size", type=int, default=14 * 32, choices=[v * 32 for v in range(1, 15)]) +stream.add_argument("--batch-size", type=int, default=64) +stream.add_argument("--num-workers", type=int, default=0) +stream.set_defaults(main=sfi.tools.stream.main) + +feature = subcmd.add_parser("save-feature", help="saves features for frames", formatter_class=Fmt) +feature.add_argument("--frame", type=Path, required=True, help="path to image frame") +feature.add_argument("--feature", type=Path, required=True, help="path to save features to") +feature.add_argument("--image-size", type=int, default=14 * 32, choices=[v * 32 for v in range(1, 15)]) +feature.set_defaults(main=sfi.tools.feature.main) + +feature3d = subcmd.add_parser("save-feature3d", help="saves features for videos", formatter_class=Fmt) +feature3d.add_argument("--video", type=Path, required=True, help="path to video") +feature3d.add_argument("--feature", type=Path, required=True, help="path to save features to") +feature3d.add_argument("--timesteps", type=int, default=64, help="frames per sequence along time axis") +feature3d.set_defaults(main=sfi.tools.feature3d.main) + +server = subcmd.add_parser("query-server", help="starts up the index query http server", formatter_class=Fmt) +server.add_argument("--index", type=Path, required=True, help="file to load index from") +server.add_argument("--host", type=str, default="127.0.0.1") +server.add_argument("--port", type=int, default=5000) +server.add_argument("--num-probes", type=int, default=1, help="number of cells to visit during search") +server.add_argument("--features-size", type=int, default=1, choices=range(1, 15)) +server.set_defaults(main=sfi.tools.server.main) + +client = subcmd.add_parser("query-client", help="queries the query server for similar features", formatter_class=Fmt) +client.add_argument("--host", type=str, default="127.0.0.1") 
+client.add_argument("--port", type=int, default=5000) +client.add_argument("--query", type=Path, required=True, help="feature file to query the index with") +client.add_argument("--num-results", type=int, default=10, help="number of similar frames to query for") +client.set_defaults(main=sfi.tools.client.main) + +train = subcmd.add_parser("model-train", help="trains a classifier model", formatter_class=Fmt) +train.add_argument("--model", type=Path, required=True, help="file to save trained model to") +train.add_argument("--resume-from", type=Path, help="file to load trained model from") +train.add_argument("--dataset", type=Path, required=True, help="directory to load dataset from") +train.add_argument("--batch-size", type=int, default=24) +train.add_argument("--num-workers", type=int, default=0) +train.add_argument("--num-epochs", type=int, default=100) +train.set_defaults(main=sfi.tools.train.main) + +infer = subcmd.add_parser("model-infer", help="runs inference with a classifier model", formatter_class=Fmt) +infer.add_argument("--model", type=Path, required=True, help="file to load trained model from") +infer.add_argument("--dataset", type=Path, required=True, help="directory to load dataset from") +infer.add_argument("--results", type=Path, required=True, help="file to save results to") +infer.add_argument("--batch-size", type=int, default=64) +infer.add_argument("--num-workers", type=int, default=0) +infer.set_defaults(main=sfi.tools.infer.main) + +export = subcmd.add_parser("model-export", help="exports a classifier model to onnx", formatter_class=Fmt) +export.add_argument("--model", type=Path, required=True, help="file to load trained model from") +export.add_argument("--onnx", type=Path, required=True, help="file to save trained onnx model to") +export.set_defaults(main=sfi.tools.export.main) + +args = parser.parse_args() +args.main(args) diff --git a/sfi/tools/client.py b/sfi/tools/client.py new file mode 100644 index 0000000..c224ccf --- /dev/null +++ b/sfi/tools/client.py @@ -0,0 +1,33 @@ +import sys +import json +import base64 + +import requests +from einops import rearrange + +from sfi.io import ArrayIO + + +def main(args): + query = ArrayIO.load(args.query) + + if len(query.shape) == 1: # handle (C,) as (1, C) + query = rearrange(query, "n -> () n") + + N, C = query.shape + dtype = str(query.dtype) + feature = base64.b64encode(query.ravel()).decode("utf-8") + + url = "http://{}:{}".format(args.host, args.port) + + payload = {"num_results": args.num_results, + "feature": feature, + "shape": [N, C], + "dtype": dtype} + + res = requests.post(url, data=json.dumps(payload)) + + if res.status_code != requests.codes.ok: + sys.exit("Error: unable to query server") + + print(json.dumps(res.json())) diff --git a/sfi/tools/export.py b/sfi/tools/export.py new file mode 100644 index 0000000..343592f --- /dev/null +++ b/sfi/tools/export.py @@ -0,0 +1,30 @@ +import sys + +import torch +import torch.onnx +import torch.nn as nn + +from torchvision.models import resnet50 + +def main(args): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + if torch.cuda.is_available(): + print("Using CUDA, benchmarking implementations", file=sys.stderr) + torch.backends.cudnn.benchmark = True + + # Binary classifier on top of resnet50 + model = resnet50() + model.fc = nn.Linear(model.fc.in_features, 2) + + model = model.to(device) + model = nn.DataParallel(model) + + # Restore trained weights + weights = torch.load(str(args.model), map_location=device) + model.load_state_dict(weights) + + # Run
dummy batch through model to trace computational graph + batch = torch.rand(1, 3, 224, 224, device=device) + + torch.onnx.export(model.module, batch, str(args.onnx)) diff --git a/sfi/tools/feature.py b/sfi/tools/feature.py new file mode 100644 index 0000000..8d450cf --- /dev/null +++ b/sfi/tools/feature.py @@ -0,0 +1,32 @@ +from torch.utils.data import DataLoader + +from einops import reduce + +from sfi.datasets import ImageSingleton +from sfi.features import FeatureExtractor +from sfi.io import ArrayIO + + +def main(args): + extract = FeatureExtractor(image_size=args.image_size) + + # We use this tool to compute query features on images of arbitrary sizes. + # That's why we can not batch images and have to feed them one by one. + + dataset = ImageSingleton(root=args.frame, transform=extract.transform) + loader = DataLoader(dataset, batch_size=1, num_workers=0) + + for images, paths in loader: + assert images.size(0) == 1, "image batch size of one for required" + + n, c, h, w = images.size(0), 2048, images.size(2), images.size(3) + + # resnet5 downsamples x2 five times + h, w = h // 32, w // 32 + + # MAC feature descriptor + features = extract(images) + features = reduce(features, "n (h w) c -> n c", "max", n=n, h=h, w=w, c=c) + features = features.data.cpu().numpy() + + ArrayIO.save(args.feature, features[0]) diff --git a/sfi/tools/feature3d.py b/sfi/tools/feature3d.py new file mode 100644 index 0000000..99f6e3d --- /dev/null +++ b/sfi/tools/feature3d.py @@ -0,0 +1,66 @@ +import sys + +import torch +import torch.nn as nn + +from torchvision.models.video import r2plus1d_18 + +from einops import rearrange + +from skvideo.io import vread + +from sfi.utils import batched + + +def main(args): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + if torch.cuda.is_available(): + print("Using CUDA, benchmarking implementations", file=sys.stderr) + torch.backends.cudnn.benchmark = True + + # r2d2 says "beep beep" + resnet = r2plus1d_18(pretrained=True, progress=False) + + resnet.fc = nn.Identity() + # resnet.avgpool = nn.Identity() + + for params in resnet.parameters(): + params.requires_grad = False + + resnet = resnet.to(device) + resnet = nn.DataParallel(resnet) + + resnet.eval() + + # Pre-trained Kinetics-400 statistics for normalization + mean, std = [0.43216, 0.394666, 0.37645], [0.22803, 0.22145, 0.216989] + + mean = rearrange(torch.as_tensor(mean), "n -> () n () ()") + std = rearrange(torch.as_tensor(std), "n -> () n () ()") + + video = vread(str(args.video)) + + with torch.no_grad(): + for i, batch in enumerate(batched(video, args.timesteps)): + # TODO: + # - encapsulate video dataset + # - abstract away transforms + # - fix timesteps vs batching + + batch = rearrange(batch, "t h w c -> t c h w") + batch = torch.tensor(batch) + batch = batch.to(torch.float32) / 255 + + batch = (batch - mean) / std + + # model expects NxCxTxHxW + inputs = rearrange(batch, "t c h w -> () c t h w") + inputs = inputs.to(device) + + outputs = resnet(inputs) + outputs = rearrange(outputs, "() n -> n") + outputs = outputs.data.cpu().numpy() + + print("seq={}, frames=range({}, {}), prediction={}" + .format(i, i * args.timesteps, (i + 1) * args.timesteps, outputs.shape)) diff --git a/sfi/tools/frames.py b/sfi/tools/frames.py new file mode 100644 index 0000000..814bca5 --- /dev/null +++ b/sfi/tools/frames.py @@ -0,0 +1,67 @@ +import sys + +from torch.utils.data import DataLoader + +from PIL import Image + +import numpy as np +from einops import reduce + +from skvideo.io import vread + +from 
sfi.features import FeatureExtractor +from sfi.utils import batched + + +def main(args): + args.frames.mkdir(exist_ok=True) + + key = None + video = vread(str(args.video)) + extract = FeatureExtractor(image_size=args.image_size) + + nframes, nkeys = 0, 0 + + for i, batch in enumerate(batched(video, args.batch_size)): + # We should use the IterableDataset from upcoming PyTorch version for FramesDataset + + frames = [Image.fromarray(each) for each in batch] + + dataset = [extract.transform(frame) for frame in frames] + dataloader = DataLoader(dataset, batch_size=args.batch_size) + + assert len(dataloader) == 1 + images = next(iter(dataloader)) + + n, c, h, w = images.size(0), 2048, images.size(2), images.size(3) + + features = extract(images) + features = features.data.cpu().numpy() + + # resnet5 downsamples x2 five times + h, w = h // 32, w // 32 + + features = reduce(features, "n (h w) c -> n c", reduction=args.pool, n=n, h=h, w=w, c=c) + + for j, (frame, feature) in enumerate(zip(frames, features)): + nframes += 1 + + fid = i * args.batch_size + j + + if key: + prev_frame, prev_feature = key + + if similarity(prev_feature, feature) > args.similarity: + continue + + nkeys += 1 + key = frame, feature + frame.save(args.frames / "{:010d}.jpg".format(fid)) + + if nframes != 0: + print("Processed total={} keep={} drop={} ratio={}" + .format(nframes, nkeys, nframes - nkeys, round(nkeys / nframes, 2)), file=sys.stderr) + + +def similarity(x, y): + return (x @ y) / (np.linalg.norm(x) * np.linalg.norm(y)) diff --git a/sfi/tools/infer.py b/sfi/tools/infer.py new file mode 100644 index 0000000..d8dd4ae --- /dev/null +++ b/sfi/tools/infer.py @@ -0,0 +1,70 @@ +import sys +from pathlib import Path + +import torch +import torch.nn as nn +import torch.backends.cudnn +from torch.utils.data import DataLoader + +from torchvision.models import resnet50 +from torchvision.transforms import Compose, Normalize, ToTensor, Resize, CenterCrop + +from tqdm import tqdm + +from sfi.io import JsonIO +from sfi.datasets import ImageDirectory +from sfi.transforms import ToImageMode + + +def main(args): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + if torch.cuda.is_available(): + print("Using CUDA, benchmarking implementations", file=sys.stderr) + torch.backends.cudnn.benchmark = True + + # ImageNet statistics (because we use pre-trained model) + mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] + + transform = Compose([ + ToImageMode("RGB"), + Resize(256), + CenterCrop(224), + ToTensor(), + Normalize(mean=mean, std=std)]) + + dataset = ImageDirectory(root=args.dataset, transform=transform) + dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=args.num_workers) + + # Binary classifier on top of resnet50 + model = resnet50() + model.fc = nn.Linear(model.fc.in_features, 2) + + model = model.to(device) + model = nn.DataParallel(model) + + # Restore trained weights + weights = torch.load(str(args.model), map_location=device) + model.load_state_dict(weights) + + model.eval() + + results = [] + + with torch.no_grad(): + for inputs, paths in tqdm(dataloader, desc="infer", unit="batch", ascii=True): + inputs = inputs.to(device) + + outputs = model(inputs) + + _, preds = torch.max(outputs, dim=1) + preds = preds.data.cpu().numpy() + + probs = nn.functional.softmax(outputs, dim=1) + probs = probs.data.cpu().numpy() + + for path, pred, prob in zip(paths, preds, probs): + result = {"class": pred.item(), "probability": round(prob.max().item(), 3), "path": Path(path).name} + 
results.append(result) + + JsonIO.save(args.results, results) diff --git a/sfi/tools/server.py b/sfi/tools/server.py new file mode 100644 index 0000000..6b263be --- /dev/null +++ b/sfi/tools/server.py @@ -0,0 +1,58 @@ +import sys +import base64 +import binascii + +import numpy as np +from einops import rearrange + +from flask import Flask, request, jsonify, abort + +from sfi.index import Index, IndexQueryError + + +app = Flask(__name__) +index = None + + +@app.route("/", methods=["POST"]) +def query(): + if not index: + return abort(503) + + req = request.get_json(force=True, silent=False, cache=False) + + if not all(v in req for v in ["feature", "shape", "dtype"]): + return abort(400) + + try: + feature = base64.b64decode(req["feature"]) + except binascii.Error: + return abort(400) + + N, C = req["shape"] + dtype = req["dtype"] + + try: + vs = np.frombuffer(feature, dtype=dtype) + vs = rearrange(vs, "(n c) -> n c", n=N, c=C) + except ValueError: + return abort(400) + + num_results = req.get("num_results", 1) + + try: + results = index.query(vs, num_results=num_results) + except IndexQueryError: + return abort(400) + + return jsonify([{"distance": d, "path": p} for d, p in results]) + + +def main(args): + print("Loading index from disk", file=sys.stderr) + + global index + index = Index(path=args.index, metadata=args.index.with_suffix(".json"), + features_size=args.features_size, num_probes=args.num_probes) + + app.run(host=args.host, port=args.port, debug=False) diff --git a/sfi/tools/stream.py b/sfi/tools/stream.py new file mode 100644 index 0000000..102421d --- /dev/null +++ b/sfi/tools/stream.py @@ -0,0 +1,92 @@ +import sys +from pathlib import Path + +from torch.utils.data import DataLoader, random_split + +import numpy as np +from einops import reduce + +from faiss import IndexFlatL2, IndexIVFPQ + +from tqdm import tqdm + +from sfi.datasets import ImageDirectory +from sfi.features import FeatureExtractor +from sfi.io import IndexIO, JsonIO + +kNumCells = 100 +kNumCentroids = 256 # Note: on gpu this will not work; see links below +kNumBitsPerIdx = 8 + +# Gpu centroid limitations +# - https://github.com/facebookresearch/faiss/blob/a8118acbc516b0263dde610862c806400cc48bf5/gpu/impl/IVFPQ.cu#L69-L92 +# - https://github.com/facebookresearch/faiss/blob/a8118acbc516b0263dde610862c806400cc48bf5/ProductQuantizer.cpp#L189 + + +def main(args): + # https://github.com/facebookresearch/faiss/blob/a8118acbc516b0263dde610862c806400cc48bf5/Clustering.cpp#L78-L80 + if args.num_train < max(kNumCells, kNumCentroids): + sys.exit("Error: require at least {} training samples".format(max(kNumCells, kNumCentroids))) + + extract = FeatureExtractor(image_size=args.image_size) + + dataset = ImageDirectory(root=args.frames, transform=extract.transform) + train_dataset, index_dataset = random_split(dataset, [args.num_train, len(dataset) - args.num_train]) + + if len(train_dataset) > len(index_dataset) or len(train_dataset) > 0.25 * len(index_dataset): + sys.exit("Error: training dataset too big: train={}, index={}".format(len(train_dataset), len(index_dataset))) + + train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + index_loader = DataLoader(index_dataset, batch_size=args.batch_size, num_workers=args.num_workers) + + N, C = len(train_dataset), 2048 + + train_features = np.empty(shape=(N, C), dtype=np.float32) + + for i, (images, paths) in enumerate(tqdm(train_loader, desc="Train", unit="batch", ascii=True)): + n, h, w = images.size(0), images.size(2), 
images.size(3) + + features = extract(images) + features = features.data.cpu().numpy() + + # resnet5 downsamples x2 five times + h, w = h // 32, w // 32 + + # MAC feature + features = reduce(features, "n (h w) c -> n c", "max", n=n, h=h, w=w, c=C) + + train_features[i * args.batch_size: i * args.batch_size + n] = features + + quantizer = IndexFlatL2(C) + + index = IndexIVFPQ(quantizer, C, kNumCells, kNumCentroids, kNumBitsPerIdx) + index.do_polysemous_training = True + + print("Training index on training features", file=sys.stderr) + index.train(train_features) + + metadata = [] + + for images, paths in tqdm(index_loader, desc="Index", unit="batch", ascii=True): + n, h, w = images.size(0), images.size(2), images.size(3) + + # resnet5 downsamples x2 five times + h, w = h // 32, w // 32 + + # MAC feature descriptor + features = extract(images) + features = reduce(features, "n (h w) c -> n c", "max", n=n, h=h, w=w, c=C) + features = features.data.cpu().numpy() + + # C-array required for faiss FFI: tensors might not be contiguous + features = np.ascontiguousarray(features) + + # Add a batch of (batch*49, 2048) unpooled features to the index at once + index.add(features) + + for path in paths: + fname = Path(path).name + metadata.append(fname) + + IndexIO.save(args.index.with_suffix(".idx"), index) + JsonIO.save(args.index.with_suffix(".json"), metadata) diff --git a/sfi/tools/train.py b/sfi/tools/train.py new file mode 100644 index 0000000..fe0ca8b --- /dev/null +++ b/sfi/tools/train.py @@ -0,0 +1,153 @@ +import sys +import copy +import collections + +import torch +import torch.nn as nn +import torch.optim +import torch.backends.cudnn +from torch.utils.data import DataLoader + +from torchvision.models import resnet50 +from torchvision.datasets import ImageFolder +from torchvision.transforms import Compose, Normalize, ToTensor, Resize, RandomHorizontalFlip + +from tqdm import tqdm + +from sfi.transforms import ToImageMode +from sfi.mixup import MixupDataLoaderAdaptor, MixupCrossEntropyLossAdaptor +from sfi.utils import decay_weights + + +def main(args): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + if torch.cuda.is_available(): + print("Using CUDA, benchmarking implementations", file=sys.stderr) + torch.backends.cudnn.benchmark = True + + # ImageNet statistics (because we use pre-trained model) + mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225] + + transform = Compose([ + ToImageMode("RGB"), + Resize(256), + RandomHorizontalFlip(), + ToTensor(), + Normalize(mean=mean, std=std)]) + + train_dataset = ImageFolder(root=args.dataset / "train", transform=transform) + val_dataset = ImageFolder(root=args.dataset / "val", transform=transform) + + train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True) + train_loader = MixupDataLoaderAdaptor(train_loader) + + val_loader = DataLoader(val_dataset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=False) + + model = resnet50(pretrained=True, progress=False) + + # Add binary classification head + model.fc = nn.Linear(model.fc.in_features, 2) + + model = model.to(device) + model = nn.DataParallel(model) + + if args.resume_from: + weights = torch.load(str(args.resume_from), map_location=device) + model.load_state_dict(weights) + + optimizer = torch.optim.Adam(model.parameters(), lr=1e-5) + + counts = collections.Counter(train_dataset.targets).values() + weight = torch.tensor([min(counts) / v for v in counts]).to(device) + + train_criterion = 
MixupCrossEntropyLossAdaptor(weight=weight) + val_criterion = nn.CrossEntropyLoss(weight=weight) + + best_wts = copy.deepcopy(model.state_dict()) + best_acc = 0.0 + + for epoch in range(args.num_epochs): + print("Epoch {}/{}".format(epoch, args.num_epochs - 1)) + print("-" * 10) + + loss, _, _, _ = train(model, train_criterion, optimizer, device, + dataset=train_dataset, dataloader=train_loader) + + print("train loss: {:.4f}".format(loss)) + + loss, acc, precision, recall = validate(model, val_criterion, device, + dataset=val_dataset, dataloader=val_loader) + + print("val loss: {:.4f} acc: {:.4f} precision: {:.4f} recall: {:.4f}".format(loss, acc, precision, recall)) + + if acc > best_acc: + best_acc = acc + best_wts = copy.deepcopy(model.state_dict()) + + print() + + print("Best acc: {:.4f}".format(best_acc)) + + torch.save(best_wts, str(args.model)) + + +def train(model, criterion, optimizer, device, dataset, dataloader): + model.train() + + running_loss = 0.0 + + for inputs, t, labels1, labels2 in tqdm(dataloader, desc="train", unit="batch", ascii=True): + inputs = inputs.to(device) + t = t.to(device) + labels1 = labels1.to(device) + labels2 = labels2.to(device) + + optimizer.zero_grad() + + outputs = model(inputs) + + loss = criterion(outputs, t, labels1, labels2) + + loss.backward() + decay_weights(optimizer, 1e-4) + optimizer.step() + + running_loss += loss.item() * inputs.size(0) + + epoch_loss = running_loss / len(dataset) + + return epoch_loss, float("NaN"), float("NaN"), float("NaN") + + +def validate(model, criterion, device, dataset, dataloader): + model.eval() + + running_loss = 0.0 + tn, fn, tp, fp = 0, 0, 0, 0 + + with torch.no_grad(): + for inputs, labels in tqdm(dataloader, desc="val", unit="batch", ascii=True): + inputs = inputs.to(device) + labels = labels.to(device) + + outputs = model(inputs) + _, preds = torch.max(outputs, dim=1) + + loss = criterion(outputs, labels) + + running_loss += loss.item() * inputs.size(0) + + confusion = preds.float() / labels.float() + tn += torch.sum(torch.isnan(confusion)).item() + fp += torch.sum(confusion == float("inf")).item() + tp += torch.sum(confusion == 1).item() + fn += torch.sum(confusion == 0).item() + + epoch_loss = running_loss / len(dataset) + + accuracy = (tp + tn) / (tp + tn + fp + fn) + precision = tp / (tp + fp) + recall = tp / (tp + fn) + + return epoch_loss, accuracy, precision, recall diff --git a/sfi/transforms.py b/sfi/transforms.py new file mode 100644 index 0000000..4320a9b --- /dev/null +++ b/sfi/transforms.py @@ -0,0 +1,44 @@ +import torchvision.transforms.functional as F + + +def to_image_mode(image, mode): + return image.convert(mode) + + +class ToImageMode: + def __init__(self, mode): + self.mode = mode + + def __call__(self, image): + return to_image_mode(image, self.mode) + + +def pad_to_multiple(image, multiple, fill=0, padding_mode="constant"): + w, h = image.size + + def next_multiple_of(n, multiple): + return ((n // multiple) + int(bool(n % multiple))) * multiple + + padded_w = next_multiple_of(w, multiple) + padded_h = next_multiple_of(h, multiple) + + pad_left = (padded_w - w) // 2 + pad_right = pad_left + (padded_w - w) % 2 + + pad_top = (padded_h - h) // 2 + pad_bottom = pad_top + (padded_h - h) % 2 + + padding = (pad_left, pad_top, pad_right, pad_bottom) + + return F.pad(image, padding, fill=fill, padding_mode=padding_mode) + + +class PadToMultiple: + def __init__(self, multiple, fill=0, padding_mode="constant"): + self.multiple = multiple + self.fill = fill + self.padding_mode = padding_mode + + 
def __call__(self, image): + return pad_to_multiple(image, multiple=self.multiple, fill=self.fill, + padding_mode=self.padding_mode) diff --git a/sfi/utils.py b/sfi/utils.py new file mode 100644 index 0000000..ad655be --- /dev/null +++ b/sfi/utils.py @@ -0,0 +1,20 @@ +import itertools + + +def batched(iterable, n): + counter = itertools.count() + + for _, group in itertools.groupby(iterable, lambda _: next(counter) // n): + yield list(group) + + +def files(path): + return sorted([p for p in path.iterdir() if p.is_file()]) + + +# Proper weight decay for Adam, not L2 penalty +# https://github.com/pytorch/pytorch/pull/4429 +def decay_weights(optimizer, v): + for group in optimizer.param_groups: + for param in group["params"]: + param.data.add_(-v * group["lr"])
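A quick usage note on the `batched` helper in `sfi/utils.py` above, which the frame and video tools use to walk an iterable in fixed-size chunks; the values below are illustrative only.

```python
# batched() groups consecutive items of any iterable into lists of size n;
# the last group may be shorter. Illustrative values only.
from sfi.utils import batched

print(list(batched(range(7), 3)))  # [[0, 1, 2], [3, 4, 5], [6]]
```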