-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1c5c3fc
Showing
36 changed files
with
1,584 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.git | ||
.gitignore |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[flake8] | ||
max-line-length = 120 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
__pycache__ | ||
*.py[cod] | ||
|
||
*.pth | ||
*.pb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# CPU image for the Semantic Frame Index (sfi) tools.
# Ubuntu 18.04 provides Python 3.6, which the cp36 PyTorch wheels pinned below require.
FROM ubuntu:18.04

WORKDIR /usr/src/app

# /opt/venv/bin comes first on PATH so that, once the virtualenv exists, plain
# `python3` resolves to the virtualenv interpreter in later RUN steps and at runtime.
# CFLAGS/CXXFLAGS are exported to the native builds below (libjpeg-turbo,
# pillow-simd, faiss); with -mavx2 the resulting binaries presumably require an
# AVX2-capable CPU at runtime.
# NOTE(review): PIP_NO_CACHE_DIR="false" reads inverted; presumably chosen because
# the distro's older pip disables the cache for this value - confirm before changing.
ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false" CFLAGS="-mavx2" CXXFLAGS="-mavx2"

# Toolchain and libraries for the source builds below; the apt lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      cmake \
      ffmpeg \
      g++ \
      libblas-dev \
      liblapack-dev \
      make \
      python3 \
      python3-dev \
      python3-pip \
      python3-venv \
      swig \
      wget \
      yasm \
      zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

# Create the virtualenv and bootstrap pinned pip / pip-tools inside it. The
# virtualenv interpreter is called by absolute path so this step does not depend
# on the shell re-resolving `python3` after the virtualenv appears on PATH.
RUN python3 -m venv /opt/venv && \
    /opt/venv/bin/python3 -m pip install pip==19.2.3 pip-tools==4.0.0

# Copied only after the bootstrap layer so that editing requirements.txt does not
# invalidate the virtualenv layer above.
COPY requirements.txt .

# For pytorch and torchvision we need platform specific (cpu vs. gpu) wheels from
# https://download.pytorch.org/whl/cpu/torch_stable.html
# To generate hashes run: python3 -m pip hash *.whl
# printf is used instead of echo so that newline handling does not depend on the
# shell's echo implementation; the leading newline keeps the first URL on its own
# line even if requirements.txt does not end with one. The URLs are passed as
# arguments (not in the format string) because they contain a literal '%'.
RUN printf '\n%s %s\n%s %s\n' \
      "https://download.pytorch.org/whl/cpu/torch-1.2.0%2Bcpu-cp36-cp36m-manylinux1_x86_64.whl" \
      "--hash=sha256:7b9b943673d3acb446248ba0d6feed6926bf60ce719ace4707a6559c1f57ced7" \
      "https://download.pytorch.org/whl/cpu/torchvision-0.4.0%2Bcpu-cp36-cp36m-manylinux1_x86_64.whl" \
      "--hash=sha256:63f342b858b18839fcf3ff8ad857e44a4ff0fcb8cb8e2bdc2f4ed9afa7cec9e0" \
      >> requirements.txt

# Make the virtualenv match requirements.txt exactly.
RUN python3 -m piptools sync

# Instantiate the pretrained models once at build time so their weights are
# downloaded into the image (presumably to avoid a download on first use).
RUN python3 -c "from torchvision.models import resnet50; resnet50(pretrained=True, progress=False)" && \
    python3 -c "from torchvision.models.video import r2plus1d_18; r2plus1d_18(pretrained=True, progress=False)"

# Build libjpeg-turbo 2.0.3 from a checksum-verified tarball and install it to
# /usr/local; sources are removed in the same layer.
RUN wget -q https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.3.tar.gz -O libjpeg-turbo.tar.gz && \
    echo "a69598bf079463b34d45ca7268462a18b6507fdaa62bb1dfd212f02041499b5d libjpeg-turbo.tar.gz" | sha256sum -c && \
    tar xf libjpeg-turbo.tar.gz && \
    rm libjpeg-turbo.tar.gz && \
    cd libjpeg-turbo* && \
    mkdir build && \
    cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release -DREQUIRE_SIMD=On -DCMAKE_INSTALL_PREFIX=/usr/local .. && \
    make -j "$(nproc)" && \
    make install && \
    ldconfig && \
    cd ../../ && \
    rm -rf libjpeg-turbo*

# Swap the stock pillow for pillow-simd compiled from source (presumably so it
# links against the libjpeg-turbo built above).
RUN python3 -m pip uninstall -y pillow && \
    python3 -m pip install --no-binary :all: --compile pillow-simd==6.0.0.post0

# Build faiss 1.5.3 (CPU only) and its Python bindings from a checksum-verified
# tarball; sources are removed in the same layer.
RUN wget -q https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz -O faiss.tar.gz && \
    echo "b24d347b0285d01c2ed663ccc7596cd0ea95071f3dd5ebb573ccfc28f15f043b faiss.tar.gz" | sha256sum -c && \
    tar xf faiss.tar.gz && \
    rm faiss.tar.gz && \
    cd faiss* && \
    ./configure --without-cuda && \
    make -j "$(nproc)" && \
    make -j "$(nproc)" -C python && \
    make install && \
    make -C python install && \
    cd .. && \
    rm -rf faiss*

# Application source goes last so code changes do not invalidate the layers above.
COPY . .

# NOTE(review): presumably the query server's port - confirm against the sfi tools.
EXPOSE 5000
# The launcher script is the entrypoint; with no arguments the container prints help.
ENTRYPOINT ["/usr/src/app/bin/sfi"]
CMD ["-h"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
# GPU image for the Semantic Frame Index (sfi) tools.
# The distro is pinned in the tag: the cp36 PyTorch wheels below require Python 3.6,
# which is what Ubuntu 18.04 provides, and the tag without a distro suffix is a
# floating alias.
FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04

WORKDIR /usr/src/app

# /opt/venv/bin comes first on PATH so that, once the virtualenv exists, plain
# `python3` resolves to the virtualenv interpreter in later RUN steps and at runtime.
# CFLAGS/CXXFLAGS are exported to the native builds below (libjpeg-turbo,
# pillow-simd, faiss); with -mavx2 the resulting binaries presumably require an
# AVX2-capable CPU at runtime.
# NOTE(review): PIP_NO_CACHE_DIR="false" reads inverted; presumably chosen because
# the distro's older pip disables the cache for this value - confirm before changing.
ENV LANG="C.UTF-8" LC_ALL="C.UTF-8" PATH="/opt/venv/bin:$PATH" PIP_NO_CACHE_DIR="false" CFLAGS="-mavx2" CXXFLAGS="-mavx2"

# Toolchain and libraries for the source builds below; the apt lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      cmake \
      ffmpeg \
      g++ \
      libblas-dev \
      liblapack-dev \
      make \
      python3 \
      python3-dev \
      python3-pip \
      python3-venv \
      swig \
      wget \
      yasm \
      zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

# Create the virtualenv and bootstrap pinned pip / pip-tools inside it. The
# virtualenv interpreter is called by absolute path so this step does not depend
# on the shell re-resolving `python3` after the virtualenv appears on PATH.
RUN python3 -m venv /opt/venv && \
    /opt/venv/bin/python3 -m pip install pip==19.2.3 pip-tools==4.0.0

# Copied only after the bootstrap layer so that editing requirements.txt does not
# invalidate the virtualenv layer above.
COPY requirements.txt .

# For pytorch and torchvision we need platform specific (cpu vs. gpu) wheels from
# https://download.pytorch.org/whl/cu100/torch_stable.html
# To generate hashes run: python3 -m pip hash *.whl
# printf is used instead of echo so that newline handling does not depend on the
# shell's echo implementation; the leading newline keeps the first URL on its own
# line even if requirements.txt does not end with one.
RUN printf '\n%s %s\n%s %s\n' \
      "https://download.pytorch.org/whl/cu100/torch-1.2.0-cp36-cp36m-manylinux1_x86_64.whl" \
      "--hash=sha256:a13bf6f78a49d844b85c142b8cd62d2e1833a11ed21ea0bc6b1ac73d24c76415" \
      "https://download.pytorch.org/whl/cu100/torchvision-0.4.0-cp36-cp36m-manylinux1_x86_64.whl" \
      "--hash=sha256:2f67efdf6edd9ea7f9cd9a3917ae5c63d5684e3bdb5cc9c2b364c15bdfe4456b" \
      >> requirements.txt

# Make the virtualenv match requirements.txt exactly.
RUN python3 -m piptools sync

# Instantiate the pretrained models once at build time so their weights are
# downloaded into the image (presumably to avoid a download on first use).
RUN python3 -c "from torchvision.models import resnet50; resnet50(pretrained=True, progress=False)" && \
    python3 -c "from torchvision.models.video import r2plus1d_18; r2plus1d_18(pretrained=True, progress=False)"

# Build libjpeg-turbo 2.0.3 from a checksum-verified tarball and install it to
# /usr/local; sources are removed in the same layer.
RUN wget -q https://github.com/libjpeg-turbo/libjpeg-turbo/archive/2.0.3.tar.gz -O libjpeg-turbo.tar.gz && \
    echo "a69598bf079463b34d45ca7268462a18b6507fdaa62bb1dfd212f02041499b5d libjpeg-turbo.tar.gz" | sha256sum -c && \
    tar xf libjpeg-turbo.tar.gz && \
    rm libjpeg-turbo.tar.gz && \
    cd libjpeg-turbo* && \
    mkdir build && \
    cd build && \
    cmake -DCMAKE_BUILD_TYPE=Release -DREQUIRE_SIMD=On -DCMAKE_INSTALL_PREFIX=/usr/local .. && \
    make -j "$(nproc)" && \
    make install && \
    ldconfig && \
    cd ../../ && \
    rm -rf libjpeg-turbo*

# Swap the stock pillow for pillow-simd compiled from source (presumably so it
# links against the libjpeg-turbo built above).
RUN python3 -m pip uninstall -y pillow && \
    python3 -m pip install --no-binary :all: --compile pillow-simd==6.0.0.post0

# Build faiss 1.5.3 with CUDA support and its Python bindings from a
# checksum-verified tarball; sources are removed in the same layer.
# The -gencode flags request code for compute capability 3.7 and 7.0 only.
RUN wget -q https://github.com/facebookresearch/faiss/archive/v1.5.3.tar.gz -O faiss.tar.gz && \
    echo "b24d347b0285d01c2ed663ccc7596cd0ea95071f3dd5ebb573ccfc28f15f043b faiss.tar.gz" | sha256sum -c && \
    tar xf faiss.tar.gz && \
    rm faiss.tar.gz && \
    cd faiss* && \
    ./configure --with-cuda-arch="-gencode=arch=compute_37,code=compute_37 -gencode=arch=compute_70,code=compute_70" --with-cuda="/usr/local/cuda" && \
    make -j "$(nproc)" && \
    make -j "$(nproc)" -C python && \
    make install && \
    make -C python install && \
    cd .. && \
    rm -rf faiss*

# Application source goes last so code changes do not invalidate the layers above.
COPY . .

# NOTE(review): presumably the query server's port - confirm against the sfi tools.
EXPOSE 5000
# The launcher script is the entrypoint; with no arguments the container prints help.
ENTRYPOINT ["/usr/src/app/bin/sfi"]
CMD ["-h"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2019 MoabitCoin | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Overridable on the command line, e.g. `make run datadir=/tmp`.
#   dockerimage  tag of the image to build / run / publish
#   dockerfile   Dockerfile used by the build targets
#   srcdir       host checkout whose sfi/ directory is mounted into the container
#   datadir      host directory mounted at /data inside the container
#   sshopts      ssh destination (and options) for `publish`; no default
dockerimage ?= moabitcoin/sfi
dockerfile ?= Dockerfile.cpu
srcdir ?= $(shell pwd)
datadir ?= $(shell pwd)

# Build the image, reusing cached layers.
install:
	@docker build -t $(dockerimage) -f $(dockerfile) .

i: install


# Rebuild from scratch: pull a fresh base image and ignore the layer cache.
update:
	@docker build -t $(dockerimage) -f $(dockerfile) . --pull --no-cache

u: update


# Open an interactive shell in the image with the source and data directories mounted.
# The container uses the host's network stack, so its ports are reachable without
# publishing; Docker discards `-p` in host network mode (with a warning), hence
# no port mapping is passed here.
run:
	@docker run -it --rm --ipc="host" --network="host" -v "$(srcdir)/sfi:/usr/src/app/sfi" -v "$(datadir):/data" --entrypoint=/bin/bash $(dockerimage)

r: run


# Stream the image over ssh into the Docker daemon on a remote host, with a
# progress bar sized from the local image size.
# Fails early with a clear message when sshopts is not provided.
publish:
	@: $(if $(strip $(sshopts)),,$(error sshopts is not set; usage: make publish sshopts=user@host))
	@docker image save $(dockerimage) \
	  | pv -N "Publish $(dockerimage) to $(sshopts)" -s $(shell docker image inspect $(dockerimage) --format "{{.Size}}") \
	  | ssh $(sshopts) "docker image load"

p: publish


.PHONY: install i run r update u publish p
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Semantic Frame Index | ||
|
||
Fast and efficient queries on video frames by semantic similarity. | ||
|
||
|
||
## Use Case | ||
|
||
We record tens of thousands of hours of drive video data and need to be able to search for semantically similar scenarios. | ||
Similarity could mean similar lighting conditions, similar vehicle types, similar traffic volumes, similar objects on the road, and so on. | ||
|
||
|
||
## Implementation Sketch | ||
|
||
We | ||
- extract key frames using a neural net for frame similarity in feature space | ||
- extract a trained convolutional neural net's high level feature maps for all key frames | ||
- compute Maximum Activations of Convolution (MAC) features from the high-level feature maps | ||
- index the feature maps for approximate nearest neighbor searches based on L2 distance | ||
- query the indexed dataset for semantically similar scenarios | ||
|
||
|
||
## Usage | ||
|
||
All tools can be invoked via | ||
|
||
./bin/sfi <tool> <args> | ||
|
||
./bin/sfi --help | ||
./bin/sfi <tool> --help | ||
|
||
|
||
### stream-index | ||
|
||
Builds an index from a directory of images for fast and efficient approximate nearest neighbor queries based on L2 distance. | ||
The quantizer for the index needs to get trained on a small subset of the feature maps to approximate the dataset's centroids. | ||
We recommend running this step on GPUs. | ||
|
||
|
||
### save-feature | ||
|
||
Extracts high-level feature maps and computes MACs for image frames from a trained convolutional neural net. | ||
|
||
|
||
### save-frames | ||
|
||
Extracts semantic key frames from videos based on a trained convolution net for feature similarity between frames. | ||
|
||
|
||
### query-server | ||
|
||
Loads up the index (slow) and keeps it in memory to handle nearest neighbor queries (fast). | ||
Responds to queries by searching the index, aggregating results, and re-ranking them. | ||
|
||
|
||
### query-client | ||
|
||
Sends nearest neighbor requests against the query server and reports results to the user. | ||
The query and results are based on the saved MAC features. | ||
|
||
|
||
### model-train | ||
|
||
Trains a binary classification model on a dataset (potentially noisy and obtained from the index). | ||
We recommend running this step on GPUs. | ||
|
||
|
||
### model-infer | ||
|
||
Predicts binary classification labels on a dataset, using a trained model. | ||
|
||
|
||
## Development | ||
|
||
Create a self-contained reproducible development environment | ||
|
||
make i | ||
|
||
Get into the development environment | ||
|
||
make r | ||
|
||
The Python source code directory is mounted into the container: if you modify it on the host it will get modified in the container. | ||
|
||
To make data visible in the container set the datadir env var, e.g. to make your `/tmp` directory show up in `/data` inside the container run | ||
|
||
make r datadir=/tmp | ||
|
||
See the `Makefile` for options and more advanced targets. | ||
|
||
|
||
## References | ||
|
||
- [Particular object retrieval with integral max-pooling of CNN activations](https://arxiv.org/abs/1511.05879) | ||
- Product Quantizer (PQ) [part 1](http://mccormickml.com/2017/10/13/product-quantizer-tutorial-part-1/), and [part 2](http://mccormickml.com/2017/10/22/product-quantizer-tutorial-part-2/) | ||
- [Product Quantization for Nearest Neighbor Search](https://hal.inria.fr/file/index/docid/514462/filename/paper_hal.pdf) | ||
- [Billion-scale similarity search with GPUs](https://arxiv.org/pdf/1702.08734.pdf) | ||
- [faiss wiki](https://github.com/facebookresearch/faiss/wiki) | ||
|
||
|
||
## License | ||
|
||
Copyright © 2019 MoabitCoin | ||
|
||
Distributed under the MIT License (MIT). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Launcher for the sfi command line tools: forwards all arguments unchanged to
# the `sfi.tools` Python module.
# `exec` replaces this shell with the Python process, so when the script is used
# as a container entrypoint Python receives signals (e.g. SIGTERM on stop)
# directly instead of a wrapping bash process that does not forward them.
exec python3 -m sfi.tools "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
numpy | ||
pillow | ||
tqdm | ||
flask | ||
requests | ||
einops | ||
scikit-video |
Oops, something went wrong.