Commit b4340c4

Add batcher module. (kserve#847)
* Add batcher module.
* Add batcher module.
* Add batcher e2e test timeout.
* Add batcher e2e test timeout.
* Some changes according to PR comments.
* Enlarge batcher resource.
* Change image name of batcher for CI.
* Change batcher resource.
* Changes according to comments of PR.
* Add vendor/github.com/astaxie/beego vendor/github.com/satori/go.uuid vendor/github.com/shiena/ansicolor.
* Add release/triggers/latest/batcher.yaml release/triggers/tagged/batcher.yaml.
* Change mode of test/scripts/build-batcher.sh.
* Change mode of test/scripts/build-batcher.sh.
* Change mode of test/scripts/build-batcher.sh.
* Fix import time issue and change the resource of batcher.
* Add dependencies.
* Add dependencies.
* Fix pkg/webhook/admission/pod/batcher_injector_test.go issue.
* delete vendor.
* Change batcher.Dockerfile.
* Change test/scripts/build-batcher.sh prow_config.yaml.
* Change test/scripts/build-batcher.sh prow_config.yaml.
* Change batcher README.md.
* Change batcher README.md.
* Change batcher container name to batcher.
* Change batcher README.md.
* Change batcher README.md.
* Change license.
* Change the type of maxBatchSize, maxLatency, timeout to int type.
* Fix addBatcherAnnotations issue.
1 parent 0dfe926 commit b4340c4

File tree: 42 files changed (+4908, -11 lines)


Makefile

Lines changed: 13 additions & 2 deletions

```diff
@@ -3,6 +3,7 @@ HAS_LINT := $(shell command -v golint;)
 # Image URL to use all building/pushing image targets
 IMG ?= kfserving-controller:latest
 LOGGER_IMG ?= logger:latest
+BATCHER_IMG ?= batcher:latest
 SKLEARN_IMG ?= sklearnserver:latest
 XGB_IMG ?= xgbserver:latest
 PYTORCH_IMG ?= pytorchserver:latest
@@ -17,7 +18,7 @@ KFSERVING_CONTROLLER_MEMORY_LIMIT ?= 300Mi
 $(shell perl -pi -e 's/cpu:.*/cpu: $(KFSERVING_CONTROLLER_CPU_LIMIT)/' config/default/manager_resources_patch.yaml)
 $(shell perl -pi -e 's/memory:.*/memory: $(KFSERVING_CONTROLLER_MEMORY_LIMIT)/' config/default/manager_resources_patch.yaml)
 
-all: test manager logger
+all: test manager logger batcher
 
 # Run tests
 test: fmt vet lint manifests
@@ -27,10 +28,14 @@ test: fmt vet lint manifests
 manager: generate fmt vet lint
 	go build -o bin/manager ./cmd/manager
 
-# Build manager binary
+# Build logger binary
 logger: fmt vet
 	go build -o bin/logger ./cmd/logger
 
+# Build batcher binary
+batcher: fmt vet
+	go build -o bin/batcher ./cmd/batcher
+
 # Run against the configured Kubernetes cluster in ~/.kube/config
 run: generate fmt vet lint
 	go run ./cmd/manager/main.go
@@ -138,6 +143,12 @@ docker-build-logger: test
 docker-push-logger:
 	docker push ${LOGGER_IMG}
 
+docker-build-batcher:
+	docker build -f batcher.Dockerfile . -t ${BATCHER_IMG}
+
+docker-push-batcher:
+	docker push ${BATCHER_IMG}
+
 docker-build-sklearn:
 	cd python && docker build -t ${KO_DOCKER_REPO}/${SKLEARN_IMG} -f sklearn.Dockerfile .
```
batcher.Dockerfile

Lines changed: 21 additions & 0 deletions

```dockerfile
# Build the inference-batcher binary
FROM golang:1.13.0 as builder

# Copy in the go src
WORKDIR /go/src/github.com/kubeflow/kfserving
COPY pkg/    pkg/
COPY cmd/    cmd/
COPY go.mod  go.mod
COPY go.sum  go.sum

RUN go mod download

# Build
RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o batcher ./cmd/batcher

# Copy the inference-batcher into a thin image
FROM gcr.io/distroless/static:latest
COPY third_party/ third_party/
WORKDIR /
COPY --from=builder /go/src/github.com/kubeflow/kfserving/batcher .
ENTRYPOINT ["/batcher"]
```

cmd/batcher/main.go

Lines changed: 66 additions & 0 deletions

```go
/*
Copyright 2020 kubeflow.org.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"errors"
	"flag"
	"os"
	"strconv"

	"github.com/kubeflow/kfserving/pkg/batcher"
	"github.com/kubeflow/kfserving/pkg/batcher/controllers"
	logf "sigs.k8s.io/controller-runtime/pkg/runtime/log"
)

var (
	port          = flag.String("port", "9082", "Batcher port")
	componentHost = flag.String("component-host", "127.0.0.1", "Component host")
	componentPort = flag.String("component-port", "8080", "Component port")
	maxBatchSize  = flag.String("max-batchsize", "32", "Max Batch Size")
	maxLatency    = flag.String("max-latency", "5000", "Max Latency in milliseconds")
	timeout       = flag.String("timeout", "60", "Timeout of calling predictor service in seconds")
)

func main() {
	flag.Parse()

	logf.SetLogger(logf.ZapLogger(false))
	log := logf.Log.WithName("entrypoint")

	maxBatchSizeInt, err := strconv.Atoi(*maxBatchSize)
	if err != nil || maxBatchSizeInt <= 0 {
		log.Error(errors.New("Invalid max batch size"), *maxBatchSize)
		os.Exit(1)
	}

	maxLatencyInt, err := strconv.Atoi(*maxLatency)
	if err != nil || maxLatencyInt <= 0 {
		log.Error(errors.New("Invalid max latency"), *maxLatency)
		os.Exit(1)
	}

	timeoutInt, err := strconv.Atoi(*timeout)
	if err != nil || timeoutInt <= 0 {
		log.Error(errors.New("Invalid timeout"), *timeout)
		os.Exit(1)
	}

	controllers.Config(*port, *componentHost, *componentPort, maxBatchSizeInt, maxLatencyInt, timeoutInt)

	log.Info("Starting", "Port", *port)
	batcher.StartHttpServer()
}
```
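Neither pkg/batcher nor pkg/batcher/controllers appears in this commit view, so as a rough illustration only: the size-or-latency trigger that the maxBatchSize and maxLatency flags configure (described in the module README below) can be built from a channel and a timer. The request, collect, and flush names here are hypothetical, not the module's API:

```go
package main

import (
	"fmt"
	"time"
)

// request pairs one input instance with the channel its caller waits on.
type request struct {
	instance   interface{}
	resultChan chan interface{}
}

// collect drains reqChan into batches, flushing when a batch reaches
// maxBatchSize or when maxLatency has elapsed since its first request.
func collect(reqChan <-chan request, maxBatchSize int, maxLatency time.Duration, flush func([]request)) {
	var batch []request
	var timer <-chan time.Time // nil until the batch has its first item
	for {
		select {
		case req := <-reqChan:
			if len(batch) == 0 {
				timer = time.After(maxLatency) // latency clock starts at the first item
			}
			batch = append(batch, req)
			if len(batch) >= maxBatchSize {
				flush(batch)
				batch, timer = nil, nil
			}
		case <-timer:
			flush(batch) // timer is only armed while the batch is non-empty
			batch, timer = nil, nil
		}
	}
}

func main() {
	reqChan := make(chan request)
	go collect(reqChan, 32, 5000*time.Millisecond, func(batch []request) {
		fmt.Printf("flushing a batch of %d instance(s)\n", len(batch))
	})
	reqChan <- request{instance: "img0", resultChan: make(chan interface{}, 1)}
	time.Sleep(6 * time.Second) // the single request flushes via the 5s timer
}
```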

config/default/configmap/inferenceservice.yaml

Lines changed: 8 additions & 0 deletions

```diff
@@ -103,3 +103,11 @@ data:
         "cpuRequest": "100m",
         "cpuLimit": "1"
     }
+  batcher: |-
+    {
+        "image" : "gcr.io/kfserving/batcher:latest",
+        "memoryRequest": "1Gi",
+        "memoryLimit": "1Gi",
+        "cpuRequest": "1",
+        "cpuLimit": "1"
+    }
```
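The webhook injector that consumes this configmap entry is not among the files shown here, but the JSON shape above maps naturally onto a small struct. A hypothetical sketch of parsing it (BatcherConfig and its field tags simply mirror the keys above; this is not the actual kfserving type):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// BatcherConfig mirrors the keys of the "batcher" configmap entry above.
// Illustrative only; the real injector code is not part of this diff view.
type BatcherConfig struct {
	Image         string `json:"image"`
	MemoryRequest string `json:"memoryRequest"`
	MemoryLimit   string `json:"memoryLimit"`
	CpuRequest    string `json:"cpuRequest"`
	CpuLimit      string `json:"cpuLimit"`
}

func main() {
	raw := `{
        "image" : "gcr.io/kfserving/batcher:latest",
        "memoryRequest": "1Gi",
        "memoryLimit": "1Gi",
        "cpuRequest": "1",
        "cpuLimit": "1"
    }`
	var cfg BatcherConfig
	if err := json.Unmarshal([]byte(raw), &cfg); err != nil {
		panic(err)
	}
	fmt.Printf("injecting sidecar image %s with cpu limit %s\n", cfg.Image, cfg.CpuLimit)
}
```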
Lines changed: 1 addition & 0 deletions

```yaml
image: github.com/kubeflow/kfserving/cmd/batcher
```

config/overlays/test/configmap/inferenceservice.yaml

Lines changed: 8 additions & 0 deletions

```diff
@@ -103,3 +103,11 @@ data:
         "cpuRequest": "100m",
         "cpuLimit": "1"
     }
+  batcher: |-
+    {
+        "image" : "gcr.io/kubeflow-ci/kfserving/batcher",
+        "memoryRequest": "1Gi",
+        "memoryLimit": "1Gi",
+        "cpuRequest": "1",
+        "cpuLimit": "1"
+    }
```

docs/diagrams/batcher.jpg

Binary file added (100 KB)

docs/samples/batcher/README.md

Lines changed: 39 additions & 0 deletions

# Inference Batcher

We add this module to support batched prediction for any ML framework (TensorFlow, PyTorch, ...) without degrading performance.

The batcher module also supports custom images.

![Batcher](../../diagrams/batcher.jpg)

* We use a webhook to inject the batcher container into the InferenceService.
* We use the Beego web framework to accept user requests.
* We use channels to transfer data between goroutines.
* We use HTTP connections between containers. In the future, we may use RPC.
* Users can enable or disable the batcher by editing the InferenceService yaml.
* When the number of instances (for example, the number of pictures) reaches maxBatchSize, or the waiting time reaches maxLatency, a batch prediction is triggered.

```yaml
apiVersion: "serving.kubeflow.org/v1alpha2"
kind: "InferenceService"
metadata:
  name: "pytorch-cifar10"
spec:
  default:
    predictor:
      minReplicas: 1
      batcher:
        maxBatchSize: 32
        maxLatency: 5000
        timeout: 60
      pytorch:
        storageUri: "gs://kfserving-samples/models/pytorch/cifar10/"
```

* port: the port of the batcher container.
* maxBatchSize: the maximum batch size for prediction.
* maxLatency: the maximum latency for prediction (in milliseconds).
* timeout: the timeout for calling the predictor service (in seconds).

All of the following fields have default values in the code, so you can configure them or leave them unset:
* maxBatchSize: 32.
* maxLatency: 5000.
* timeout: 60.
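The bullets above describe a fan-in/fan-out flow: instances from concurrent callers are merged into one predictor call, and the predictions are handed back to each caller over its channel. The actual pkg/batcher implementation is not shown on this page, so the following is only a minimal sketch of the fan-out step; dispatch, batchSizes, and resultChans are hypothetical names, and it assumes the predictor returns one prediction per instance, in input order:

```go
package main

import "fmt"

// dispatch returns each caller its share of the batched predictions.
// batchSizes[i] is the number of instances caller i contributed; the
// predictor is assumed to return predictions in merge order.
func dispatch(predictions []string, batchSizes []int, resultChans []chan []string) {
	offset := 0
	for i, n := range batchSizes {
		resultChans[i] <- predictions[offset : offset+n]
		offset += n
	}
}

func main() {
	chans := []chan []string{make(chan []string, 1), make(chan []string, 1)}
	// caller 0 sent 2 instances, caller 1 sent 1 instance
	dispatch([]string{"cat", "dog", "ship"}, []int{2, 1}, chans)
	fmt.Println(<-chans[0]) // [cat dog]
	fmt.Println(<-chans[1]) // [ship]
}
```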

docs/samples/batcher/basic/README.md

Lines changed: 88 additions & 0 deletions

# Inference Batcher Demo

We first create a PyTorch predictor with a batcher. "maxLatency" is set to a large value (5000 milliseconds) so that we can observe the batching process.

```yaml
apiVersion: "serving.kubeflow.org/v1alpha2"
kind: "InferenceService"
metadata:
  name: "pytorch-cifar10"
spec:
  default:
    predictor:
      minReplicas: 1
      batcher:
        maxBatchSize: 32
        maxLatency: 5000
        timeout: 60
      pytorch:
        storageUri: "gs://kfserving-samples/models/pytorch/cifar10/"
```

Let's apply this yaml:

```bash
kubectl create -f pytorch-batcher.yaml
```

We can now send requests to the pytorch model using hey, an HTTP load generator:

```bash
MODEL_NAME=pytorch-cifar10
INPUT_PATH=@./input.json
INGRESS_GATEWAY=istio-ingressgateway
CLUSTER_IP=$(kubectl -n istio-system get service $INGRESS_GATEWAY -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
SERVICE_HOSTNAME=$(kubectl get inferenceservice pytorch-cifar10 -o jsonpath='{.status.url}' | cut -d "/" -f 3)
hey -z 10s -c 5 -m POST -host "${SERVICE_HOSTNAME}" -H "Content-Type: application/json" -D ./input.json "http://$CLUSTER_IP/v1/models/$MODEL_NAME:predict"
```

Each request goes to the batcher container first; the batcher then batches incoming requests and sends the batched request to the predictor container.

Notice: if the interval between two requests is less than "maxLatency", the returned "batchId" will be the same.

Expected output for each terminal tab:

```
Summary:
  Total:        10.6268 secs
  Slowest:      1.6477 secs
  Fastest:      0.0050 secs
  Average:      0.1006 secs
  Requests/sec: 48.1800

  Total data:   167424 bytes
  Size/request: 327 bytes

Response time histogram:
  0.005 [1]   |
  0.169 [447] |■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■
  0.334 [30]  |■■■
  0.498 [7]   |■
  0.662 [10]  |■
  0.826 [3]   |
  0.991 [6]   |■
  1.155 [5]   |
  1.319 [1]   |
  1.483 [1]   |
  1.648 [1]   |

Latency distribution:
  10% in 0.0079 secs
  25% in 0.0114 secs
  50% in 0.0398 secs
  75% in 0.0867 secs
  90% in 0.2029 secs
  95% in 0.5170 secs
  99% in 1.1428 secs

Details (average, fastest, slowest):
  DNS+dialup:  0.0000 secs, 0.0050 secs, 1.6477 secs
  DNS-lookup:  0.0000 secs, 0.0000 secs, 0.0000 secs
  req write:   0.0002 secs, 0.0001 secs, 0.0004 secs
  resp wait:   0.1000 secs, 0.0046 secs, 1.6473 secs
  resp read:   0.0003 secs, 0.0000 secs, 0.0620 secs

Status code distribution:
  [200] 512 responses
```
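As a complement to hey, the shared batchId from the notice above can be observed by firing two concurrent requests within one maxLatency window. This is only a hedged sketch, not part of the sample: it assumes the batcher's JSON response carries a batchId field (as the notice implies) and that CLUSTER_IP and SERVICE_HOSTNAME are exported as in the shell snippet above.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"os"
	"sync"
)

// predict posts one request through the Istio ingress gateway and
// returns the batchId from the response body.
func predict(url, host string, body []byte) (string, error) {
	req, err := http.NewRequest("POST", url, bytes.NewReader(body))
	if err != nil {
		return "", err
	}
	req.Host = host // route by Host header, as hey does with -host
	req.Header.Set("Content-Type", "application/json")
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	var out struct {
		BatchId string `json:"batchId"` // field name assumed from the notice above
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	return out.BatchId, nil
}

func main() {
	url := fmt.Sprintf("http://%s/v1/models/pytorch-cifar10:predict", os.Getenv("CLUSTER_IP"))
	host := os.Getenv("SERVICE_HOSTNAME")
	body, _ := os.ReadFile("input.json")

	var wg sync.WaitGroup
	for i := 0; i < 2; i++ { // two requests inside the same maxLatency window
		wg.Add(1)
		go func() {
			defer wg.Done()
			id, err := predict(url, host, body)
			fmt.Println("batchId:", id, "err:", err)
		}()
	}
	wg.Wait() // both lines should print the same batchId
}
```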
