Skip to content

Commit

Permalink
Add TensorFlow-Serving support and Dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
OrigamiDream committed Nov 2, 2022
1 parent c6968a9 commit 988110e
Show file tree
Hide file tree
Showing 5 changed files with 220 additions and 0 deletions.
9 changes: 9 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FROM tensorflow/serving:2.8.2

# Build-time parameters. Override them with
#   docker build --build-arg MODEL_DIR=... --build-arg MODEL_NAME=... --build-arg MODEL_VERSION=...
# Variables exported in the calling shell are NOT visible to the build, so
# plain ENV instructions could never be customised from the command line.
# MODEL_DIR must be a path inside the build context because it is used by COPY.
ARG MODEL_DIR=./models
ARG MODEL_NAME=cort
ARG MODEL_VERSION=1

# Persist the values in the image environment, exactly as the previous ENV
# instructions did. MODEL_NAME is also read by the tensorflow/serving
# entrypoint to select which model under /models is served.
ENV MODEL_DIR=${MODEL_DIR} \
    MODEL_NAME=${MODEL_NAME} \
    MODEL_VERSION=${MODEL_VERSION}

# Copy the exported SavedModel version directory into the location layout
# /models/<name>/<version> used by the serving image.
COPY ${MODEL_DIR}/${MODEL_NAME}/${MODEL_VERSION} /models/${MODEL_NAME}/${MODEL_VERSION}

# Port published by `docker run -p 8500:8500` (gRPC endpoint of the model server).
EXPOSE 8500
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,24 @@ It has the following arguments:
Perform inference for metrics by (for example) `python run_inference.py --checkpoint_path ./finetuning-checkpoints/wandb_run_id/ckpt-0 --tfrecord_path ./data/tfrecords/{model_name}/valid.fold-1-of-10.tfrecord --concat_hidden_states 2 --repr_act tanh --repr_classifier bi_lstm --repr_size 1024`.<br>
`--concat_hidden_states`, `--repr_act`, `--repr_classifier`, and `--repr_size` must match the configuration that was used for the fine-tuned model's architecture.

### Serving

CoRT supports [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) on Docker. Use `configure_docker_image.py` to export a servable SavedModel; the script then prints the commands to build and run the Docker image.
It has the following arguments:
- `--checkpoint_path`: Location of trained model checkpoint. (Required)
- `--saved_model_dir`: Directory in which the SavedModel is stored. ('./models' as default)
- `--model_spec_name`: Name of model spec. ('cort' as default)
- `--model_spec_version`: Version of model spec. ('1' as default)
- `--signature_name`: Name of signature of SavedModel ('serving_default' as default)
- `--model_name`: Name of the pre-trained model. (One of korscibert, korscielectra, or a Hugging Face model name; 'klue/roberta-base' as default)
- `--tfrecord_path`: Location of TFRecord file for warmup requests. {model_name} is a placeholder.
- `--num_warmup_requests`: Number of warmup requests. Pass 0 to skip. (10 as default)
- `--repr_classifier`: Name of classification head for classifier. (One of 'seq_cls' and 'bi_lstm' is allowed)
- `--repr_act`: Name of activation function for representation. (One of 'tanh' and 'gelu' is allowed)
- `--concat_hidden_states`: Number of hidden states to concatenate. (1 as default)
- `--repr_size`: Number of representation dense units. (1024 as default)
- `--num_labels`: Number of labels. (9 as default)

### Performance

[LAN (Label Attention Network)](https://aida.kisti.re.kr/gallery/17) has been proposed in [2021 KISTI AI/ML Competition](https://aida.kisti.re.kr/notice/7).<br>
Expand Down
159 changes: 159 additions & 0 deletions configure_docker_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
import os
import logging
import argparse

import tensorflow as tf

from utils import utils, formatting_utils
from cort.config import Config
from cort.modeling import CortForSequenceClassification
from tensorflow_serving.apis.predict_pb2 import PredictRequest
from tensorflow_serving.apis.prediction_log_pb2 import PredictionLog, PredictLog

# Module-level side effect: set up the project's log formatting on import,
# passing INFO as the level (helper lives in utils.formatting_utils;
# presumably it installs a handler/formatter on the root logger — confirm).
formatting_utils.setup_formatter(logging.INFO)


def parse_tfrecords(tfrecord_path, model_name, maxlen, num_samples):
    """Read one shuffled batch of ``input_ids`` from a TFRecord file.

    Args:
        tfrecord_path: Path template of the TFRecord file. A ``{model_name}``
            placeholder is replaced by ``model_name`` with every ``/``
            turned into ``_``.
        model_name: Name of the pre-trained model, used to fill the template.
        maxlen: Length of the fixed-length ``input_ids`` feature.
        num_samples: Number of examples to put into the returned batch.

    Returns:
        The first batch produced by the input pipeline: an int32 tensor of
        shape ``(num_samples, maxlen)``. The dataset is repeated before
        batching, so files holding fewer than ``num_samples`` examples
        still fill the batch.

    Raises:
        ValueError: If the pipeline does not yield any batch (for example
            because the TFRecord file contains no examples).
    """
    feature_desc = {
        'input_ids': tf.io.FixedLenFeature([maxlen], tf.int64),
        'sections': tf.io.FixedLenFeature([1], tf.int64),
        'labels': tf.io.FixedLenFeature([1], tf.int64)
    }

    def _parse_feature_desc(example_proto):
        example = tf.io.parse_single_example(example_proto, feature_desc)

        # tf.int64 is acceptable, but tf.int32 has more performance advantages.
        return {
            name: tf.cast(tensor, tf.int32) if tensor.dtype == tf.int64 else tensor
            for name, tensor in example.items()
        }

    def _reconfigure_inputs(example):
        # Only the token ids are needed to build warmup requests.
        return example['input_ids']

    fname = tfrecord_path.format(model_name=model_name.replace('/', '_'))
    logging.info('Parsing TFRecords from {}'.format(fname))

    dataset = tf.data.TFRecordDataset(fname)
    dataset = dataset.map(_parse_feature_desc).map(_reconfigure_inputs)
    dataset = dataset.shuffle(buffer_size=1024).repeat().batch(num_samples)

    # Take just the first batch; fail loudly instead of handing ``None`` to
    # the caller, which would otherwise crash later with an unrelated error.
    input_ids = next(iter(dataset), None)
    if input_ids is None:
        raise ValueError('No examples could be read from TFRecord file: {}'.format(fname))
    return input_ids


def store_warmup_requests(args, input_ids, saved_model_path):
    """Write TensorFlow Serving warmup requests next to a SavedModel.

    One ``PredictionLog`` record is written per sample of ``input_ids``, so
    the number of stored requests equals the number of samples (previously a
    single request holding the whole batch was written while the log line
    reported ``num_warmup_requests`` requests).

    Args:
        args: Parsed command-line arguments; ``model_spec_name`` and
            ``signature_name`` are copied into every request.
        input_ids: Batch of token ids, indexable along its first axis
            (e.g. the ``(num_samples, maxlen)`` tensor from
            ``parse_tfrecords``).
        saved_model_path: Directory of the exported SavedModel version. The
            records are written to
            ``<saved_model_path>/assets.extra/tf_serving_warmup_requests``.
    """
    warmup_request_dir = os.path.join(saved_model_path, 'assets.extra')
    os.makedirs(warmup_request_dir, exist_ok=True)
    warmup_request_path = os.path.join(warmup_request_dir, 'tf_serving_warmup_requests')

    num_requests = len(input_ids)
    with tf.io.TFRecordWriter(warmup_request_path) as writer:
        for index in range(num_requests):
            # Slice (rather than index) to keep the leading batch dimension,
            # which the serving signature expects.
            sample = tf.make_tensor_proto(input_ids[index:index + 1])

            request = PredictRequest()
            request.model_spec.name = args.model_spec_name
            request.model_spec.signature_name = args.signature_name
            request.inputs['input_ids'].CopyFrom(sample)

            log = PredictionLog(predict_log=PredictLog(request=request))
            writer.write(log.SerializeToString())
    logging.info('{} warmup requests have been stored at: {}'.format(num_requests, warmup_request_path))


def restore_cort_classifier(args, config: Config):
    """Rebuild the CoRT classifier from a checkpoint and wrap it for serving.

    Args:
        args: Parsed command-line arguments; only ``checkpoint_path`` is read.
        config: Model configuration; ``num_labels`` sizes the classifier.

    Returns:
        A ``CortForSequenceClassification.Serving`` instance wrapping the
        frozen classifier, already called once on its dummy inputs.
    """
    checkpoint_path = args.checkpoint_path

    classifier = CortForSequenceClassification(config, num_labels=config.num_labels)
    classifier.trainable = False

    # Objects stored in the checkpoint but absent from this model are
    # tolerated via expect_partial().
    ckpt = tf.train.Checkpoint(model=classifier)
    restore_status = ckpt.restore(checkpoint_path)
    restore_status.expect_partial()

    servable = CortForSequenceClassification.Serving(config, classifier)
    # One forward pass on dummy inputs; presumably this creates the model's
    # variables so the restored values can be attached — confirm.
    servable(servable.dummy_inputs)

    message = 'Restored model checkpoint from: {}'.format(checkpoint_path)
    logging.info(message)
    return servable


def store_as_saved_model(cort_model, signature_name, filepath):
    """Export ``cort_model`` as a SavedModel exposing one named signature.

    Args:
        cort_model: Servable model; its
            ``config.pretrained_config.max_position_embeddings`` fixes the
            sequence length of the exported input.
        signature_name: Key under which the serving function is exported.
        filepath: Destination directory of the SavedModel.
    """
    sequence_length = cort_model.config.pretrained_config.max_position_embeddings
    # Variable batch size, fixed sequence length, int32 token ids.
    input_spec = tf.TensorSpec(shape=(None, sequence_length), dtype=tf.int32, name='input_ids')

    def _eval_wrapper(input_ids):
        return cort_model(input_ids)

    serving_fn = tf.function(_eval_wrapper, input_signature=[input_spec])
    exported_signatures = {signature_name: serving_fn.get_concrete_function()}
    tf.saved_model.save(cort_model, filepath, signatures=exported_signatures)

    message = 'Servable CoRT classifier has been written as SavedModel format at: {}'.format(filepath)
    logging.info(message)


def main():
    """Export a trained CoRT checkpoint as a TensorFlow Serving model.

    Steps:
        1. Parse command-line arguments and build the model ``Config``.
        2. Restore the classifier from ``--checkpoint_path`` and export it
           to ``<saved_model_dir>/<model_spec_name>/<model_spec_version>``.
        3. Unless ``--num_warmup_requests`` is 0 (or negative), sample
           inputs from the TFRecord file and store warmup requests.
        4. Log the Docker commands to build and run the serving image.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--checkpoint_path', required=True,
                        help='Location of trained model checkpoint.')
    parser.add_argument('--saved_model_dir', default='./models',
                        help='Location of SavedModel to be stored.')
    parser.add_argument('--model_spec_name', default='cort',
                        help='Name of model spec.')
    parser.add_argument('--model_spec_version', default='1',
                        help='Version of model spec.')
    parser.add_argument('--signature_name', default='serving_default',
                        help='Name of signature of SavedModel')
    parser.add_argument('--model_name', default='klue/roberta-base',
                        help='Name of pre-trained models. (One of korscibert, korscielectra, huggingface models)')
    parser.add_argument('--tfrecord_path', default='./data/tfrecords/{model_name}/eval.tfrecord',
                        help='Location of TFRecord file for warmup requests. {model_name} is a placeholder.')
    parser.add_argument('--num_warmup_requests', default=10, type=int,
                        help='Number of warmup requests. Pass 0 to skip')
    parser.add_argument('--repr_classifier', default='seq_cls',
                        help='Name of classification head for classifier. (One of seq_cls and bi_lstm is allowed)')
    parser.add_argument('--repr_act', default='tanh',
                        help='Name of activation function for representation. (One of tanh and gelu is allowed)')
    parser.add_argument('--concat_hidden_states', default=1, type=int,
                        help='Number of hidden states to concatenate.')
    parser.add_argument('--repr_size', default=1024, type=int,
                        help='Number of representation dense units')
    parser.add_argument('--num_labels', default=9, type=int,
                        help='Number of labels')

    # Configurable pre-defined variables
    parser.add_argument('--korscibert_vocab', default='./cort/pretrained/korscibert/vocab_kisti.txt')
    parser.add_argument('--korscibert_ckpt', default='./cort/pretrained/korscibert/model.ckpt-262500')
    parser.add_argument('--korscielectra_vocab', default='./cort/pretrained/korscielectra/data/vocab.txt')
    parser.add_argument('--korscielectra_ckpt', default='./cort/pretrained/korscielectra/data/models/korsci_base')
    parser.add_argument('--classifier_dropout_prob', default=0.1, type=float)

    # Parser arguments
    args = parser.parse_args()
    config = Config(**vars(args))
    config.pretrained_config = utils.parse_pretrained_config(config)
    saved_model_path = os.path.join(args.saved_model_dir, args.model_spec_name, args.model_spec_version)

    cort_serving = restore_cort_classifier(args, config)

    store_as_saved_model(cort_serving, args.signature_name, saved_model_path)

    if args.num_warmup_requests > 0:
        maxlen = config.pretrained_config.max_position_embeddings
        input_ids = parse_tfrecords(args.tfrecord_path, args.model_name, maxlen, num_samples=args.num_warmup_requests)
        store_warmup_requests(args, input_ids, saved_model_path)

    logging.info('Finishing all necessary jobs')
    logging.info('Run following command to build and run Docker container:')
    # `docker build` does not forward variables from the calling shell's
    # environment into the Dockerfile, so the values are passed as build
    # arguments. They take effect for every name the Dockerfile declares
    # with ARG; Docker warns about build arguments it does not consume.
    logging.info(
        ' docker build --build-arg MODEL_DIR={} --build-arg MODEL_NAME={} --build-arg MODEL_VERSION={}'
        ' -t cort/serving:latest .'
        .format(args.saved_model_dir,
                args.model_spec_name,
                args.model_spec_version)
    )
    logging.info(' docker run -d -p 8500:8500 --name cort-serving cort/serving')


if __name__ == '__main__':
    main()
33 changes: 33 additions & 0 deletions cort/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,39 @@ def call(self, inputs, training=None, mask=None):
def get_config(self):
    """Return the configuration produced by the parent class, unchanged."""
    parent_config = super().get_config()
    return parent_config

class Serving(models.Model):
    """Inference wrapper that exposes a CoRT classifier's outputs as a dict.

    The wrapped model is called with the input ids only and must return a
    pair whose second element is a mapping with ``'logits'``, ``'probs'``
    and (when ``calc_correlation`` is true) ``'attentions'``.
    """

    def __init__(self,
                 config: ConfigLike,
                 cort_model: "CortForSequenceClassification",
                 calc_correlation=True, **kwargs):
        """Store the wrapped model and prepare dummy inputs.

        Args:
            config: Configuration accepted by ``Config.parse_config``.
            cort_model: Classifier whose outputs are re-exposed.
            calc_correlation: Whether ``call`` adds a ``'correlations'``
                entry derived from the attention outputs.
            **kwargs: Forwarded to the parent model constructor.
        """
        super().__init__(**kwargs)
        self.config = Config.parse_config(config)
        self.cort_model = cort_model
        self.calc_correlation = calc_correlation
        # A single all-zero int32 sequence of the maximum supported length.
        max_positions = self.config.pretrained_config.max_position_embeddings
        self.dummy_inputs = tf.zeros(shape=(1, max_positions), dtype=tf.int32)

    def call(self, inputs, training=None, mask=None):
        """Run the wrapped classifier and collect its servable outputs.

        ``training`` and ``mask`` are accepted for interface compatibility
        but are not forwarded to the wrapped model.

        Returns:
            Dict with ``'logits'`` and ``'probs'``, plus ``'correlations'``
            when ``calc_correlation`` is enabled.
        """
        _, cort_outputs = self.cort_model(inputs)

        outputs = {key: cort_outputs[key] for key in ('logits', 'probs')}
        if not self.calc_correlation:
            return outputs

        # Average every attention tensor over axis 1 (presumably the head
        # axis — confirm), join the results along axis 1 and average again.
        reduced_maps = [
            tf.reduce_mean(attention, axis=1)
            for attention in cort_outputs['attentions']
        ]
        joined = tf.concat(reduced_maps, axis=1)
        outputs['correlations'] = tf.reduce_mean(joined, axis=1)
        return outputs


class CortMainLayer(layers.Layer):

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ tensorflow==2.10.0
tensorflow-addons==0.18.0
tensorflow-estimator==2.10.0
tensorflow-io-gcs-filesystem==0.27.0
tensorflow-serving-api==2.10.0
termcolor==2.0.1
terminado==0.15.0
threadpoolctl==3.1.0
Expand Down

0 comments on commit 988110e

Please sign in to comment.